I am in the process of training LSTM neural networks that are meant to predict quintiles of stock price distributions. Since I would like to train the model not just on one stock but on a sample of 500, I wrote the training loop below, which fits the model to each stock, saves the model parameters, and then loads the parameters again when training on the next stock. My question is whether I can write the code in the for loop like below, or whether I could also just use a complete dataset including all 500 stocks, where the data is simply concatenated along axis 0.
The idea is that the model iterates over each stock, the best model is saved by the checkpoint callback, and it is then reloaded again for fitting the next stock.
This is the training loop I would like to use:
def compile_and_fit(model_type, model, checkpoint_path, config, stock_data, macro_data,
                    factor_data, patience, batch_size, num_epochs, train_set_ratio,
                    val_set_ratio, Y_name):
    """
    model = NN model,
    data = stock data, factor data, macro data,
    batch_size = timesteps per batch
    alpha adam = learning rate optimizer
    data set ratios = train_set_ratio, val_set_ratio (e.g. 0.5)
    """
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='loss',
        patience=patience,
        mode='min')
    cp_callback = tf.keras.callbacks.ModelCheckpoint(
        checkpoint_path,
        monitor='loss',
        verbose=True,
        save_best_only=True,
        save_freq=batch_size,
        mode='min')
    permno_list = stock_data.permno.unique()
    test_data = pd.DataFrame()
    counter = 0
    for p in permno_list:
        # checkpoints: use the fresh model for the first stock, then reload the best saved model
        if counter == 0:
            trained_model = model
        else:
            trained_model = tf.keras.models.load_model(checkpoint_path)
            cp_callback = tf.keras.callbacks.ModelCheckpoint(
                checkpoint_path, monitor='loss', verbose=True,
                save_best_only=True, save_freq=batch_size, mode='min')
        stock_data_length = len(stock_data.loc[stock_data.permno == p])
        train_data_stocks = stock_data.loc[stock_data.permno == p][0:int(stock_data_length * train_set_ratio)]
        val_data_stocks = stock_data.loc[stock_data.permno == p][int(stock_data_length * train_set_ratio):int(stock_data_length * (val_set_ratio + train_set_ratio))]
        test_data_stocks = stock_data.loc[stock_data.permno == p][int(stock_data_length * (val_set_ratio + train_set_ratio)):]
        test_data = pd.concat([test_data, test_data_stocks], axis=0)
        train_date_index = train_data_stocks.index.values.tolist()
        val_date_index = val_data_stocks.index.values.tolist()
        train_data_factors = factor_data.loc[factor_data.index.isin(train_date_index)]
        train_data_macro = macro_data.loc[macro_data.index.isin(train_date_index)]  # was macro_factors, which is undefined here; the parameter is macro_data
        train_data_macro_norm = train_data_macro.copy(deep=True)
        for c in train_data_macro_norm.columns:
            train_data_macro_norm[c] = MinMaxScaler([-1, 1]).fit_transform(pd.DataFrame(train_data_macro_norm[c]))
        train_data_merged = pd.concat([train_data_factors, train_data_macro_norm], axis=1)
        val_data_factors = factor_data.loc[factor_data.index.isin(val_date_index)]
        val_data_macro = macro_data.loc[macro_data.index.isin(val_date_index)]  # was macro_factors
        val_data_macro_norm = val_data_macro.copy(deep=True)
        for c in val_data_macro_norm.columns:
            val_data_macro_norm[c] = MinMaxScaler([-1, 1]).fit_transform(pd.DataFrame(val_data_macro_norm[c]))
        val_data_merged = pd.concat([val_data_factors, val_data_macro_norm], axis=1)
        if model_type == 'combined':
            x_train_factors = []
            x_train_macro = []
            y_train = []
            for i in range(batch_size, len(train_data_factors)):
                x_train_factors.append(train_data_factors.values[i - batch_size:i, :])
                x_train_macro.append(train_data_macro_norm.values[i - batch_size:i, :])
                y_train.append(train_data_stocks[Y_name].values[i])
            x_train_factors, x_train_macro, y_train = np.array(x_train_factors), np.array(x_train_macro), np.array(y_train)
            x_val_factors = []
            x_val_macro = []
            y_val = []
            for i in range(batch_size, len(val_data_factors)):
                x_val_factors.append(val_data_factors.values[i - batch_size:i, :])
                x_val_macro.append(val_data_macro_norm.values[i - batch_size:i, :])
                y_val.append(val_data_stocks[Y_name].values[i])
            x_val_factors, x_val_macro, y_val = np.array(x_val_factors), np.array(x_val_macro), np.array(y_val)
            # seed the checkpoint's best value with the current model's loss so a worse fit cannot overwrite it
            score = trained_model.evaluate([x_train_macro, x_train_factors], y_train, batch_size=batch_size)
            score = list(score)
            score.sort(reverse=True)
            score = score[-2]
            cp_callback.best = score
            trained_model.fit(x=[x_train_macro, x_train_factors], y=y_train, batch_size=batch_size, epochs=num_epochs,
                              validation_data=([x_val_macro, x_val_factors], y_val), callbacks=[early_stopping, cp_callback])
        if model_type == 'merged':
            x_train_merged = []
            y_train = []
            for i in range(batch_size, len(train_data_merged)):
                x_train_merged.append(train_data_merged.values[i - batch_size:i, :])
                y_train.append(train_data_stocks[Y_name].values[i])
            x_train_merged, y_train = np.array(x_train_merged), np.array(y_train)
            x_val_merged = []
            y_val = []
            for i in range(batch_size, len(val_data_merged)):
                x_val_merged.append(val_data_merged.values[i - batch_size:i, :])
                y_val.append(val_data_stocks[Y_name].values[i])
            x_val_merged, y_val = np.array(x_val_merged), np.array(y_val)
            score = trained_model.evaluate(x_train_merged, y_train, batch_size=batch_size)
            score = list(score)
            score.sort(reverse=True)
            score = score[-2]
            cp_callback.best = score
            trained_model.fit(x=x_train_merged, y=y_train, batch_size=batch_size, epochs=num_epochs,
                              validation_data=(x_val_merged, y_val), callbacks=[early_stopping, cp_callback])
        counter += 1  # without this increment the checkpoint would never be reloaded for the next stock
    return trained_model, test_data
If someone has an idea whether this works or not, I would be incredibly grateful!
In my testing I could see the MSE constantly decreasing; however, when the loop continues with the next stock, the MSE starts with a very high value again.
According to this answer,
How can I use multiple datasets with one model in Keras?,
you can repeatedly fit the same model on more datasets.
If you want to save the model and load it at each iteration, that should also work, with the caveat that you lose the optimizer state (see Loading a trained Keras model and continue training).
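For illustration, here is a minimal sketch of the "repeatedly fit the same model" approach, which also sidesteps the optimizer-state caveat because the model never leaves memory. datasets is a hypothetical mapping from stock id to its prepared arrays, not something from the original post:

checkpoint = tf.keras.callbacks.ModelCheckpoint(
    checkpoint_path, monitor='loss', save_best_only=True, mode='min')

for stock_id, (x, y) in datasets.items():
    # same in-memory model throughout, so the optimizer state carries over between stocks
    model.fit(x, y, epochs=num_epochs, batch_size=batch_size,
              callbacks=[checkpoint])

If you instead concatenate all 500 stocks along axis 0, make sure the sliding windows are built per stock first, so no window spans two different stocks.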
I am trying to replicate the code available at https://machinelearningmastery.com/neural-networks-are-function-approximators/ using PyTorch, adding train and test data, but the prediction and loss results are not good.
In a first attempt, I tried to change the number of nodes in the network, the epochs, and the learning rate; then
I tried to add a scheduler to adjust the learning rate, and I implemented a small checkpoint to save the good models, but it was still not enough to get good results.
I was wondering if the community has any idea how to fix my code.
DATA
import torch
from torch import nn
from sklearn.preprocessing import MinMaxScaler

X = torch.arange(start, end, step, dtype=torch.float32).unsqueeze(dim=1)
y = torch.tensor([i**2.0 for i in X[0:]]).unsqueeze(dim=1)

train_split = int(0.8 * len(X))
X_train, y_train = X[:train_split], y[:train_split]
X_test, y_test = X[train_split:], y[train_split:]

scaler_x = MinMaxScaler()
scaler_x.fit(X_train)
X_Train = scaler_x.transform(X_train)
scaler_y = MinMaxScaler()
scaler_y.fit(y_train)
y_Train = scaler_y.transform(y_train)
Deep Network
class FunctionEstimatorModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear_layer_1 = nn.Linear(in_features=1, out_features=200)
        self.relu = nn.LeakyReLU()
        self.linear_layer_2 = nn.Linear(in_features=200, out_features=200)
        self.relu = nn.LeakyReLU()  # note: this reassigns the same attribute; one LeakyReLU suffices
        self.linear_layer_3 = nn.Linear(in_features=200, out_features=1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.linear_layer_3(self.relu(self.linear_layer_2(self.relu(self.linear_layer_1(x)))))
for loop
for epoch in range(epochs):
    model_0.train()
    y_preds = model_0(X_train).squeeze()
    loss = loss_fn(y_preds, y_train)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    scheduler.step()

    ##### Testing
    model_0.eval()
    with torch.inference_mode():
        test_pred = model_0(X_test).squeeze()
        test_loss = loss_fn(test_pred, y_test)
    print(f"Epoch: {epoch} | Loss: {loss} | Test Loss: {test_loss}")
Thanks a lot
I should point out that, compared to the tutorial page you are referencing, the points you are trying to predict are much more difficult because they are out of distribution. Your model can only predict points within [-50, 25], since it was only ever given training points from that interval. If you look at the example on that page, however, the training points cover the whole range (with a different density, of course, but still).
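If the goal is interpolation rather than extrapolation, one quick check (my addition, reusing the question's variable names) is to shuffle the data before splitting, so the test points fall inside the training range:

# shuffle before splitting so train and test cover the same interval
perm = torch.randperm(len(X))
X, y = X[perm], y[perm]

train_split = int(0.8 * len(X))
X_train, y_train = X[:train_split], y[:train_split]
X_test, y_test = X[train_split:], y[train_split:]

If the test loss then drops, the problem was extrapolation, not the network.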
I am now working on a neural network that should predict the next activity and the outcome (both, or just one, depending on the self.net_out parameter) of a trace (a sequence of events, taken from an event log). The inputs of the net are windows (prefixes) of a trace of a specific size. Right now it looks like this:
def nn(self, params):
    # done in this function so that, in case, win_size can easily become a parameter
    X_train, Y_train, Z_train = self.build_windows(self.traces_train, self.win_size)

    if self.net_embedding == 0:
        if self.net_out != 2:
            Y_train = self.leA.fit_transform(Y_train)
            Y_train = to_categorical(Y_train)
            label = Y_train
        if self.net_out != 1:
            Z_train = self.leO.fit_transform(Z_train)
            Z_train = to_categorical(Z_train)
            label = Z_train

    unique_events = len(self.act_dictionary)
    input_act = Input(shape=self.win_size, dtype='int32', name='input_act')
    if self.net_embedding == 0:
        x_act = Embedding(output_dim=params["output_dim_embedding"], input_dim=unique_events + 1,
                          input_length=self.win_size)(input_act)
    else:
        print("WIP")
    n_layers = int(params["n_layers"]["n_layers"])
    l1 = LSTM(params["shared_lstm_size"], return_sequences=True, kernel_initializer='glorot_uniform',
              dropout=params['dropout'])(x_act)
    l1 = BatchNormalization()(l1)
    if self.net_out != 2:
        l_a = LSTM(params["lstmA_size_1"], return_sequences=(n_layers != 1),
                   kernel_initializer='glorot_uniform', dropout=params['dropout'])(l1)
        l_a = BatchNormalization()(l_a)
    if self.net_out != 1:  # was `elif`: with net_out == 0 both branches are needed, otherwise l_o is never defined
        l_o = LSTM(params["lstmO_size_1"], return_sequences=(n_layers != 1),
                   kernel_initializer='glorot_uniform', dropout=params['dropout'])(l1)
        l_o = BatchNormalization()(l_o)
    for i in range(2, n_layers + 1):
        if self.net_out != 2:
            l_a = LSTM(params["n_layers"]["lstmA_size_%s_%s" % (i, n_layers)], return_sequences=(n_layers != i),
                       kernel_initializer='glorot_uniform', dropout=params['dropout'])(l_a)
            l_a = BatchNormalization()(l_a)
        if self.net_out != 1:
            l_o = LSTM(params["n_layers"]["lstmO_size_%s_%s" % (i, n_layers)], return_sequences=(n_layers != i),
                       kernel_initializer='glorot_uniform', dropout=params['dropout'])(l_o)
            l_o = BatchNormalization()(l_o)

    outputs = []
    if self.net_out != 2:
        output_l = Dense(self.outsize_act, activation='softmax', name='act_output')(l_a)
        outputs.append(output_l)
    if self.net_out != 1:
        output_o = Dense(self.outsize_out, activation='softmax', name='outcome_output')(l_o)
        outputs.append(output_o)

    model = Model(inputs=input_act, outputs=outputs)
    print(model.summary())
    opt = Adam(lr=params["learning_rate"])
    if self.net_out == 0:
        loss = {'act_output': 'categorical_crossentropy', 'outcome_output': 'categorical_crossentropy'}
        loss_weights = [params['gamma'], 1 - params['gamma']]
    if self.net_out == 1:
        loss = {'act_output': 'categorical_crossentropy'}
        loss_weights = [1, 1]
    if self.net_out == 2:
        loss = {'outcome_output': 'categorical_crossentropy'}
        loss_weights = [1, 1]
    model.compile(loss=loss, optimizer=opt, loss_weights=loss_weights, metrics=['accuracy'])

    early_stopping = EarlyStopping(monitor='val_loss', patience=20)
    lr_reducer = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, verbose=0, mode='auto',
                                   min_delta=0.0001, cooldown=0, min_lr=0)

    if self.net_out == 0:
        history = model.fit(X_train, [Y_train, Z_train], epochs=3, batch_size=2**params['batch_size'],
                            verbose=2, callbacks=[early_stopping, lr_reducer], validation_split=0.2)
    else:
        history = model.fit(X_train, label, epochs=300, batch_size=2**params['batch_size'],
                            verbose=2, callbacks=[early_stopping, lr_reducer], validation_split=0.2)

    scores = [history.history['val_loss'][epoch] for epoch in range(len(history.history['loss']))]
    score = min(scores)
    # global best_score, best_model
    if self.best_score > score:
        self.best_score = score
        self.best_model = model
    return {'loss': score, 'status': STATUS_OK}
As can be seen, I need to consider two types of embedding. For the one that I have already implemented and tested (self.net_embedding=0), each activity/event in each trace (and consequently window) is mapped to an integer; then I apply fit_transform and to_categorical.
The second type of embedding I have to try uses word2vec. To do so, I already changed the format of the input, not converting each activity to an integer but keeping it as a string (the actual name of the activity, standardized to just numbers and letters). I don't know how to proceed though: I guess I should do something like
w2vModel = Word2Vec(X_train, size=params['word2vec_size'], min_count=1)
to get the embedded windows via w2vModel.wv, but how do I then pass these to the LSTM layers? Into what should I change the embedding layer after the input one (where I put print("WIP") for now)?
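One common pattern, sketched below as an assumption rather than a drop-in fix, is to keep an integer-indexed Embedding layer but initialize it from the word2vec vectors. This uses the gensim 4.x API, where size became vector_size and the vocabulary is exposed as wv.index_to_key:

from gensim.models import Word2Vec
import numpy as np
from tensorflow.keras.layers import Embedding

# X_train here is assumed to be the list of windows as lists of activity-name strings
w2vModel = Word2Vec(sentences=X_train, vector_size=params['word2vec_size'], min_count=1)

# one integer index per activity name (0 reserved for padding) and the matching weight matrix
act_to_idx = {act: i + 1 for i, act in enumerate(w2vModel.wv.index_to_key)}
embedding_matrix = np.zeros((len(act_to_idx) + 1, w2vModel.vector_size))
for act, i in act_to_idx.items():
    embedding_matrix[i] = w2vModel.wv[act]

# replaces the trainable Embedding in the net_embedding==1 branch; frozen to keep the word2vec vectors
x_act = Embedding(input_dim=embedding_matrix.shape[0],
                  output_dim=w2vModel.vector_size,
                  weights=[embedding_matrix],
                  input_length=self.win_size,
                  trainable=False)(input_act)

The windows then have to be converted from strings to these indices before calling fit, and the rest of the LSTM stack can stay unchanged.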
I want to create a for loop in order to run my model several times and keep the best performing model from each run. This is because I've noticed that each time I train my model, it might perform well on one run and much worse on another. Thus I want to store each model in a list, or just select the best one.
I have the current process, but I'm not sure it is the most adequate manner, and I'm also not actually sure how to select the best performing model across all these iterations. Here I am doing it only for 10 iterations, but I want to know if there is a better way of doing this.
My Code Implementation
def build_model(input1, input2):
    """
    Creates a multi-channel ANN, capable of accepting multiple inputs.
    :param input1: first input array (weight)
    :param input2: second input array (word embeddings)
    :return: the model of the ANN with a single output given
    """
    input1 = np.expand_dims(input1, 1)
    # Define Inputs for ANN (note: this rebinds input1/input2 from arrays to Input tensors)
    input1 = Input(shape=(input1.shape[1],), name="input1")
    input2 = Input(shape=(input2.shape[1],), name="input2")

    # First Branch of ANN (Weight)
    x = Dense(units=1, activation="relu")(input1)
    x = BatchNormalization()(x)

    # Second Branch of ANN (Word Embeddings)
    y = Dense(units=36, activation="relu")(input2)
    y = BatchNormalization()(y)

    # Merge the input models into a single large vector
    combined = Concatenate()([x, y])

    # Apply Final Output Layer
    outputs = Dense(1, name="output")(combined)

    # Create an Interpretation Model (accepts the inputs from previous branches, single output)
    model = Model(inputs=[input1, input2], outputs=outputs)

    # Compile the Model
    model.compile(loss='mse', optimizer=Adam(lr=0.01), metrics=['mse'])

    # Summarize the Model
    model.summary()
    return model
test_outcomes = []  # list of model scores
r2_outcomes = []    # list of r2 scores
stored_models = []  # list of stored models

for i in range(10):
    model = build_model(x_train['input1'], x_train['input2'])
    print("Model Training")
    model.fit([x_train['input1'], x_train['input2']], y_train,
              batch_size=25, epochs=60, verbose=0,  # validation_split = 0.2
              validation_data=([x_valid['input1'], x_valid['input2']], y_valid))

    # Determine Model Predictions
    print("Model Predictions")
    y_pred = model.predict([x_valid['input1'], x_valid['input2']])
    y_pred = y_pred.flatten()

    # Evaluate the Model
    print("Model Evaluations")
    score = model.evaluate([x_valid['input1'], x_valid['input2']], y_valid, verbose=1)
    test_loss = round(score[0], 3)
    print('Test loss:', test_loss)
    test_outcomes.append(test_loss)

    # Calculate R_Squared
    r_squared = r2_score(y_valid, y_pred)
    print(r_squared)
    r2_outcomes.append(r_squared)

    # Store Final Model
    print("Model Stored")
    stored_models.append(model)

mean_test = np.mean(test_outcomes)
r2_means = np.mean(r2_outcomes)
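To actually select the best run from these lists, one option (my addition, using only the variables defined above) is to take the index of the lowest validation loss:

# lowest validation loss wins
best_idx = int(np.argmin(test_outcomes))
best_model = stored_models[best_idx]
print("Best run:", best_idx, "loss:", test_outcomes[best_idx], "R2:", r2_outcomes[best_idx])

# saving the winner to disk avoids keeping all 10 models in memory
best_model.save("best_model.h5")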
Output Example
You should use Callbacks: you can stop training using a callback.
Here is an example of how you can create a custom callback in order to stop training when a certain accuracy threshold is reached:
# example
acc_threshold = 0.95

class myCallback(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs={}):
        # depending on the Keras version, the logged key may be 'accuracy' instead of 'acc'
        if logs.get('acc') > acc_threshold:
            print("\nReached %2.2f%% accuracy, so stopping training!!" % (acc_threshold))
            self.model.stop_training = True

my_callback = myCallback()
model.fit([x_train['input1'], x_train['input2']], y_train,
          batch_size=25, epochs=60, verbose=0,  # validation_split = 0.2
          validation_data=([x_valid['input1'], x_valid['input2']], y_valid),
          callbacks=[my_callback])  # callbacks expects a list
You can also use EarlyStopping to monitor metrics (like stopping when the loss isn't improving).
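For instance, a minimal sketch of that; restore_best_weights is my addition and rolls the model back to its best epoch instead of the last one:

early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10,
                                              restore_best_weights=True)

model.fit([x_train['input1'], x_train['input2']], y_train,
          batch_size=25, epochs=60, verbose=0,
          validation_data=([x_valid['input1'], x_valid['input2']], y_valid),
          callbacks=[early_stop])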
I've been training an MLP to predict the time remaining on an assembly sequence. The training loss, validation loss and MSE are all less than 0.001; however, when I try to do a prediction with one of the datasets I trained the network with, it can't correctly identify any of the outputs from the set of inputs. What am I doing wrong that is producing this error?
I am also struggling to understand how, when the model is deployed, I should perform the scaling of the result for one prediction. scaler.inverse_transform won't work because the data for that scaler used during training has been lost, as the prediction is done in a separate script from the training, using the model the training produced. Is this information saved in the model builder?
I have tried changing the batch size during training, rounding the time column of the dataset to the nearest second (previously it was 0.1 seconds), and training over 50, 100 and 200 epochs, and I always end up with no correct predictions. I am also training an LSTM to see which is more accurate, but that has the same issue. The dataset is split 70-30 training-testing, and the training portion is then split 75-25 into training and validation.
Data scaling and model training code:
def scale_data(training_data, training_data_labels, testing_data, testing_data_labels):
    # Create X and Y scalers between 0 and 1
    x_scaler = MinMaxScaler(feature_range=(0, 1))
    y_scaler = MinMaxScaler(feature_range=(0, 1))

    # Scale training data
    x_scaled_training = x_scaler.fit_transform(training_data)
    y_scaled_training = y_scaler.fit_transform(training_data_labels)

    # Scale testing data
    x_scaled_testing = x_scaler.transform(testing_data)
    y_scaled_testing = y_scaler.transform(testing_data_labels)

    return x_scaled_training, y_scaled_training, x_scaled_testing, y_scaled_testing
def train_model(training_data, training_labels, testing_data, testing_labels, number_of_epochs, number_of_columns):
    model_hidden_neuron_number_list = []
    model_repeat_list = []
    model_error_rate_list = []
    for hidden_layer_1_units in range(int(np.floor(number_of_columns / 2)), int(np.ceil(number_of_columns * 2))):
        print("Training starting, number of hidden units = %d" % hidden_layer_1_units)
        for repeat in range(1, 6):
            print("Repeat %d" % repeat)
            model = k.Sequential()
            model.add(Dense(hidden_layer_1_units, input_dim=number_of_columns,
                            activation='relu', name='hidden_layer_1'))
            model.add(Dense(1, activation='linear', name='output_layer'))
            model.compile(loss='mean_squared_error', optimizer='adam')

            # Train Model
            model.fit(
                training_data,
                training_labels,
                epochs=number_of_epochs,
                shuffle=True,
                verbose=2,
                callbacks=[logger],
                batch_size=1024,
                validation_split=0.25
            )

            # Test Model
            test_error_rate = model.evaluate(testing_data, testing_labels, verbose=0)
            print("Error on testing data is %.3f" % test_error_rate)
            model_hidden_neuron_number_list.append(hidden_layer_1_units)
            model_repeat_list.append(repeat)
            model_error_rate_list.append(test_error_rate)

            # Save Model (TensorFlow 1.x SavedModelBuilder API)
            model_builder = tf.saved_model.builder.SavedModelBuilder(
                "MLP/models/{hidden_layer_1_units}/{repeat}".format(
                    hidden_layer_1_units=hidden_layer_1_units, repeat=repeat))
            inputs = {
                'input': tf.saved_model.utils.build_tensor_info(model.input)
            }
            outputs = {
                'time_remaining': tf.saved_model.utils.build_tensor_info(model.output)
            }
            signature_def = tf.saved_model.signature_def_utils.build_signature_def(
                inputs=inputs,
                outputs=outputs,
                method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME
            )
            model_builder.add_meta_graph_and_variables(
                K.get_session(),
                tags=[tf.saved_model.tag_constants.SERVING],
                signature_def_map={
                    tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signature_def
                }
            )
            model_builder.save()
And then to do a prediction:
file_name = top_level_file_path + "./MLP/models/19/1/"
testing_dataset = pd.read_csv(file_path + os.listdir(file_path)[0])
number_of_rows = len(testing_dataset.index)
number_of_columns = len(testing_dataset.columns)

newcol = [number_of_rows]
max_time = testing_dataset['Time'].max()
for j in range(0, number_of_rows - 1):
    newcol.append(max_time - testing_dataset.iloc[j].iloc[number_of_columns - 1])

x_scaler = MinMaxScaler(feature_range=(0, 1))
y_scaler = MinMaxScaler(feature_range=(0, 1))

# Scale testing data (note: fitting fresh scalers here is the problem the answer below addresses)
data_scaled = x_scaler.fit_transform(testing_dataset)
labels = pd.read_csv("Labels.csv")
labels_scaled = y_scaler.fit_transform(labels)

signature_key = tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
input_key = 'input'
output_key = 'time_remaining'

with tf.Session(graph=tf.Graph()) as sess:
    saved_model = tf.saved_model.loader.load(sess, [tf.saved_model.tag_constants.SERVING], file_name)
    signature = saved_model.signature_def
    x_tensor_name = signature[signature_key].inputs[input_key].name
    y_tensor_name = signature[signature_key].outputs[output_key].name
    x = sess.graph.get_tensor_by_name(x_tensor_name)
    y = sess.graph.get_tensor_by_name(y_tensor_name)
    # np.expand_dims(data_scaled[600], axis=0)
    predictions = sess.run(y, {x: data_scaled})
    predictions = y_scaler.inverse_transform(predictions)
    # print(np.round(predictions, 2))

    correct_result = 0  # initialise once; resetting this inside the loop would only ever count the last row
    for i in range(0, number_of_rows):
        print(np.round(predictions[i]), " ", np.round(newcol[i]))
        if np.round(predictions[i]) == np.round(newcol[i]):
            correct_result += 1
    print((correct_result / number_of_rows) * 100)
The output of the first row should be 96.0 but it produces 110.0; the last should be 0.1 but is -40.0, even though no negatives appear in the dataset.
You can't compute accuracy when you do regression. Compute the mean squared error on the test set as well.
Second, when it comes to the scalers, you always do scaler.fit_transform on the training data, so the scaler computes its parameters (in this case min and max, if you use a min-max scaler) on the training data. Then, when performing inference on the test set, you should only do scaler.transform prior to feeding the data to the model.
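To make that possible across separate scripts, the fitted scalers have to be persisted alongside the model; they are not stored inside the SavedModel. A minimal sketch, assuming joblib is available (the file names are placeholders):

import joblib

# training script: persist the scalers fitted on the training data
joblib.dump(x_scaler, "x_scaler.save")
joblib.dump(y_scaler, "y_scaler.save")

# prediction script: reload them and only transform / inverse_transform
x_scaler = joblib.load("x_scaler.save")
y_scaler = joblib.load("y_scaler.save")
data_scaled = x_scaler.transform(testing_dataset)  # no fit_transform on test data
predictions = y_scaler.inverse_transform(predictions)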
I have ~10000k images that cannot fit in memory. So for now I can only read 1000 images and train on them...
My code is here :
img_dir = "TrainingSet"  # Enter Directory of all images
image_path = os.path.join(img_dir + "/images", '*.bmp')
files = glob.glob(image_path)

images = []
masks = []
contours = []
indexes = []
files_names = []

for f1 in np.sort(files):
    img = cv2.imread(f1)
    result = re.search('original_cropped_(.*).bmp', str(f1))
    idx = result.group(1)
    mask_path = img_dir + "/masks/mask_cropped_" + str(idx) + ".bmp"
    mask = cv2.imread(mask_path, 0)
    contour_path = img_dir + "/contours/contour_cropped_" + str(idx) + ".bmp"
    contour = cv2.imread(contour_path, 0)
    indexes.append(idx)
    images.append(img)
    masks.append(mask)
    contours.append(contour)

train_df = pd.DataFrame({"id": indexes, "masks": masks, "images": images, "contours": contours})
train_df.sort_values(by="id", ascending=True, inplace=True)
print(train_df.shape)

img_size_target = (256, 256)

ids_train, ids_valid, x_train, x_valid, y_train, y_valid, c_train, c_valid = train_test_split(
    train_df.index.values,
    np.array(train_df.images.apply(lambda x: cv2.resize(x, img_size_target).reshape(img_size_target[0], img_size_target[1], 3))),
    np.array(train_df.masks.apply(lambda x: cv2.resize(x, img_size_target).reshape(img_size_target[0], img_size_target[1], 1))),
    np.array(train_df.contours.apply(lambda x: cv2.resize(x, img_size_target).reshape(img_size_target[0], img_size_target[1], 1))),
    test_size=0.2, random_state=1337)
#Here we define the model architecture...
#.....
#End of model definition
# Training
optimizer = Adam(lr=1e-3, decay=1e-10)
model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=["accuracy"])

early_stopping = EarlyStopping(patience=10, verbose=1)
model_checkpoint = ModelCheckpoint("./keras.model", save_best_only=True, verbose=1)
reduce_lr = ReduceLROnPlateau(factor=0.5, patience=5, min_lr=0.00001, verbose=1)

epochs = 200
batch_size = 32

history = model.fit(x_train, y_train,
                    validation_data=[x_valid, y_valid],
                    epochs=epochs,
                    batch_size=batch_size,
                    callbacks=[early_stopping, model_checkpoint, reduce_lr])
What I would like to know is how I can modify my code in order to train on batches of a small set of images without loading all the other 10000 into memory. That means the algorithm should read X images per epoch from the directory, train on them, and then move on to the next X until the last one.
X here would be a reasonable number of images that fit into memory.
use fit_generator instead of fit:
def generate_batch_data(num):
    # must be a generator that yields (inputs, targets) batches indefinitely,
    # loading only `num` images from disk at a time
    while True:
        # load num images (and their targets) here
        yield images, targets

model.fit_generator(generate_batch_data(X),
                    samples_per_epoch=10000, nb_epoch=10)
# note: samples_per_epoch/nb_epoch are the Keras 1 names;
# in Keras 2 they became steps_per_epoch/epochs
Alternatively, you could use train_on_batch instead of fit.
Discussion on GitHub about this topic: https://github.com/keras-team/keras/issues/2708
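As a fuller sketch for this question's folder layout (the image-to-mask path mapping mirrors the naming scheme in the question but is my assumption, only the masks are used as targets here, and the Keras 2 argument names are used):

import glob
import cv2
import numpy as np

def batch_generator(image_files, batch_size, img_size_target=(256, 256)):
    # yields (images, masks) batches forever, loading only batch_size files at a time
    while True:
        for start in range(0, len(image_files), batch_size):
            x_batch, y_batch = [], []
            for f in image_files[start:start + batch_size]:
                img = cv2.resize(cv2.imread(f), img_size_target)
                mask_path = f.replace("/images/original_cropped_", "/masks/mask_cropped_")
                mask = cv2.resize(cv2.imread(mask_path, 0), img_size_target)
                x_batch.append(img)
                y_batch.append(mask.reshape(img_size_target[0], img_size_target[1], 1))
            yield np.array(x_batch), np.array(y_batch)

files = sorted(glob.glob("TrainingSet/images/*.bmp"))
batch_size = 32
model.fit_generator(batch_generator(files, batch_size),
                    steps_per_epoch=len(files) // batch_size, epochs=10)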
np.array(train_df.images.apply(lambda x:cv2.resize(x,img_size_target).reshape(img_size_target[0],img_size_target[1],3)))
You can first apply this transformation (and the two others) to each individual file and save the results to dedicated folders (images_preproc, masks_preproc, etc.) in a separate script, then load them back, already ready for use, in the current script.
Assuming that the actual image dimensions are greater than 256x256, you will have a faster algorithm using less memory, at the cost of a single preparation phase.
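A minimal sketch of that one-off preparation script; the folder names follow the suggestion above and are assumptions:

import glob
import os
import cv2

os.makedirs("TrainingSet/images_preproc", exist_ok=True)
for f in glob.glob("TrainingSet/images/*.bmp"):
    img = cv2.resize(cv2.imread(f), (256, 256))  # resize once, up front
    cv2.imwrite(os.path.join("TrainingSet/images_preproc", os.path.basename(f)), img)
# repeat the same loop for masks/ and contours/ with their respective filters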