Custom Keras generator __getitem__() is called more times than __len__() - python

I've built a custom Keras generator.
It yields an image and its associated ground truth (GT).
It works well for the training phase with the fit_generator() function.
To evaluate my model, I use it on a test set containing 592 images, calling it with the predict_generator() function.
So I get the right number of predictions (592). Every time __getitem__() is called, I append the GT to the self.gt list.
Then, after running predict_generator(), I compare the predictions with the stored GT.
My problem:
I want to store the ground truth arrays in a list every time the generator is called, but at the end I have more GT arrays than the 592 predictions.
So I can't build my confusion matrix...
Here is the code of the generator:
import glob
import random

import numpy as np
from keras.utils import Sequence


class DataGenerator(Sequence):
    def __init__(self, data_folders_txt, gen_data_type, batchsize, shuffle=True, classes=None, selected_class=None):
        '''
        - data_folders_txt: txt file containing all the paths to the different data folders
        - gen_data_type: string, either "train", "val" or "test" (corresponds to a specific folder)
        - shuffle: shuffle the dataset at each epoch
        - classes: dict of classes with their associated number (the class number must match the class position on the class axis of the one-hot-encoded ground truth array)
        - selected_class: name of the selected class (128x128x1) in the 128x128x3 one-hot-encoded ground truth array
        '''
        self.gt = []
        self.shuffle = shuffle
        self.gen_data_type = gen_data_type
        self.batchsize = batchsize
        self.data_folders = open(data_folders_txt, "r").readlines()
        self.list_IDs = self.tiles_list_creation(self.data_folders)
        self.samples = len(self.list_IDs)
        self.classes = classes
        self.selected_class = selected_class
        self.index = 0
        self.on_epoch_end()

    def tiles_list_creation(self, list_folders):
        list_IDs = []
        for folder in list_folders:
            samples = glob.glob(folder.rstrip() + self.gen_data_type + '3/tile/*')
            list_IDs += samples
        random.shuffle(list_IDs)
        return list_IDs

    def __len__(self):
        if len(self.list_IDs) % self.batchsize == 0:
            return len(self.list_IDs) // self.batchsize
        else:
            return len(self.list_IDs) // self.batchsize + 1

    def __getitem__(self, index):
        self.index = index
        X = []
        y = []
        # min(..., ...) takes the remaining data without going out of range
        for i in range(index * self.batchsize, min(self.samples, (index + 1) * self.batchsize)):
            tile = np.load(self.list_IDs[i])
            # If a specific class is given, take only the channel of the GT array corresponding to that class
            if self.classes:
                gt = np.load(self.list_IDs[i].replace("tile", "gt"))[:, :, self.classes[self.selected_class]]
                gt = np.expand_dims(gt, axis=-1)
            else:
                gt = np.load(self.list_IDs[i].replace("tile", "gt"))
            # Store the ground truth to compare it with the predictions after running predict_generator()
            self.gt.append(gt)
            X.append(tile)
            y.append(gt)
        return np.array(X), np.array(y)

    def on_epoch_end(self):
        if self.shuffle:
            random.shuffle(self.list_IDs)
And here is where I call it:
batchsize = 10
model = load_model(model_path, custom_objects={'jaccard_distance': jaccard_distance, 'auc': auc})
test_gen = DataGenerator("/path/to/data/path/written/in/file.txt",
                         gen_data_type='test',
                         batchsize=batchsize,
                         classes=None,
                         selected_class=None)
y_pred = model.predict_generator(test_gen, steps=None, verbose=1)
y_true = np.array(test_gen.gt)
plot_confusion_matrix(y_true, y_pred, ["Hedgerows", "No Hedgerows"])
Here is the error:
60/60 [==============================] - 4s 71ms/step
Traceback (most recent call last):
File "/work/stages/mathurin/sentinel_segmentation/unet/confusion_matrix.py", line 95, in <module>
plot_confusion_matrix(y_true, y_pred, ["Hedgrows", "No Hedgerows"], normalize=normalization, title=model_path.split('/')[-1].split('.')[0])
File "/work/stages/mathurin/sentinel_segmentation/unet/confusion_matrix.py", line 35, in plot_confusion_matrix
cm = confusion_matrix(y_true, y_pred)
File "/work/tools/anaconda3/lib/python3.6/site-packages/sklearn/metrics/classification.py", line 253, in confusion_matrix
y_type, y_true, y_pred = _check_targets(y_true, y_pred)
File "/work/tools/anaconda3/lib/python3.6/site-packages/sklearn/metrics/classification.py", line 71, in _check_targets
check_consistent_length(y_true, y_pred)
File "/work/tools/anaconda3/lib/python3.6/site-packages/sklearn/utils/validation.py", line 235, in check_consistent_length
" samples: %r" % [int(l) for l in lengths])
ValueError: Found input variables with inconsistent numbers of samples: [702, 592]
When I look at the index number inside the __getitem__() function, it is not the expected number... It should end at the number returned by __len__(), but it is always smaller.
In this example, after making the predictions, the value of the self.index attribute is 8.
As if the index were exceeding __len__() and then restarting at 0, 1, 2, etc...
EDIT: even stranger!
I just re-ran it and got a different number of stored GT arrays...
60/60 [==============================] - 6s 100ms/step
Traceback (most recent call last):
File "/work/tools/pycharm-community-2019.1.1/helpers/pydev/pydevd.py", line 1741, in <module>
main()
File "/work/tools/pycharm-community-2019.1.1/helpers/pydev/pydevd.py", line 1735, in main
globals = debugger.run(setup['file'], None, None, is_module)
File "/work/tools/pycharm-community-2019.1.1/helpers/pydev/pydevd.py", line 1135, in run
pydev_imports.execfile(file, globals, locals) # execute the script
File "/work/tools/pycharm-community-2019.1.1/helpers/pydev/_pydev_imps/_pydev_execfile.py", line 18, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "/work/stages/mathurin/sentinel_segmentation/unet/confusion_matrix.py", line 95, in <module>
plot_confusion_matrix(y_true, y_pred, ["Hedgrows", "No Hedgerows"], normalize=normalization, title=model_path.split('/')[-1].split('.')[0])
File "/work/stages/mathurin/sentinel_segmentation/unet/confusion_matrix.py", line 35, in plot_confusion_matrix
cm = confusion_matrix(y_true, y_pred)
File "/work/tools/anaconda3/lib/python3.6/site-packages/sklearn/metrics/classification.py", line 253, in confusion_matrix
y_type, y_true, y_pred = _check_targets(y_true, y_pred)
File "/work/tools/anaconda3/lib/python3.6/site-packages/sklearn/metrics/classification.py", line 71, in _check_targets
check_consistent_length(y_true, y_pred)
File "/work/tools/anaconda3/lib/python3.6/site-packages/sklearn/utils/validation.py", line 235, in check_consistent_length
" samples: %r" % [int(l) for l in lengths])
ValueError: Found input variables with inconsistent numbers of samples: [682, 592]

There is nothing strange about this: Keras runs generators in multiple processes/threads to improve performance, especially for training. That is why fit_generator and predict_generator have keyword arguments like workers, use_multiprocessing and max_queue_size. So the solution is not to store any kind of ground truth or state in the generator instance.
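(As a hedged aside: passing workers=0 should make predict_generator run the generator sequentially on the main thread, avoiding the concurrent __getitem__ calls, though keeping state in the generator remains fragile.)
# Possible workaround, not the recommended fix: run the generator on the main thread
y_pred = model.predict_generator(test_gen, verbose=1, workers=0, use_multiprocessing=False)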
For your specific case, you can use another kind of prediction loop by calling the generator manually:
labels = []
preds = []
for step in range(len(generator)):
    data, label = generator.__getitem__(step)
    pred = model.predict(data)
    preds.append(pred)
    labels.append(label)
Then use preds and labels to build the confusion matrix.
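For instance, a minimal sketch of that last step for a binary segmentation output, assuming per-pixel probabilities and an illustrative 0.5 threshold (adapt it to your own plot_confusion_matrix):
import numpy as np
from sklearn.metrics import confusion_matrix

y_pred = np.concatenate(preds, axis=0)   # stacks the per-batch predictions, e.g. (n_samples, H, W, 1)
y_true = np.concatenate(labels, axis=0)  # stacks the per-batch ground truth arrays

# Flatten to per-pixel labels and binarize the predictions (0.5 is an arbitrary threshold)
cm = confusion_matrix(y_true.ravel().astype(int), (y_pred.ravel() > 0.5).astype(int))
print(cm)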

Related

TensorFlow Addition Error when adding results from binary_crossentropy() together

I was in the middle of training my GAN when a very unexpected error came up, and I have no idea how to fix it. The error doesn't come right away; it happens about 2-3 minutes into training. Here is the error:
Traceback (most recent call last):
File "gan.py", line 103, in <module>
train(X_train_dataset,200)
File "gan.py", line 80, in train
train_step(images) # takes images and improves both the generator and the discriminator
File "gan.py", line 91, in train_step
discriminator_loss = get_discriminator_loss(real_output,fake_output)
File "gan.py", line 48, in get_discriminator_loss
return fake_loss+real_loss
File "/home/jake/.local/lib/python3.6/site-packages/tensorflow/python/ops/math_ops.py", line 1125, in binary_op_wrapper
return func(x, y, name=name)
File "/home/jake/.local/lib/python3.6/site-packages/tensorflow/python/util/dispatch.py", line 201, in wrapper
return target(*args, **kwargs)
File "/home/jake/.local/lib/python3.6/site-packages/tensorflow/python/ops/math_ops.py", line 1447, in _add_dispatch
return gen_math_ops.add_v2(x, y, name=name)
File "/home/jake/.local/lib/python3.6/site-packages/tensorflow/python/ops/gen_math_ops.py", line 486, in add_v2
_ops.raise_from_not_ok_status(e, name)
File "/home/jake/.local/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 6843, in raise_from_not_ok_status
six.raise_from(core._status_to_exception(e.code, message), None)
File "<string>", line 3, in raise_from
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [100] vs. [13] [Op:AddV2]
So from what I can tell from this traceback, my error occurs during get_discriminator_loss(), so here is that code.
def get_discriminator_loss(real_predictions, fake_predictions):
    real_predictions = tf.sigmoid(real_predictions)
    fake_predictions = tf.sigmoid(fake_predictions)
    real_loss = tf.losses.binary_crossentropy(tf.ones_like(real_predictions), real_predictions)
    fake_loss = tf.losses.binary_crossentropy(tf.zeros_like(fake_predictions), fake_predictions)
    return fake_loss + real_loss
Does anyone have any ideas? And remember, this is after running successfully for about 2-3 minutes; the error doesn't occur in the first many passes.
I've found the source of my error, but I don't know why it's occurring.
My real loss at one of the passes has only 13 values instead of the normal 100.
How can this be?
Here is my full code.
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import time
import pickle

pickle_in_X = open("X.pickle", "rb")
pickle_in_y = open("y.pickle", "rb")
X = pickle.load(pickle_in_X)
y = pickle.load(pickle_in_y)
y = np.array(y)

X_train = X[int(len(X) * .3):]
y_train = y[int(len(y) * .3):]
X_test = X[:int(len(X) * .3)]
y_test = X[:int(len(y) * .3)]

X_train = (X_train - 127.5) / 127.5
BATCH_SIZE = 100
X_train_dataset = tf.data.Dataset.from_tensor_slices(X_train).batch(BATCH_SIZE)

# creates a discriminator model.
# discriminator will output 0-1, which represents the probability that the image is real
def make_discriminator():
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Conv2D(7, (3, 3), padding="same", input_shape=(40, 40, 1)))
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.LeakyReLU())
    model.add(tf.keras.layers.Dense(50, activation="relu"))
    model.add(tf.keras.layers.Dense(1))
    return model

model_discriminator = make_discriminator()
discriminator_optimizer = tf.optimizers.Adam(1e-3)

# real_loss is the amount of error when trying to guess that the real images are in fact real,
# i.e. loss will be zero if our discriminator guesses that there is a 100% chance that this real image is real
# fake_loss is the amount of error when trying to guess that the fake images are in fact fake,
# i.e. loss will be zero if our discriminator guesses that there is a 0% chance that this fake image is real
# returns the total of our loss
def get_discriminator_loss(real_predictions, fake_predictions):
    real_predictions = tf.sigmoid(real_predictions)
    fake_predictions = tf.sigmoid(fake_predictions)
    real_loss = tf.losses.binary_crossentropy(tf.ones_like(real_predictions), real_predictions)
    fake_loss = tf.losses.binary_crossentropy(tf.zeros_like(fake_predictions), fake_predictions)
    return fake_loss + real_loss

# takes as input a random string of numbers and outputs either a dog or a cat
def make_generator():
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Dense(10 * 10 * 256, input_shape=(100,)))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Reshape((10, 10, 256)))
    model.add(tf.keras.layers.Conv2DTranspose(128, (3, 3), padding="same"))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Conv2DTranspose(64, (3, 3), strides=(2, 2), padding="same"))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Conv2DTranspose(1, (3, 3), strides=(2, 2), padding="same"))
    return model

model_generator = make_generator()

# generator gets rewarded when it fools the discriminator
def get_generator_loss(fake_predictions):
    fake_predictions = tf.sigmoid(fake_predictions)
    fake_loss = tf.losses.binary_crossentropy(tf.ones_like(fake_predictions), fake_predictions)
    return fake_loss

generator_optimizer = tf.optimizers.Adam(1e-3)

# training
def train(X_train_dataset, epochs):
    for _ in range(epochs):
        for images in X_train_dataset:
            images = tf.cast(images, tf.dtypes.float32)
            train_step(images)  # takes images and improves both the generator and the discriminator

def train_step(images):
    fake_image_noise = np.random.randn(BATCH_SIZE, 100)  # produces BATCH_SIZE random 100-dimensional vectors that will be converted to images
    with tf.GradientTape() as generator_gradient, tf.GradientTape() as discriminator_gradient:
        generated_images = model_generator(fake_image_noise)
        real_output = model_discriminator(images)
        fake_output = model_discriminator(generated_images)
        generator_loss = get_generator_loss(fake_output)
        discriminator_loss = get_discriminator_loss(real_output, fake_output)
    gradients_of_generator = generator_gradient.gradient(generator_loss, model_generator.trainable_variables)  # gradient of gen loss with respect to trainable variables
    gradients_of_discriminator = discriminator_gradient.gradient(discriminator_loss, model_discriminator.trainable_variables)
    discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, model_discriminator.trainable_variables))
    generator_optimizer.apply_gradients(zip(gradients_of_generator, model_generator.trainable_variables))
    print("generator loss: ", np.mean(generator_loss))
    print("discriminator loss: ", np.mean(discriminator_loss))

train(X_train_dataset, 200)
model_generator.save('genModel')
model_discriminator.save('discModel')
If the size of your dataset is not a multiple of your batch size, then your last batch will have a smaller number of samples than the other batches. To avoid this, you can force a tf.data.Dataset to drop the last batch if it is smaller than the batch size. See the documentation for more information.
tf.data.Dataset.from_tensor_slices(X_train).batch(BATCH_SIZE, drop_remainder=True)
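Alternatively, a sketch of another fix (an assumption on my part, not part of the original answer): derive the noise batch size from the batch actually received, so the final smaller batch still yields real and fake outputs of matching shape.
import tensorflow as tf

def train_step(images):
    # Use the size of the batch actually received instead of the fixed BATCH_SIZE,
    # so real_output and fake_output always have the same leading dimension.
    current_batch_size = tf.shape(images)[0]
    fake_image_noise = tf.random.normal((current_batch_size, 100))
    # ... the rest of train_step stays as in the original code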

Problem with layer dimensions when using Keras sequence generator and fit_generator

I am running a visual question answering task.
The problem takes as input image features (which I have saved in an h5py file) and question tokens (which I have pickled), and the outputs are the answers (the whole answer is considered a target, so 3129 answers - one word or more - and 3129 labels).
I am using the Keras Sequence utility to create the generator.
I am getting a dimension error in the output layer when the model is training. When I change the __len__ function, the training process breaks down depending on its value.
I have copied the __getitem__ function of my generator below, and also a sample of my model.
Do I need to change my generator configuration or my model?
Epoch 1/1
Traceback (most recent call last):
File "<ipython-input-45-e55a5853e499>", line 32, in <module>
validation_data=valid_generator)
File "C:\python\envs\tf2-keras\lib\site-packages\keras\legacy\interfaces.py", line 91, in wrapper
return func(*args, **kwargs)
File "C:\python\envs\tf2-keras\lib\site-packages\keras\engine\training.py", line 1732, in fit_generator
initial_epoch=initial_epoch)
File "C:\python\envs\tf2-keras\lib\site-packages\keras\engine\training_generator.py", line 220, in fit_generator
reset_metrics=False)
File "C:\python\envs\tf2-keras\lib\site-packages\keras\engine\training.py", line 1508, in train_on_batch
class_weight=class_weight)
File "C:\python\envs\tf2-keras\lib\site-packages\keras\engine\training.py", line 621, in _standardize_user_data
exception_prefix='target')
File "C:\python\envs\tf2-keras\lib\site-packages\keras\engine\training_utils.py", line 145, in standardize_input_data
str(data_shape))
ValueError: Error when checking target: expected output to have shape (3129,) but got array with shape (1,)
def __len__(self):
    'Denotes the number of batches per epoch'
    # return int(np.floor(len(self.list_IDs) / self.batch_size))
    return 512*866
The __getitem__ of my generator looks like this:
def __getitem__(self, index):
    'Generate one batch of data'
    imfeatures = np.empty((self.batch_size, 2048))
    question_tokens = np.empty((self.batch_size, 14))
    answers = np.empty((self.batch_size, 3129))
    # Generate indexes of the batch
    indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
    # self.T.append(indexes)
    list_IDs_temp = [self.list_IDs[k] for k in indexes]
    # Generate data
    for i, k in enumerate(list_IDs_temp):
        temp = self.Features['image_features'][k]
        imfeatures[i,] = temp[0, :]
        question_tokens[i,] = self.Questions[indexes[i]]
        answers = self.Answer[indexes[i]]
    return [imfeatures, question_tokens], answers
# this is where I instantiate the generators
# train_features is an h5py file
# entries is where questions, answers, and ids are saved
batch_size = 512
train_generator = DataGenerator(entries['train'].images,
                                train_fetures,
                                entries['train'].q_token,
                                entries['train'].target,
                                batch_size=batch_size,
                                shuffle=False)
valid_generator = DataGenerator(entries['val'].images,
                                valid_features,
                                entries['val'].q_token,
                                entries['val'].target,
                                batch_size=batch_size,
                                shuffle=False)

# And this is what my model looks like:
ImInput = Input(shape=(2048,), name='image_input')
QInput = Input(shape=(14,), name='question')
# some dense layers and dropouts
# Then the layers are merged
M = Multiply()([ImInput, QInput])
# Some dense layers and dropouts
output = Dense(3129, activation='softmax', name='output')(M)
model = Model([ImInput, QInput], output)
model.compile(optimizer='RMSprop', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit_generator(train_generator,
                    epochs=1,
                    verbose=1,
                    validation_data=valid_generator)

Errors appear even though I almost completely copied and pasted the Google tutorial code

These are the complete errors, and I am very confused about what they are trying to say. I was doing a tutorial from Google, and I almost completely copied the code from the tutorial but replaced its dataset with my own, and these errors occur.
Traceback (most recent call last):
File "C:\Users\julia\Anaconda\envs\myenv\lib\site-packages\pandas\core\indexes\base.py", line 2897, in get_loc
return self._engine.get_loc(key)
File "pandas\_libs\index.pyx", line 107, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\index.pyx", line 131, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 1607, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 1614, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'Objective 1'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:/Users/julia/Anaconda/envs/myenv/Mycode.py", line 333, in <module>
validation_targets=validation_targets)
File "C:/Users/julia/Anaconda/envs/myenv/Mycode.py", line 288, in train_nn_regression_model
steps=steps_per_period
File "C:\Users\julia\Anaconda\envs\myenv\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 367, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "C:\Users\julia\Anaconda\envs\myenv\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1158, in _train_model
return self._train_model_default(input_fn, hooks, saving_listeners)
File "C:\Users\julia\Anaconda\envs\myenv\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1185, in _train_model_default
input_fn, ModeKeys.TRAIN))
File "C:\Users\julia\Anaconda\envs\myenv\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1022, in _get_features_and_labels_from_input_fn
self._call_input_fn(input_fn, mode))
File "C:\Users\julia\Anaconda\envs\myenv\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1113, in _call_input_fn
return input_fn(**kwargs)
File "C:/Users/julia/Anaconda/envs/myenv/Mycode.py", line 268, in <lambda>
training_targets["Objective 1"],
File "C:\Users\julia\Anaconda\envs\myenv\lib\site-packages\pandas\core\frame.py", line 2980, in __getitem__
indexer = self.columns.get_loc(key)
File "C:\Users\julia\Anaconda\envs\myenv\lib\site-packages\pandas\core\indexes\base.py", line 2899, in get_loc
return self._engine.get_loc(self._maybe_cast_indexer(key))
File "pandas\_libs\index.pyx", line 107, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\index.pyx", line 131, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 1607, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 1614, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'Objective 1'
I am a machine learning beginner, and when I use Python and follow the code from the Google machine learning tutorial, some errors come up, and I am not sure what is going on.
# Step 1 - Set up and import necessary packages
from __future__ import print_function

import math

from IPython import display
from matplotlib import cm
from matplotlib import gridspec
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import metrics
import tensorflow as tf
from tensorflow.python.data import Dataset

tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
pd.options.display.max_rows = 10
pd.options.display.float_format = '{:.1f}'.format

# Step 2 - Load our data
zerlite_13X_error = pd.read_csv("zerlite_13x_error.csv", sep=",")
# print(zerlite_13X_error.head())  # Load data done

# We will randomize the data, just to be sure not to get any pathological ordering effects on the
# performance of Stochastic Gradient Descent. And we first consider Objective 1
zerlite_13X_error = zerlite_13X_error.reindex(
    np.random.permutation(zerlite_13X_error.index))

# Define features and configure columns
# Define features, which are Parameters 1 to 8
def preprocess_features(zerlite_13X_error):
    """Prepares input features from zerlite_13X_error.

    Args:
        zerlite_13X_error: A Pandas DataFrame expected to contain the data
    Returns:
        A DataFrame that contains the features to be used for the model,
        including synthetic features
    """
    selected_features = zerlite_13X_error[
        ["Parameter 1",
         "Parameter 2",
         "Parameter 3",
         "Parameter 4",
         "Parameter 5",
         "Parameter 6",
         "Parameter 7",
         "Parameter 8"]]
    processed_features = selected_features.copy()
    # print(processed_features.head())
    return processed_features


def preprocess_targets(zerlite_13X_error):
    """Prepares target features (i.e. labels) from the zerlite_13X_error set.

    Args:
        zerlite_13X_error: A Pandas DataFrame expected to contain data from
            the zerlite_13X_error data set
    Returns:
        A DataFrame that contains the target feature
    """
    output_targets = pd.DataFrame()
    # Create the output targets
    output_targets["Objective 1"] = zerlite_13X_error["Objective 1"]
    print(output_targets.head())
    return output_targets
# For the training set, we will choose the first 14000 of the 20154 examples, about 70% of the data
training_examples = preprocess_features(zerlite_13X_error.head(14000))
training_examples.describe()
print('-- Training Examples Describe --')
print(training_examples.describe())
training_targets = preprocess_targets(zerlite_13X_error.head(14000))
training_targets.describe()
print('-- Training Targets Describe --')
print(training_targets.describe())

# For the validation set, we will choose 3000 examples out of the total 20154 examples
validation_examples = preprocess_features(zerlite_13X_error.iloc[14001:17001])
validation_examples.describe()
print('-- Validation Examples Describe --')
print(validation_examples.describe())
validation_targets = preprocess_targets(zerlite_13X_error.iloc[14001:17001])
validation_targets.describe()
print('-- Validation Targets Describe --')
print(validation_targets.describe())

# For the test set, we will choose the last 3154 examples
test_examples = preprocess_features(zerlite_13X_error.tail(3154))
test_examples.describe()
print('-- Test Examples Describe --')
print(test_examples.describe())
test_targets = preprocess_targets(zerlite_13X_error.tail(3154))
test_targets.describe()
print('-- Test Targets Describe --')
print(test_targets.describe())
# As we are now working with multiple features, modularize the code for configuring columns into a
# separate function
def construct_feature_columns(input_features):
    """Constructs the TensorFlow feature columns.

    Args:
        input_features: The names of the numerical input features to use
    Returns:
        A set of feature columns
    """
    return set([tf.feature_column.numeric_column(my_feature)
                for my_feature in input_features])


# Train and evaluate the model
def my_input_fn(features, targets, batch_size=1, shuffle=True, num_epochs=None):
    """Trains a linear regression model of multiple features.

    Args:
        features: pandas DataFrame of features
        targets: pandas DataFrame of targets
        batch_size: Size of batches to be passed to the model
        shuffle: True or False. Whether to shuffle the data
        num_epochs: Number of epochs for which data should be repeated. None = repeat indefinitely
    Returns:
        Tuple of (features, labels) for the next data batch
    """
    # Convert pandas data into a dict of np arrays
    features = {key: np.array(value) for key, value in dict(features).items()}

    # Construct a dataset, and configure batching/repeating
    ds = Dataset.from_tensor_slices((features, targets))  # Warning: 2GB limit
    ds = ds.batch(batch_size).repeat(num_epochs)

    # Shuffle the data, if specified
    if shuffle:
        ds = ds.shuffle(10000)

    # Return the next batch of data
    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels
# Now we will create a training model using a neural network
def train_nn_regression_model(learning_rate, steps, batch_size, hidden_units,
                              training_examples, training_targets,
                              validation_examples, validation_targets):
    """Trains a neural network regression model of multiple features.

    In addition to training, this function also prints training progress information,
    as well as a plot of the training and validation loss over time.

    Args:
        learning_rate: A 'float', the learning rate
        steps: A non-zero 'int', the total number of training steps. A training step
            consists of a forward and backward pass using a single batch.
        batch_size: A non-zero 'int', the batch size.
        hidden_units: A 'list' of int values, specifying the number of neurons in each layer
        training_examples: A 'DataFrame' containing one or more columns from
            'zerlite_13X_error' to use as input features for training
        training_targets: A 'DataFrame' containing exactly one column from
            'zerlite_13X_error' to use as the target for training
        validation_examples: A 'DataFrame' containing one or more columns from
            'zerlite_13X_error' to use as input features for validation
        validation_targets: A 'DataFrame' containing exactly one column from
            'zerlite_13X_error' to use as the target for validation
    Returns:
        A 'DNNRegressor' object trained on the training data.
    """
    periods = 10
    steps_per_period = steps / periods

    # Create a DNNRegressor object
    my_optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    my_optimizer = tf.contrib.estimator.clip_gradients_by_norm(my_optimizer, 5.0)
    dnn_regressor = tf.estimator.DNNRegressor(
        feature_columns=construct_feature_columns(training_examples),
        hidden_units=hidden_units,
        optimizer=my_optimizer,
    )

    # Create input functions.
    training_input_fn = lambda: my_input_fn(training_examples,
                                            training_targets["Objective 1"],
                                            batch_size=batch_size)
    predict_training_input_fn = lambda: my_input_fn(training_examples,
                                                    training_targets["Objective 1"],
                                                    num_epochs=1,
                                                    shuffle=False)
    predict_validation_input_fn = lambda: my_input_fn(validation_examples,
                                                      validation_targets["Objective 1"],
                                                      num_epochs=1,
                                                      shuffle=False)

    # Train the model, but do so inside a loop so that we can periodically assess loss metrics
    print("Training Models ............")
    print("RMSE (on training data): ")
    training_rmse = []
    validation_rmse = []
    for period in range(0, periods):  # Python shows the error occurring here
        # Train the model, starting from the prior state
        dnn_regressor.train(
            input_fn=training_input_fn,
            steps=steps_per_period)
        # Take a break and compute predictions
        training_predictions = dnn_regressor.predict(input_fn=predict_training_input_fn)
        training_predictions = np.array([item['predictions'][0] for item in training_predictions])
        validation_predictions = dnn_regressor.predict(input_fn=predict_validation_input_fn)
        validation_predictions = np.array([item['predictions'][0] for item in validation_predictions])
        # Compute training and validation loss
        training_root_mean_squared_error = math.sqrt(
            metrics.mean_squared_error(training_predictions, training_targets))
        validation_root_mean_squared_error = math.sqrt(
            metrics.mean_squared_error(validation_predictions, validation_targets))
        # Occasionally print the current loss
        print("  period %02d: %02f" % (period, training_root_mean_squared_error))
        # Add the loss metrics from this period to our list
        training_rmse.append(training_root_mean_squared_error)
        validation_rmse.append(validation_root_mean_squared_error)
    print("Model training finished")

    # Output a graph of loss metrics over periods
    plt.ylabel("RMSE")
    plt.xlabel("Periods")
    plt.title("Root Mean Squared Error vs. Periods")
    plt.tight_layout()
    plt.plot(training_rmse, label="training")
    plt.plot(validation_rmse, label="validation")
    plt.legend()

    print("Final RMSE (on training data): %0.2f" % training_root_mean_squared_error)
    print("Final RMSE (on validation data): %0.2f" % validation_root_mean_squared_error)
    return dnn_regressor


# Train NN model
dnn_regressor = train_nn_regression_model(
    learning_rate=0.1,
    steps=5000,
    batch_size=10,
    hidden_units=[10, 2],
    training_examples=training_examples,
    training_targets=training_targets,
    validation_examples=validation_examples,
    validation_targets=validation_targets)  # Python shows the error here

Minimal DNNRegressor example with TensorFlow

I'm new to Python and TensorFlow and I'm trying to build a simple working example with fake data in TensorFlow. My goal is to use the DNNRegressor estimator to predict a real value from a multidimensional input. This is the code I wrote:
import pandas as pd
import tensorflow as tf
import numpy as np

# Amount of train samples
m_train = 1000
# Amount of test samples
m_test = 100
# Dimensions for each sample
n = 10

def from_dataset(ds):
    return lambda: ds.make_one_shot_iterator().get_next()

# Create random samples with numpy
train_data = (np.random.sample((m_train, n)), np.random.sample((m_train, 1)))
test_data = (np.random.sample((m_test, n)), np.random.sample((m_test, 1)))

# Create two datasets, one for training and the other for testing
train_dataset = tf.data.Dataset.from_tensor_slices(train_data)
test_dataset = tf.data.Dataset.from_tensor_slices(test_data)

feature_columns = [tf.contrib.layers.real_valued_column("", dimension=n)]
model = tf.estimator.DNNRegressor(hidden_units=[20, 20], feature_columns=feature_columns)

# Train the model
model.train(input_fn=from_dataset(train_dataset), steps=1000)

# Evaluate the unseen samples
eval_result = model.evaluate(input_fn=from_dataset(test_dataset))
And this is the error I get:
$ python fake.py
WARNING:tensorflow:Using temporary folder as model directory: /tmp/tmp1j5irF
Traceback (most recent call last):
File "fake.py", line 28, in <module>
model.train(input_fn=from_dataset(train_dataset), steps=1000)
File "/usr/lib/python2.7/site-packages/tensorflow/python/estimator/estimator.py", line 314, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "/usr/lib/python2.7/site-packages/tensorflow/python/estimator/estimator.py", line 743, in _train_model
features, labels, model_fn_lib.ModeKeys.TRAIN, self.config)
File "/usr/lib/python2.7/site-packages/tensorflow/python/estimator/estimator.py", line 725, in _call_model_fn
model_fn_results = self._model_fn(features=features, **kwargs)
File "/usr/lib/python2.7/site-packages/tensorflow/python/estimator/canned/dnn.py", line 448, in _model_fn
config=config)
File "/usr/lib/python2.7/site-packages/tensorflow/python/estimator/canned/dnn.py", line 153, in _dnn_model_fn
'Given type: {}'.format(type(features)))
ValueError: features should be a dictionary of `Tensor`s. Given type: <class 'tensorflow.python.framework.ops.Tensor'>
I suppose I have to use a dictionary of Tensors, but I'm just beginning with Python and I don't know how to do it.
You need to return the iterator returned by get_next(), rather than a lambda function that returns the iterator. Check out https://github.com/tensorflow/tensorflow/blob/r1.8/tensorflow/examples/get_started/regression/dnn_regression.py
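For reference, a minimal sketch of the dictionary-of-features form that the error message asks for, using the TF 1.x numpy_input_fn helper; the feature name "x" is an illustrative choice, not something from the original code:
import numpy as np
import tensorflow as tf

n = 10
m_train = 1000
x_train = np.random.sample((m_train, n)).astype(np.float32)
y_train = np.random.sample((m_train, 1)).astype(np.float32)

# Features must reach the estimator as a dict mapping feature-column names to arrays/tensors
feature_columns = [tf.feature_column.numeric_column("x", shape=(n,))]
model = tf.estimator.DNNRegressor(hidden_units=[20, 20], feature_columns=feature_columns)

# numpy_input_fn wraps the arrays in the required {"name": array} dict form
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"x": x_train}, y=y_train, batch_size=128, num_epochs=None, shuffle=True)
model.train(input_fn=train_input_fn, steps=1000)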

Tensorflow stratified_sample error

I'm trying to use tf.contrib.training.stratified_sample in TensorFlow to balance classes. I made a quick example below to test it, drawing samples from two unbalanced classes in a balanced way and verifying it, but I'm getting an error.
import numpy as np
import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.framework import dtypes

batch_size = 10
data = ['a']*9990 + ['b']*10
labels = [1]*9990 + [0]*10
data_tensor = ops.convert_to_tensor(data, dtype=dtypes.string)
label_tensor = ops.convert_to_tensor(labels)
target_probs = [0.5, 0.5]
data_batch, label_batch = tf.contrib.training.stratified_sample(
    data_tensor, label_tensor, target_probs, batch_size,
    queue_capacity=2*batch_size)

with tf.Session() as sess:
    d, l = sess.run(data_batch, label_batch)
    print('percentage "a" = %.3f' % (np.sum(l)/len(l)))
The error I'm getting is:
Traceback (most recent call last):
File "/home/jason/code/scrap.py", line 56, in <module>
test_stratified_sample()
File "/home/jason/code/scrap.py", line 47, in test_stratified_sample
queue_capacity=2*batch_size)
File "/usr/local/lib/python3.4/dist-packages/tensorflow/contrib/training/python/training/sampling_ops.py", line 191, in stratified_sample
with ops.name_scope(name, 'stratified_sample', tensors + [labels]):
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/ops/math_ops.py", line 829, in binary_op_wrapper
y = ops.convert_to_tensor(y, dtype=x.dtype.base_dtype, name="y")
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/framework/ops.py", line 676, in convert_to_tensor
as_ref=False)
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/framework/ops.py", line 741, in internal_convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/framework/constant_op.py", line 113, in _constant_tensor_conversion_function
return constant(v, dtype=dtype, name=name)
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/framework/constant_op.py", line 102, in constant
tensor_util.make_tensor_proto(value, dtype=dtype, shape=shape, verify_shape=verify_shape))
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/framework/tensor_util.py", line 374, in make_tensor_proto
_AssertCompatible(values, dtype)
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/framework/tensor_util.py", line 302, in _AssertCompatible
(dtype.name, repr(mismatch), type(mismatch).__name__))
TypeError: Expected string, got list containing Tensors of type '_Message' instead.
The error doesn't explain what I'm doing wrong. I also tried passing in the raw data and labels (without converting them to tensors), and I tried using tf.train.slice_input_producer to create an initial queue of the data and label tensors.
Has anyone gotten stratified_sample to work? I haven't been able to find any examples.
I've modified the code into something that works for me. Summary of the changes:
Use enqueue_many=True to enqueue a batch of examples with different labels. Otherwise it's expecting a single scalar label Tensor (which can be stochastic when evaluated by the queue runners).
The first argument is expected to be a list of Tensors. It should have a better error message (I think this is what you ran into). Please do send a pull request or open an issue on Github for a better error message.
Start queue runners. Otherwise code that uses queues will deadlock. Or use Estimators or MonitoredSession so you don't need to worry about this.
(Edit based on comments) stratified_sample does not shuffle the data, it just accepts/rejects! So if your data is not randomized, consider putting it through slice_input_producer (enqueue_many=False) or shuffle_batch (enqueue_many=True) before sampling if you want it to come out in a random order.
Modified code (improved based on Jason's comments):
import numpy
import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.framework import dtypes

with tf.Graph().as_default():
    batch_size = 100
    data = ['a']*9000 + ['b']*1000
    labels = [1]*9000 + [0]*1000
    data_tensor = ops.convert_to_tensor(data, dtype=dtypes.string)
    label_tensor = ops.convert_to_tensor(labels, dtype=dtypes.int32)
    shuffled_data, shuffled_labels = tf.train.slice_input_producer(
        [data_tensor, label_tensor], shuffle=True, capacity=3*batch_size)
    target_probs = numpy.array([0.5, 0.5])
    data_batch, label_batch = tf.contrib.training.stratified_sample(
        [shuffled_data], shuffled_labels, target_probs, batch_size,
        queue_capacity=2*batch_size)

    with tf.Session() as session:
        tf.local_variables_initializer().run()
        tf.global_variables_initializer().run()
        coordinator = tf.train.Coordinator()
        tf.train.start_queue_runners(session, coord=coordinator)
        num_iter = 10
        sum_ones = 0.
        for _ in range(num_iter):
            d, l = session.run([data_batch, label_batch])
            count_ones = l.sum()
            sum_ones += float(count_ones)
            print('percentage "a" = %.3f' % (float(count_ones) / len(l)))
        print('Overall: {}'.format(sum_ones / (num_iter * batch_size)))
        coordinator.request_stop()
        coordinator.join()
Outputs:
percentage "a" = 0.480
percentage "a" = 0.440
percentage "a" = 0.580
percentage "a" = 0.570
percentage "a" = 0.580
percentage "a" = 0.520
percentage "a" = 0.480
percentage "a" = 0.460
percentage "a" = 0.390
percentage "a" = 0.530
Overall: 0.503
