I am running the PointNet point-cloud CNN. It runs well until I try to predict on a very large dataset of ~20k files. I know some may suggest multiprocessing, but I am fairly new and do not know how to do that with Keras models (I do have access to higher computing capabilities through a university). I am also wondering whether there is a way to run this on a GPU, or whether changing the batch size would help. The end product should predict and translate the result into the class's string from the model. Does having more epochs in training affect prediction time? Here is the portion of code where I am running this...
import os
import glob
from sklearn import metrics
import trimesh
import numpy as np
from matplotlib import pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import pandas as pd
#### Create folder for results
def createFolder(directory): # creates a folder in the working directory to save all files produced into the folder.
try:
if not os.path.exists(directory):
os.makedirs(directory)
except OSError:
print ('Error: Creating directory. ' + directory)
createFolder('MODEL_OUTCOMES')
#####################################################
########## Loading in data set from folder ##########
#####################################################
DATA_DIR ="/home/sat164/pytorch/Data_Set_Spaghetti"
num_epochs = 1 # number of epochs
def parse_dataset(num_points=7000):
train_points = []
train_labels = []
test_points = []
test_labels = []
class_map = {}
real_names = {}
folders = glob.glob(DATA_DIR + "/*")
for i, folder in enumerate(folders):
# Lets you know it Has found the Folder
pathname = os.path.basename(folder)
print(f"processing class: {pathname}")
# store folder name with ID by getting base name of folder
class_map[i] = os.path.basename(folder)
# real_names[i] = folder.split("/")[-1] # if you want the actual names
# Iterate through the data set to find Train and Test files
train_files = glob.glob(os.path.join(folder, "Train/*"))
test_files = glob.glob(os.path.join(folder, "Test/*"))
# Train data
for f in train_files:
a = np.loadtxt(f, dtype = float)
train_points.append(a)
train_labels.append(i)
# Test data
for f in test_files:
b = np.loadtxt(f, dtype = float)
test_points.append(b)
test_labels.append(i)
return (np.array(train_points),
np.array(test_points),
np.array(train_labels),
np.array(test_labels),
class_map,
)
NUM_POINTS = 7000 # How many points to sample
# -- Each has 7000 lines of x,y,z
# so must be 7000 unless increased in CoordinateSaveMe.py
NUM_CLASSES = 2 # binary (GPCR or not)
BATCH_SIZE = 16 # Smaller Batch sizes are less memory and potentially better generalization
train_points, test_points, train_labels, test_labels, CLASS_MAP = parse_dataset(
NUM_POINTS
)
# Augmentation function to jitter and shuffle the train dataset.
def augment(points, label):
# jitter points
points += tf.random.uniform(points.shape, -0.005, 0.005, dtype=tf.float64)
# shuffle points
points = tf.random.shuffle(points)
return points, label
# set the shuffle buffer size to the entire size of the dataset
# as prior to this the data is ordered by class
train_dataset = tf.data.Dataset.from_tensor_slices((train_points, train_labels))
test_dataset = tf.data.Dataset.from_tensor_slices((test_points, test_labels))
train_dataset = train_dataset.shuffle(len(train_points)).map(augment).batch(BATCH_SIZE)
test_dataset = test_dataset.shuffle(len(test_points)).batch(BATCH_SIZE)
print("\n\n\n********** DATA SET COMPLETE **********\n\n\n")
#####################################
########## Build the Model ##########
#####################################
def conv_bn(x, filters):
x = layers.Conv1D(filters, kernel_size=1, padding="valid")(x)
x = layers.BatchNormalization(momentum=0.00)(x)
return layers.Activation("relu")(x)
def dense_bn(x, filters):
x = layers.Dense(filters)(x)
x = layers.BatchNormalization(momentum=0.00)(x)
return layers.Activation("relu")(x)
### PointNet uses two components ###
# The primary MLP network, and the transformer net (T-net).
# The T-net aims to learn an affine transformation matrix by its own mini network.
# The T-net is used twice.
# The first time to transform the input features (n, 3) into a canonical representation.
# The second is an affine transformation for alignment in feature space (n, 3).
# As per the original paper we constrain the transformation to be close to an orthogonal matrix
# (i.e. ||X*X^T - I|| = 0).
class OrthogonalRegularizer(keras.regularizers.Regularizer):
def __init__(self, num_features, l2reg=0.001):
self.num_features = num_features
self.l2reg = l2reg
self.eye = tf.eye(num_features)
def __call__(self, x):
x = tf.reshape(x, (-1, self.num_features, self.num_features))
xxt = tf.tensordot(x, x, axes=(2, 2))
xxt = tf.reshape(xxt, (-1, self.num_features, self.num_features))
return tf.reduce_sum(self.l2reg * tf.square(xxt - self.eye))
### We can define a general function to build T-net layers ###
def tnet(inputs, num_features):
    # Initialise bias as the identity matrix
bias = keras.initializers.Constant(np.eye(num_features).flatten())
reg = OrthogonalRegularizer(num_features)
x = conv_bn(inputs, 32*2)
x = conv_bn(x, 64*2)
x = conv_bn(x, 512*2)
x = layers.GlobalMaxPooling1D()(x)
x = dense_bn(x, 256*2)
x = dense_bn(x, 128*2)
x = layers.Dense(
num_features * num_features,
kernel_initializer="zeros",
bias_initializer=bias,
activity_regularizer=reg,
)(x)
feat_T = layers.Reshape((num_features, num_features))(x)
# Apply affine transformation to input features
return layers.Dot(axes=(2, 1))([inputs, feat_T])
inputs = keras.Input(shape=(NUM_POINTS, 3))
x = tnet(inputs, 3)
x = conv_bn(x, 32)
x = conv_bn(x, 32)
x = tnet(x, 32)
x = conv_bn(x, 32)
x = conv_bn(x, 64)
x = conv_bn(x, 512)
x = layers.GlobalMaxPooling1D()(x)
x = dense_bn(x, 256)
x = layers.Dropout(0.3)(x)
x = dense_bn(x, 128)
x = layers.Dropout(0.3)(x)
outputs = layers.Dense(NUM_CLASSES, activation="softmax")(x)
model = keras.Model(inputs=inputs, outputs=outputs, name="GpcrHunter")
model.summary()
model.compile(
loss="sparse_categorical_crossentropy",
optimizer=keras.optimizers.Adam(learning_rate=0.001),
metrics=["sparse_categorical_accuracy"],)
history = model.fit(train_dataset, epochs=num_epochs, validation_data=test_dataset)
This is the area I am struggling with:
import time
path = "/home/sat164/pytorch/predict_Whole/Test/spaghetti_whole_set"
folders = glob.glob(path + "/*.txt")
dataset_labels= []
testSet = []
for i in folders:
a = np.loadtxt(i, dtype = float)
testSet.append(a)
dataset_labels.append(os.path.basename(i))
testSet = np.array(testSet)
dataset_labels = np.array(dataset_labels)
test_dataset = tf.data.Dataset.from_tensor_slices(testSet)  # labels are not needed by predict()
# No shuffle here: predictions must stay aligned with dataset_labels
test_dataset = test_dataset.batch(BATCH_SIZE)
t0 = time.perf_counter()  # time.clock() was removed in Python 3.8
preds2 = model.predict(test_dataset)
preds2 = tf.math.argmax(preds2,-1)
prediction = []
for i in range(len(dataset_labels)):
prediction.append(CLASS_MAP[preds2[i].numpy()])
df = pd.DataFrame()
df["Prediction"] = prediction[0::1]
df["Real-label"] = dataset_labels[0::1]
df.to_excel('MODEL_OUTCOMES/Alpha_fold_results.xlsx', index = False)
t1 = time.perf_counter() - t0
print("FINISHED.......Time elapsed: ", t1)
I have tried looking online for ways to speed up the prediction. It works well with a small number of files, say 10-100, but beyond that the runtime really balloons. It seems to take a long time to predict, and I cannot figure out how to make the model predict in parallel. I have tried changing the batch size but have not noticed much difference, and I am not sure that even helps. I feel as though there is an easier way, but I do not fully understand how the models predict, especially how to use a frozen snapshot (checkpoint) of the model that has good accuracy on the validation/training set.
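For reference, this is the kind of batched, prefetched prediction call I have been experimenting with (a minimal sketch, not my final code; PRED_BATCH is an illustrative value, and model, testSet and CLASS_MAP are the objects defined above):

import time
import tensorflow as tf

# Keras runs predict() on the GPU automatically if one is visible.
print("GPUs visible:", tf.config.list_physical_devices("GPU"))

PRED_BATCH = 64  # illustrative; raise it until GPU memory runs out

pred_ds = (tf.data.Dataset.from_tensor_slices(testSet)
           .batch(PRED_BATCH)
           .prefetch(tf.data.AUTOTUNE))  # overlap input loading with inference

t0 = time.perf_counter()
probs = model.predict(pred_ds)              # shape: (num_files, NUM_CLASSES)
pred_ids = probs.argmax(axis=-1)            # integer class index per file
pred_names = [CLASS_MAP[i] for i in pred_ids]
print("predict took", time.perf_counter() - t0, "s")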
THANKS
I followed the guide at https://stackabuse.com/time-series-prediction-using-lstm-with-pytorch-in-python/ to set up an LSTM model for predictions using torch.
After importing the needed modules (torch, torch.nn, functional, optim, etc.), I set up my index list with quantities for 52 weeks.
idxlist = []
for x in range(0, len(inputslist)):
    idxlist.append(a - datetime.timedelta(weeks=x))  # a is the reference date (defined earlier, not shown)
in_data = pd.DataFrame(columns=['date','qty'])
in_data.date = idxlist
in_data.qty = inputslist
all_data = in_data['qty'].values.astype(float)
Then, I defined the test size and set up a scaler to normalize data:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(-1,1))
train_data_normalized = scaler.fit_transform(train_data.reshape(-1,1))
train_data_normalized = torch.FloatTensor(train_data_normalized).view(-1)
#train window
train_window = 12
Then I created sequences:
def create_inout_sequences(input_data,tw):
inout_seq = []
    L = len(input_data)  ## 40
    for i in range(L-tw):  ## 32 (if tw=8)
train_seq = input_data[i:i+tw]
train_label = input_data[i+tw:i+tw+1]
inout_seq.append((train_seq ,train_label))
return inout_seq
train_inout_seq = create_inout_sequences(train_data_normalized, train_window)
print(len(train_inout_seq)) #len(train) - train_w
Then I set up the Net class for the LSTM model. Reconstructed from the guide, it looks roughly like this (a sketch based on the tutorial; my exact hyperparameters may differ):
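import torch
import torch.nn as nn

class LSTM(nn.Module):
    def __init__(self, input_size=1, hidden_layer_size=100, output_size=1):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size
        self.lstm = nn.LSTM(input_size, hidden_layer_size)
        self.linear = nn.Linear(hidden_layer_size, output_size)
        # (h_0, c_0) state, reset before each sequence
        self.hidden_cell = (torch.zeros(1, 1, hidden_layer_size),
                            torch.zeros(1, 1, hidden_layer_size))

    def forward(self, input_seq):
        lstm_out, self.hidden_cell = self.lstm(
            input_seq.view(len(input_seq), 1, -1), self.hidden_cell)
        predictions = self.linear(lstm_out.view(len(input_seq), -1))
        return predictions[-1]  # only the last time step is the forecast

After optimizing with model = LSTM(), optim.Adam and model.eval(), I obtain the inverse transform with: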
actual_predictions = scaler.inverse_transform(np.array(test_inputs[train_window:]).reshape(-1, 1))
However, when I plot the predicted values for the last 12 weeks against the actual values, I obtain:
[graph: the predictions follow the periodic shape but sit well below the actual values]
Why is this prediction so far below the actual values, even though it is periodic?
How can I approach this error, and what part of my code might be causing it? I tried looking through existing issues and SO threads, but they mostly point towards a failing custom loss-calculating layer, whereas I am using Keras's built-in loss.
My code:
dimension = 300
n_neighbors = 10
n_unique_candidate_pos = 500 # Defines number of unique positions a candidate can take in a given invoice. Hyper-parameter
running_first_time = True
tf.keras.backend.set_floatx('float64')
tf.keras.backend.clear_session()
for index, row in data.iterrows():
print('Epoch {} started.'.format(index))
image_path = row.pngFileLoc # getting path to image
image_json = eval(row.json_file) # getting json
image = Image.open(PATH_TO_DATA_FOLDER + image_path)
# Getting actual class label
actual_invoice_date = row.InvDate
# 1. Getting candidates
candidate_gen = CandidateGenerator(image, row)
candidates = candidate_gen.generate_date()
for c in candidates:
'''
        Feed each candidate through the model by incrementally training it.
'''
# 2. Getting neighbors
neighbors_gen = Neighbors(image, row, n_neighbors = n_neighbors)
neighbors = neighbors_gen.get_neighbors(c)
# 3. vectorizing the neighbors
vect = Vectorize(dimension=300)
neighbors_embedded = vect.vectorize(neighbors)
# 4. getting absolute candidate position
#absolute_cand_pos = get_absolute_cand_pos(c, image_json)
candidate_normalized_vertices = get_normalized_cand_vertices(c, image_json, image)
absolute_candidate_pos = neighbors_gen.get_centroid(candidate_normalized_vertices)
# 5. is already done
# 6. Maxpooling the neighbors
neighborhood_encoding = NeighborhoodEncoding()
neighbors_encoded = neighborhood_encoding.encode_neighbors(np.array(neighbors_embedded))
# Trainable layers start here
# 7. Embedding candidate absolute position
input_candidate_pos_layer = Input(shape=(2,), name='input_layer')
embedding_candidate_pos_layer = Embedding(input_dim = n_unique_candidate_pos, output_dim = dimension // 2, name='candidate_position_embedding_layer')(input_candidate_pos_layer)
#We will get [2, dimension/2] output above as we are using 2d co-ordinates so flatten it out into [dimension]
flatten_cand_pos_layer = Flatten(name = 'flatten_candidate_position_embedding_layer')(embedding_candidate_pos_layer)
# 8. Concatenate neighborhood encoding and candidate position embedding
sliced_flatten_cand_pos_layer = SliceLayer(index=0)(flatten_cand_pos_layer)
concat_neighbor_candidate = Concatenate(name='concat_neighbors_candpos_layer')([tf.convert_to_tensor(neighbors_encoded, dtype='float64'), sliced_flatten_cand_pos_layer]) #I honestly have no idea why it requires me to slice the tensor
#reshaping this
reshape_concat_neighbor = Reshape((1, ), input_shape=concat_neighbor_candidate.shape)(concat_neighbor_candidate)
transposed_reshape_concat_neighbor = TransposeLayer()(reshape_concat_neighbor)
# 9. Reduce dimensionality of candidate encoding
dense_dim_reduce_layer = Dense(units = dimension, activation = 'relu', name='dense_dim_reduc_layer')(transposed_reshape_concat_neighbor)
flatten_dense_dim_reduce_layer = Flatten(name='flatten_dense_dim_reduc_layer')(dense_dim_reduce_layer)
# 10. Compute cosine similarity between field_embedding and candidate_encoding and 11. do sigmoid
sliced_flatten_dense_dim_layer = SliceLayer(index=0)(flatten_dense_dim_reduce_layer)
cosine_sim_layer = CosineSimilarityLayer(name='cosine_sim_layer')(sliced_flatten_dense_dim_layer, field_embedded[0])
# 12. Compute loss
#y_pred = Output(name='output_layer')(tf.convert_to_tensor(cosine_sim_layer, dtype='float64'))
y_pred = Output(name='output_layer')(tf.convert_to_tensor([cosine_sim_layer], dtype='float64'))
y_actual = int(actual_invoice_date == c)
if running_first_time:
model = Model(inputs = input_candidate_pos_layer, outputs = y_pred)
model.compile(loss='binary_crossentropy')
running_first_time = False
print('model initialized successfully.')
print(model.summary())
        model.fit(np.asarray([absolute_candidate_pos]).astype('float64'), np.asarray([y_actual]).astype('float64'))  # x and y must be separate arguments, not one list
What it does:
I am iterating through each image, and for each image I generate a list of 'candidates' that might belong to the actual class. Each image is an invoice, and each candidate for the class 'Invoice Date' could be any of the dates that appear in the image.
The actual class label is calculated dynamically by checking whether the candidate matches the human-annotated data.
Between layers, I apply some functions to the previous layer's output (as per this research paper). I made sure not to use any layers / TensorFlow operations that might break backprop. This is what the model summary looks like after one iteration: [model summary screenshot]
This is the error raised when I call model.fit():
ValueError: No gradients provided for any variable: ['candidate_position_embedding_layer/embeddings:0', 'dense_dim_reduc_layer/kernel:0', 'dense_dim_reduc_layer/bias:0'].
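One direction I have been considering is defining the trainable part once, outside the loop, and feeding the precomputed encodings in as inputs. A minimal sketch of that rearrangement (illustrative only; it drops my custom SliceLayer/Output layers and uses Keras's Dot layer for the cosine similarity):

import numpy as np
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import (Input, Embedding, Flatten,
                                     Concatenate, Dense, Dot, Activation)

dimension = 300
n_unique_candidate_pos = 500

# Candidate positions are assumed already quantized to integer
# buckets < n_unique_candidate_pos before being fed in.
cand_pos_in = Input(shape=(2,), name='candidate_position')
neigh_in = Input(shape=(dimension,), name='neighborhood_encoding')
field_in = Input(shape=(dimension,), name='field_embedding')

emb = Embedding(n_unique_candidate_pos, dimension // 2)(cand_pos_in)
emb = Flatten()(emb)                                   # (batch, dimension)
enc = Dense(dimension, activation='relu')(Concatenate()([neigh_in, emb]))
cos = Dot(axes=-1, normalize=True)([enc, field_in])    # cosine similarity
y_pred = Activation('sigmoid', name='output_layer')(cos)

model = Model([cand_pos_in, neigh_in, field_in], y_pred)
model.compile(optimizer='adam', loss='binary_crossentropy')
# Then, per candidate: model.train_on_batch([pos, neigh, field], y)

Because every op in this graph is a Keras layer applied to symbolic inputs, gradients can flow back to the Embedding and Dense weights.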
I am trying to classify 24 RGB images belonging to 2 classes. Each image was originally 400 x 400 but has been resized to 32 x 32 in the code. I am using the metric-learning image similarity search algorithm. However, I get the error "Error when checking input..." when I run the line history = model.fit(AnchorPositivePairs(num_batchs=2), epochs=20) at the end of the program. What could be causing this error?
Here is my code!
import random
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from collections import defaultdict
from PIL import Image
from sklearn.metrics import ConfusionMatrixDisplay
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import utils
import glob
import os
import tqdm
IMG_DIR = "C:/Temp2/AGAIN_4/" # load the images from one directory
IM_WIDTH = 32
IM_HEIGHT = 32
#batch_size = 2
num_classes = 2
#epochs = 15
def read_images(directory, resize_to=(32, 32)): # extract image labels
"""
Reads images and labels from the given directory
:param directory directory from which to read the files
:param resize_to a tuple of width, height to resize the images
: returns a tuple of list of images and labels
"""
files = glob.glob(directory + "*.jpg")
images = []
labels = []
for f in tqdm.tqdm_notebook(files):
im = Image.open(f)
im = im.resize(resize_to)
im = np.array(im) / 255.0
im = im.astype("float32")
images.append(im)
label = 1 if 'microwave' in f.lower() else 0
labels.append(label)
return np.array(images), np.array(labels)
x, y = read_images(directory=IMG_DIR, resize_to=(IM_WIDTH, IM_HEIGHT))
# make sure we have 25000 images if we are reading the full data set.
# Change the number accordingly if you have created a subset
assert len(x) == len(y) == 24 #25000
from sklearn.model_selection import train_test_split # extract train and test data
x_train, x_test, y_train, y_test =train_test_split(x, y, test_size=0.25)
# remove X and y since we don't need them anymore
# otherwise it will just use the memory
del x
del y
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
# Display some of the images
height_width = 32
def show_collage(examples):
box_size = height_width + 2
num_rows, num_cols = examples.shape[:2]
collage = Image.new(
mode="RGB",
size=(num_cols * box_size, num_rows * box_size),
color=(250, 250, 250),
)
for row_idx in range(num_rows):
for col_idx in range(num_cols):
array = (np.array(examples[row_idx, col_idx]) * 255).astype(np.uint8)
collage.paste(
Image.fromarray(array), (col_idx * box_size, row_idx * box_size)
)
# Double size for visualisation.
collage = collage.resize((2 * num_cols * box_size, 2 * num_rows * box_size))
return collage
# Show a collage of 3x3 random images.
sample_idxs = np.random.randint(0, 15, size=(3, 3))
examples = x_train[sample_idxs]
show_collage(examples) # Displays 9 images
class_idx_to_train_idxs = defaultdict(list)
for y_train_idx, y in enumerate(y_train):
class_idx_to_train_idxs[y].append(y_train_idx)
class_idx_to_test_idxs = defaultdict(list)
for y_test_idx, y in enumerate(y_test):
class_idx_to_test_idxs[y].append(y_test_idx)
num_classes = 2
class AnchorPositivePairs(keras.utils.Sequence):
def __init__(self, num_batchs):
self.num_batchs = num_batchs
def __len__(self):
return self.num_batchs
def __getitem__(self, _idx):
x = np.empty((2, num_classes, height_width, height_width, 3), dtype=np.float32)
for class_idx in range(num_classes):
examples_for_class = class_idx_to_train_idxs[class_idx]
anchor_idx = random.choice(examples_for_class)
positive_idx = random.choice(examples_for_class)
while positive_idx == anchor_idx:
positive_idx = random.choice(examples_for_class)
x[0, class_idx] = x_train[anchor_idx]
x[1, class_idx] = x_train[positive_idx]
return x
examples = next(iter(AnchorPositivePairs(num_batchs=1)))
show_collage(examples)
class EmbeddingModel(keras.Model):
def train_step(self, data):
# Note: Workaround for open issue, to be removed.
if isinstance(data, tuple):
data = data[0]
anchors, positives = data[0], data[1]
with tf.GradientTape() as tape:
# Run both anchors and positives through model.
anchor_embeddings = self(anchors, training=True)
positive_embeddings = self(positives, training=True)
# Calculate cosine similarity between anchors and positives. As they have
# been normalised this is just the pair wise dot products.
similarities = tf.einsum(
"ae,pe->ap", anchor_embeddings, positive_embeddings
)
# Since we intend to use these as logits we scale them by a temperature.
# This value would normally be chosen as a hyper parameter.
temperature = 0.2
similarities /= temperature
# We use these similarities as logits for a softmax. The labels for
# this call are just the sequence [0, 1, 2, ..., num_classes] since we
# want the main diagonal values, which correspond to the anchor/positive
# pairs, to be high. This loss will move embeddings for the
# anchor/positive pairs together and move all other pairs apart.
sparse_labels = tf.range(num_classes)
loss = self.compiled_loss(sparse_labels, similarities)
# Calculate gradients and apply via optimizer.
gradients = tape.gradient(loss, self.trainable_variables)
self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
# Update and return metrics (specifically the one for the loss value).
self.compiled_metrics.update_state(sparse_labels, similarities)
return {m.name: m.result() for m in self.metrics}
inputs = layers.Input(shape=(height_width, height_width, 3))
#inputs = layers.Input(shape=(32, 32, 3))
x = layers.Conv2D(filters=32, kernel_size=3, strides=2, activation="relu")(inputs)
x = layers.Conv2D(filters=64, kernel_size=3, strides=2, activation="relu")(x)
x = layers.Conv2D(filters=128, kernel_size=3, strides=2, activation="relu")(x)
x = layers.GlobalAveragePooling2D()(x)
embeddings = layers.Dense(units=8, activation=None)(x)
embeddings = tf.nn.l2_normalize(embeddings, axis=-1)
model = EmbeddingModel(inputs, embeddings)
model.summary()
model.compile(
optimizer=keras.optimizers.Adam(learning_rate=1e-3),
loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
)
history = model.fit(AnchorPositivePairs(num_batchs=2), epochs=20)
plt.plot(history.history["loss"])
plt.show()
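For what it is worth, the batch my generator yields is 5-dimensional, which seems to match the shape in the error (a quick diagnostic print; the leading 2 is the anchor/positive axis):

batch = AnchorPositivePairs(num_batchs=1)[0]
print(batch.shape)  # (2, 2, 32, 32, 3): (anchor/positive, class, height, width, channels)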
I have also used the CIFAR-10 dataset as input instead of my local directory images, as shown in the next listing, but I still get the same error.
import random
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from collections import defaultdict
from PIL import Image
from sklearn.metrics import ConfusionMatrixDisplay
from tensorflow import keras
from tensorflow.keras import layers
"""
## Dataset
For this example we will be using the
[CIFAR-10](https://www.cs.toronto.edu/~kriz/cifar.html) dataset.
"""
from tensorflow.keras.datasets import cifar10
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train = x_train.astype("float32") / 255.0
y_train = np.squeeze(y_train)
x_test = x_test.astype("float32") / 255.0
y_test = np.squeeze(y_test)
"""
To get a sense of the dataset we can visualise a grid of 25 random examples.
"""
height_width = 32
def show_collage(examples):
box_size = height_width + 2
num_rows, num_cols = examples.shape[:2]
collage = Image.new(
mode="RGB",
size=(num_cols * box_size, num_rows * box_size),
color=(250, 250, 250),
)
for row_idx in range(num_rows):
for col_idx in range(num_cols):
array = (np.array(examples[row_idx, col_idx]) * 255).astype(np.uint8)
collage.paste(
Image.fromarray(array), (col_idx * box_size, row_idx * box_size)
)
# Double size for visualisation.
collage = collage.resize((2 * num_cols * box_size, 2 * num_rows * box_size))
return collage
# Show a collage of 5x5 random images.
sample_idxs = np.random.randint(0, 50000, size=(5, 5))
examples = x_train[sample_idxs]
show_collage(examples)
"""
Metric learning provides training data not as explicit `(X, y)` pairs but instead uses
multiple instances that are related in the way we want to express similarity. In our
example we will use instances of the same class to represent similarity; a single
training instance will not be one image, but a pair of images of the same class. When
referring to the images in this pair we'll use the common metric learning names of the
`anchor` (a randomly chosen image) and the `positive` (another randomly chosen image of
the same class).
To facilitate this we need to build a form of lookup that maps from classes to the
instances of that class. When generating data for training we will sample from this
lookup.
"""
class_idx_to_train_idxs = defaultdict(list)
for y_train_idx, y in enumerate(y_train):
class_idx_to_train_idxs[y].append(y_train_idx)
class_idx_to_test_idxs = defaultdict(list)
for y_test_idx, y in enumerate(y_test):
class_idx_to_test_idxs[y].append(y_test_idx)
"""
For this example we are using the simplest approach to training; a batch will consist of
`(anchor, positive)` pairs spread across the classes. The goal of learning will be to
move the anchor and positive pairs closer together and further away from other instances
in the batch. In this case the batch size will be dictated by the number of classes; for
CIFAR-10 this is 10.
"""
num_classes = 10
class AnchorPositivePairs(keras.utils.Sequence):
def __init__(self, num_batchs):
self.num_batchs = num_batchs
def __len__(self):
return self.num_batchs
def __getitem__(self, _idx):
x = np.empty((2, num_classes, height_width, height_width, 3), dtype=np.float32)
for class_idx in range(num_classes):
examples_for_class = class_idx_to_train_idxs[class_idx]
anchor_idx = random.choice(examples_for_class)
positive_idx = random.choice(examples_for_class)
while positive_idx == anchor_idx:
positive_idx = random.choice(examples_for_class)
x[0, class_idx] = x_train[anchor_idx]
x[1, class_idx] = x_train[positive_idx]
return x
"""
We can visualise a batch in another collage. The top row shows randomly chosen anchors
from the 10 classes, the bottom row shows the corresponding 10 positives.
"""
examples = next(iter(AnchorPositivePairs(num_batchs=1)))
show_collage(examples)
"""
## Embedding model
We define a custom model with a `train_step` that first embeds both anchors and positives
and then uses their pairwise dot products as logits for a softmax.
"""
class EmbeddingModel(keras.Model):
def train_step(self, data):
# Note: Workaround for open issue, to be removed.
if isinstance(data, tuple):
data = data[0]
anchors, positives = data[0], data[1]
with tf.GradientTape() as tape:
# Run both anchors and positives through model.
anchor_embeddings = self(anchors, training=True)
positive_embeddings = self(positives, training=True)
# Calculate cosine similarity between anchors and positives. As they have
# been normalised this is just the pair wise dot products.
similarities = tf.einsum(
"ae,pe->ap", anchor_embeddings, positive_embeddings
)
# Since we intend to use these as logits we scale them by a temperature.
# This value would normally be chosen as a hyper parameter.
temperature = 0.2
similarities /= temperature
# We use these similarities as logits for a softmax. The labels for
# this call are just the sequence [0, 1, 2, ..., num_classes] since we
# want the main diagonal values, which correspond to the anchor/positive
# pairs, to be high. This loss will move embeddings for the
# anchor/positive pairs together and move all other pairs apart.
sparse_labels = tf.range(num_classes)
loss = self.compiled_loss(sparse_labels, similarities)
# Calculate gradients and apply via optimizer.
gradients = tape.gradient(loss, self.trainable_variables)
self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
# Update and return metrics (specifically the one for the loss value).
self.compiled_metrics.update_state(sparse_labels, similarities)
return {m.name: m.result() for m in self.metrics}
"""
Next we describe the architecture that maps from an image to an embedding. This model
simply consists of a sequence of 2d convolutions followed by global pooling with a final
linear projection to an embedding space. As is common in metric learning we normalise the
embeddings so that we can use simple dot products to measure similarity. For simplicity
this model is intentionally small.
"""
inputs = layers.Input(shape=(height_width, height_width, 3))
x = layers.Conv2D(filters=32, kernel_size=3, strides=2, activation="relu")(inputs)
x = layers.Conv2D(filters=64, kernel_size=3, strides=2, activation="relu")(x)
x = layers.Conv2D(filters=128, kernel_size=3, strides=2, activation="relu")(x)
x = layers.GlobalAveragePooling2D()(x)
embeddings = layers.Dense(units=8, activation=None)(x)
embeddings = tf.nn.l2_normalize(embeddings, axis=-1)
model = EmbeddingModel(inputs, embeddings)
"""
Finally we run the training. On a Google Colab GPU instance this takes about a minute.
"""
model.compile(
optimizer=keras.optimizers.Adam(learning_rate=1e-3),
loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
)
history = model.fit(AnchorPositivePairs(num_batchs=1000), epochs=20)
plt.plot(history.history["loss"])
plt.show()
"""
## Testing
We can review the quality of this model by applying it to the test set and considering
near neighbours in the embedding space.
First we embed the test set and calculate all near neighbours. Recall that since the
embeddings are unit length we can calculate cosine similarity via dot products.
"""
near_neighbours_per_example = 10
embeddings = model.predict(x_test)
gram_matrix = np.einsum("ae,be->ab", embeddings, embeddings)
near_neighbours = np.argsort(gram_matrix.T)[:, -(near_neighbours_per_example + 1) :]
"""
As a visual check of these embeddings we can build a collage of the near neighbours for 5
random examples. The first column of the image below is a randomly selected image, the
following 10 columns show the nearest neighbours in order of similarity.
"""
num_collage_examples = 5
examples = np.empty(
(
num_collage_examples,
near_neighbours_per_example + 1,
height_width,
height_width,
3,
),
dtype=np.float32,
)
for row_idx in range(num_collage_examples):
examples[row_idx, 0] = x_test[row_idx]
anchor_near_neighbours = reversed(near_neighbours[row_idx][:-1])
for col_idx, nn_idx in enumerate(anchor_near_neighbours):
examples[row_idx, col_idx + 1] = x_test[nn_idx]
show_collage(examples)
"""
We can also get a quantified view of the performance by considering the correctness of
near neighbours in terms of a confusion matrix.
Let us sample 10 examples from each of the 10 classes and consider their near neighbours
as a form of prediction; that is, does the example and its near neighbours share the same
class?
We observe that each animal class does generally well, and is confused the most with the
other animal classes. The vehicle classes follow the same pattern.
"""
confusion_matrix = np.zeros((num_classes, num_classes))
# For each class.
for class_idx in range(num_classes):
# Consider 10 examples.
example_idxs = class_idx_to_test_idxs[class_idx][:10]
for y_test_idx in example_idxs:
# And count the classes of its near neighbours.
for nn_idx in near_neighbours[y_test_idx][:-1]:
nn_class_idx = y_test[nn_idx]
confusion_matrix[class_idx, nn_class_idx] += 1
# Display a confusion matrix.
labels = [
"Airplane",
"Automobile",
"Bird",
"Cat",
"Deer",
"Dog",
"Frog",
"Horse",
"Ship",
"Truck",
]
disp = ConfusionMatrixDisplay(confusion_matrix=confusion_matrix, display_labels=labels)
disp.plot(include_values=True, cmap="viridis", ax=None, xticks_rotation="vertical")
plt.show()
ValueError: Error when checking input: expected input_16 to have 4 dimensions, but got array with shape (None, None, None, None, None)
I'm not sure, but as I understand it, your own data generator causes this error. You should also make the generator check your data size; try this:
def __len__(self):
if self.batch_size > self.X.shape[0]:
print("Batch size is greater than data size!!")
return -1
return int(np.floor(self.X.shape[0] / self.batch_size))
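Note that the AnchorPositivePairs generator in the question has no self.X or self.batch_size, so the guard would need adapting; a possible sketch for that class (hypothetical, reusing the question's class_idx_to_train_idxs lookup):

def __len__(self):
    # num_batchs plays the role of the dataset size here; just sanity-check
    # that every class can actually form an anchor/positive pair.
    if min(len(v) for v in class_idx_to_train_idxs.values()) < 2:
        raise ValueError("Each class needs at least 2 images for anchor/positive pairs.")
    return self.num_batchs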
I'm struggling to solve this issue, and I believe it is due to my data. I'm thinking about this as a few-to-many regression problem, but there could be a better approach in TensorFlow.
Training Data
I have some data generated from a video sequence. For each frame of video I have a distribution of x, y positions for each cluster. There are 157,110 frames and 200,000 clusters. The frames and clusters are the inputs; they are integers and I think could be considered labels (I'll be using another network to learn the sequences of clusters later on). Since each histogram is related to both a frame and a clusterID, the input is not "one-hot". The histograms (outputs) have 19+8 (x+y) bins, where each count is rarely above 10, and they could be normalized.
A subset of the training data is available here: The first two columns are the frame and clusterID (inputs) and the remaining 19+8 columns are the histograms (outputs).
What is the best network to learn to generate the appropriate histogram for a given frame/clusterID pair?
The following code is my current attempt using an MLP. It does not converge; in fact, the cost does not decrease at all. Is there something wrong with my implementation, with my choice of an MLP, or with the lack of scaling in my input data?
#!/usr/bin/python
# This program uses tensorflow to learn cluster probabilities and associate them with frame and cluster IDs
# Arguments
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("clusterProbabilityfile", help="CSV file containing cluster probabilities")
parser.add_argument("trainingIterations", type=int, help="CSV file containing cluster probabilities")
args = parser.parse_args()
# Imports for ML
import tensorflow as tf
import numpy as np
from tensorflow.python.framework import dtypes
# Imports for loading CSV file
from tensorflow.python.platform import gfile
import csv
# Global vars
numInputUnits = 2
numOutputUnits = 19+8
numHiddenUnits = (numOutputUnits-numInputUnits)//2 # integer division, since this is used as a layer size
workingDirectory = args.clusterProbabilityfile.split('/')[0]+"/"
columnSplit = 2 # Column number that splits
# Shuffle training set
def shuffleTrainingSet(trainingSet):
    trainingIndices = np.arange(len(trainingSet.data)) # assumes len(data) == len(target)
    np.random.shuffle(trainingIndices) # shuffle indices
    data = trainingSet.data[trainingIndices]
    target = trainingSet.target[trainingIndices]
training_set = tf.contrib.learn.datasets.base.Dataset(data=data, target=target)
return training_set
# Load training data from CSV file, convert to numpy arrays and construct Dataset
# Modified from tf.contrib.learn.datasets.base.load_csv_without_header
# Should these be randomized???
with gfile.Open(args.clusterProbabilityfile) as csv_file:
data_file = csv.reader(csv_file)
data, target = [], []
    for row in data_file:
        target.append(row[columnSplit:]) # the remaining 19+8 histogram columns (row[columnSplit+1:] would drop one)
        data.append(row[:columnSplit]) # the first two columns: frame and clusterID
target = np.array(target, dtype=int)
data = np.array(data, dtype=int)
training_set = tf.contrib.learn.datasets.base.Dataset(data=data, target=target)
training_set = shuffleTrainingSet(training_set)
# Construct computation graph
# MLP approach (from https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3_NeuralNetworks/multilayer_perceptron.py)
# Single hidden layer!
inputVec = tf.placeholder(tf.float32, [None, numInputUnits])
outputVec = tf.placeholder(tf.float32, [None, numOutputUnits])
# Weights
hiddenWeights = tf.Variable(tf.random_normal([numInputUnits, numHiddenUnits])) # inputUnits -> hiddenUnits
outputWeights = tf.Variable(tf.random_normal([numHiddenUnits, numOutputUnits])) # hiddenUnits -> outputUnits
# Biases
hiddenBiases = tf.Variable(tf.random_normal([numHiddenUnits]))
outputBiases = tf.Variable(tf.random_normal([numOutputUnits]))
# Contruct MLP from layers
hiddenLayer = tf.add(tf.matmul(inputVec, hiddenWeights), hiddenBiases) # input * weight + bias = hidden
hiddenLayer = tf.nn.relu(hiddenLayer) # RELU Activation function for hidden layer.
outputLayer = tf.add(tf.matmul(hiddenLayer, outputWeights), outputBiases) # hidden * weight + bias = output
# loss and optimizer
#cross_entropy = -(outputVec * tf.log(outputLayer) + (1 - outputVec) * tf.log(1 - outputLayer))
#cost = tf.reduce_mean(cross_entropy)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=outputLayer, labels=outputVec)) # keyword args avoid swapping logits and labels
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)
# Compute graph
sess = tf.Session()
sess.run(tf.initialize_all_variables())
for epoch in range(args.trainingIterations):
training_set = shuffleTrainingSet(training_set) # Reshuffle for each epoch.
    _, epochCost = sess.run([optimizer, cost], feed_dict={inputVec: training_set.data, outputVec: training_set.target}) # run the optimizer too, otherwise the weights never update
print("{:d}\t{:f}".format(epoch, epochCost))
# Evaluate model
correct_prediction = tf.equal(tf.argmax(outputLayer,1), tf.argmax(outputVec,1)) # compare output layer with target output vector.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print("Cost:", sess.run(cost,feed_dict={inputVec: training_set.data, outputVec: training_set.target}))
print("Accuracy:", sess.run(accuracy,feed_dict={inputVec: training_set.data, outputVec: training_set.target}))