I am using the Tensorflow image classification example (https://www.tensorflow.org/versions/r0.9/tutorials/image_recognition/index.html).
How could I classify multiple images at a time?
EDIT: Ideally, I would just pass in one image and a number (nb) as arguments, and then make the input-to-be-classified nb iterations of that image
The file is classify_image.py, and the important portion is:
def run_inference_on_image(image):
    """Runs inference on an image and prints the top predictions.

    Args:
        image: Image file name.

    Returns:
        Nothing
    """
    if not tf.gfile.Exists(image):
        tf.logging.fatal('File does not exist %s', image)
    # Read the encoded bytes inside a `with` block so the file handle is
    # closed as soon as the data is in memory.
    with tf.gfile.FastGFile(image, 'rb') as image_file:
        image_data = image_file.read()

    # Creates graph from saved GraphDef.
    create_graph()

    with tf.Session() as sess:
        # Some useful tensors:
        # 'softmax:0': A tensor containing the normalized prediction across
        #   1000 labels.
        # 'pool_3:0': A tensor containing the next-to-last layer containing
        #   2048 float description of the image.
        # 'DecodeJpeg/contents:0': A tensor containing a string providing
        #   JPEG encoding of the image.
        # Runs the softmax tensor by feeding the image_data as input to the
        # graph.
        softmax_tensor = sess.graph.get_tensor_by_name('softmax:0')
        predictions = sess.run(softmax_tensor,
                               {'DecodeJpeg/contents:0': image_data})
        predictions = np.squeeze(predictions)

        # Creates node ID --> English string lookup.
        node_lookup = NodeLookup()

        # Highest-scoring node ids first.
        top_k = predictions.argsort()[-FLAGS.num_top_predictions:][::-1]
        for node_id in top_k:
            human_string = node_lookup.id_to_string(node_id)
            score = predictions[node_id]
            print('%s (score = %.5f)' % (human_string, score))
def main(_):
    """Entry point: call maybe_download_and_extract(), then classify one image.

    The image is FLAGS.image_file when that flag is set; otherwise the
    'cropped_panda.jpg' file inside FLAGS.model_dir is used.
    """
    maybe_download_and_extract()
    image = FLAGS.image_file or os.path.join(FLAGS.model_dir,
                                             'cropped_panda.jpg')
    run_inference_on_image(image)
The code relevant to you would be this section:
def main(_):
    """Entry point: call maybe_download_and_extract(), then classify one image."""
    maybe_download_and_extract()
    # Use the image given through FLAGS.image_file when there is one,
    # otherwise the 'cropped_panda.jpg' file in the model directory.
    if FLAGS.image_file:
        image = FLAGS.image_file
    else:
        image = os.path.join(FLAGS.model_dir, 'cropped_panda.jpg')
    run_inference_on_image(image)
In order to have predictions for all the png, jpeg or jpg files in a "images" folder, you could do this:
def main(_):
    """Run inference on every .png/.jpg/.jpeg file in ./images.

    The 'images' directory is looked up relative to the current working
    directory. Each matching file is passed to run_inference_on_image().
    """
    maybe_download_and_extract()

    # search for files in 'images' dir
    files_dir = os.path.join(os.getcwd(), 'images')

    # loop over files, run the prediction if it is an image
    for f in os.listdir(files_dir):
        if f.lower().endswith(('.png', '.jpg', '.jpeg')):
            image_path = os.path.join(files_dir, f)
            # run_inference_on_image() prints its own results and returns
            # nothing, so call it directly rather than printing its result.
            # NOTE(review): .png files go to the same
            # 'DecodeJpeg/contents:0' input as JPEGs - confirm the graph
            # accepts them.
            run_inference_on_image(image_path)
This should print out the predictions for all your images in that folder
Related
I'm using this tutorial from the Keras website: Image segmentation with a U-Net-like architecture. The tutorial runs well, so I want to adapt it for my own use. I have my own data (250x250 images and masks, while the dataset provided is 160x160), categorized in its own archives. When I try to run it, I get this error:
Node: 'sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits'
logits and labels must have the same first dimension, got logits shape [2097152,2] and labels shape [2000000]
The architecture is the same as in the link provided; I just modified how it searches for the images (because I have a different file structure). So here's what I modified:
target_dir = "IA_training_data_final/Toy_mask/"
# NOTE(review): the reported error compares 2097152 logits rows
# (= 32 * 256 * 256) with 2000000 labels (= 32 * 250 * 250), which suggests
# the network emits 256x256 outputs for 250x250 inputs. Presumably the
# model's down/up-sampling path needs a size divisible by its total stride -
# TODO confirm, and consider a size such as (256, 256).
img_size = (250, 250)
class_list = os.listdir(input_dir)
num_classes = len(class_list)
target_classes = list(range(num_classes))
batch_size = 32
val_percent = 0.10

# Collect every file below input_dir (recursively), in sorted order.
input_img_paths = sorted(
    os.path.join(folder, name)
    for folder, _subfolders, names in os.walk(input_dir)
    for name in names
)
input_img_number = len(input_img_paths)

# Same walk for the masks below target_dir.
target_img_paths = sorted(
    os.path.join(folder, name)
    for folder, _subfolders, names in os.walk(target_dir)
    for name in names
)
target_img_number = len(target_img_paths)

print("Number of samples:", input_img_number)
print("Number of masks:", target_img_number)
Any idea what I'm missing?
I did not try the tutorial with your modification, but from a first look you have a mismatch between the format of your labels and what the model expects (logits format is 2D, meaning it outputs a one-hot-encoded format when your labels are 1D). Try using categorical_crossentropy instead of sparse_categorical_crossentropy.
Or you can change your label format with utils.to_categorical and utils.to_ordinal
the following code is copied from :
https://www.tensorflow.org/tutorials/load_data/images
the code aims to create dataset of images downloaded from the web and stored into folders depending upon their classes, please do refer to the link above for the whole context!
# Dataset of file paths matching <data_dir>/*/*.
file_pattern = str(data_dir / '*/*')
list_ds = tf.data.Dataset.list_files(file_pattern)

# Print the first five entries as a quick check.
for path_tensor in list_ds.take(5):
    print(path_tensor.numpy())
def get_label(file_path):
    """Compare the file's parent directory name against CLASS_NAMES.

    Args:
        file_path: Path of one file, as a string tensor.

    Returns:
        The result of ``parent_dir == CLASS_NAMES`` (presumably a boolean
        mask over the class names - depends on how CLASS_NAMES is defined).
    """
    # Split the path into its components on the OS path separator.
    path_components = tf.strings.split(file_path, os.path.sep)
    # The second-to-last component is the class directory.
    class_dir = path_components[-2]
    return class_dir == CLASS_NAMES
def decode_img(img):
    """Decode JPEG bytes into a resized float32 image.

    Args:
        img: Compressed JPEG data as a string tensor.

    Returns:
        The decoded 3-channel image, converted to float32 and resized to
        IMG_HEIGHT x IMG_WIDTH.
    """
    # convert the compressed string to a 3D uint8 tensor
    img = tf.image.decode_jpeg(img, channels=3)
    # Use `convert_image_dtype` to convert to floats in the [0,1] range.
    img = tf.image.convert_image_dtype(img, tf.float32)
    # resize the image to the desired size. `tf.image.resize` expects the
    # size as [new_height, new_width], so height must come first.
    return tf.image.resize(img, [IMG_HEIGHT, IMG_WIDTH])
def process_path(file_path):
    """Turn one file path into an ``(image, label)`` pair.

    The label comes from get_label(); the image is the file's raw contents
    passed through decode_img().
    """
    label = get_label(file_path)
    # load the raw data from the file as a string, then decode it
    image = decode_img(tf.io.read_file(file_path))
    return image, label
# Set `num_parallel_calls` so multiple images are loaded/processed in parallel.
labeled_ds = list_ds.map(process_path, num_parallel_calls=AUTOTUNE)

# Inspect a single (image, label) element.
for sample_image, sample_label in labeled_ds.take(1):
    print("Image shape: ", sample_image.numpy().shape)
    print("Label: ", sample_label.numpy())
def prepare_for_training(ds, cache=True, shuffle_buffer_size=1000):
    """Cache, shuffle, repeat, batch and prefetch a dataset.

    Args:
        ds: Dataset to prepare.
        cache: Falsy to skip caching; a string to cache to that file name;
            any other truthy value to cache in memory.
        shuffle_buffer_size: Buffer size passed to ``shuffle``.

    Returns:
        The prepared dataset. It repeats forever.
    """
    # This is a small dataset, only load it once, and keep it in memory.
    # use `.cache(filename)` to cache preprocessing work for datasets that
    # don't fit in memory.
    if cache:
        ds = ds.cache(cache) if isinstance(cache, str) else ds.cache()

    # Shuffle, repeat forever, then group into batches of BATCH_SIZE.
    ds = ds.shuffle(buffer_size=shuffle_buffer_size).repeat().batch(BATCH_SIZE)

    # `prefetch` lets the dataset fetch batches in the background while the
    # model is training.
    return ds.prefetch(buffer_size=AUTOTUNE)


train_ds = prepare_for_training(labeled_ds)
We are finally left with train_ds, which is a PrefetchDataset object and contains the entire dataset of images and labels!
How to split train_ds into train, test & validation sets to feed it into a model?
After the ds.repeat() call the dataset is infinite, and splitting an infinite dataset doesn't work very well. Therefore you should split it before the prepare_for_training() call. Like this:
# List the files in a fixed order. By default `list_files` shuffles the file
# names again on every pass, which would move elements between the splits
# below from one epoch to the next.
list_ds = tf.data.Dataset.list_files(str(data_dir/'*/*'), shuffle=False)
labeled_ds = list_ds.map(process_path, num_parallel_calls=AUTOTUNE)
# Shuffle once and freeze that order (`reshuffle_each_iteration=False`) so
# that take()/skip() always select the same, non-overlapping elements.
labeled_ds = labeled_ds.shuffle(
    10000, reshuffle_each_iteration=False).batch(BATCH_SIZE)

# Size of dataset, counted in batches because the dataset is already batched.
n = sum(1 for _ in labeled_ds)
n_train = int(n * 0.8)
n_valid = int(n * 0.1)
n_test = n - n_train - n_valid

train_ds = labeled_ds.take(n_train)
valid_ds = labeled_ds.skip(n_train).take(n_valid)
test_ds = labeled_ds.skip(n_train + n_valid).take(n_test)
The line n = sum(1 for _ in labeled_ds) iterates through the dataset once to count its elements (batches, since the dataset has already been batched at that point); the dataset is then split three ways into 80%/10%/10%.
I have the following code:
# Convert the Python lists of paths and labels to tensors.
imagepaths = tf.convert_to_tensor(imagepaths, dtype=tf.string)
labels = tf.convert_to_tensor(labels, dtype=tf.int32)
# Build a TF Queue, shuffle data
# NOTE(review): from_tensor_slices() returns one Dataset object, not an
# (image, label) pair. Unpacking it into two names iterates the dataset
# itself, which is what raises "too many values to unpack (expected 2)".
image, label = tf.data.Dataset.from_tensor_slices((imagepaths, labels))
and am getting the following error:
image, label = tf.data.Dataset.from_tensor_slices((imagepaths, labels))
ValueError: too many values to unpack (expected 2)
Shouldn't Dataset.from_tensor_slices see this as the length of the tensor, not the number of inputs? How can I fix this issue or combine the data tensors into the same variable more effectively?
Just for reference:
There are 1800 imagepaths and 1800 labels corresponding to each other. And to be clear, the imagepaths are paths to the files where the jpgs images are located. My goal after this is to shuffle the data set and build the neural network model.
That code is right here:
# Read images from disk
encoded_image = tf.read_file(image)
decoded_image = tf.image.decode_jpeg(encoded_image, channels=CHANNELS)

# Resize images to a common size
resized_image = tf.image.resize_images(decoded_image, [IMG_HEIGHT, IMG_WIDTH])

# Normalize
image = resized_image * 1.0/127.5 - 1.0

# Create batches
X, Y = tf.train.batch(
    [image, label],
    batch_size=batch_size,
    capacity=batch_size * 8,
    num_threads=4,
)
try to do this:
def transform(entry):
    """Unpack one dataset element into an ``(img, lbl)`` pair.

    Args:
        entry: Two-item sequence holding the image path and its label.

    Returns:
        The tuple ``(img, lbl)``. Per-element processing can be added here.
    """
    img, lbl = entry
    return img, lbl


# Slice the paths and the labels as a tuple of two parallel sequences.
# Zipping them into (path, label) rows first would make TensorFlow try to
# build a single tensor out of mixed string/int values.
dataset = tf.data.Dataset.from_tensor_slices((imagepaths, labels))
# Each element is a (path, label) pair, which map() passes as two arguments.
dataset = dataset.map(lambda path, label: transform((path, label)))
and if you want to have a look at your dataset you can do it like this:
# Print the first element of the dataset.
for element in dataset.take(1):
    print(element)
you can add multiple map functions and you can after that use shuffle and batch on your dataset to prepare it for training ;)
I'm trying to use the code from a public repository to train a kNN model with a set of images. It originally works by computing the similarity between all the images in the cluster. But I'd like to use a new image (not included in the model) and get the most similar images from the original cluster.
This is the code to train the original kNN
# NOTE(review): imgs, eX and filename_heads are assumed to be lists created
# before this loop - they are not initialised in this snippet.
for f in os.listdir(path):
    # Process filename: split into name head and extension
    head, ext = os.path.splitext(f)
    if ext.lower() not in (".jpg", ".jpeg"):
        continue
    filename_full = os.path.join(path, f)  # full path filename

    # Read image file
    img = image.load_img(filename_full, target_size=(224, 224))  # load
    imgs.append(np.array(img))  # image

    # Pre-process for model input
    img = process_image(img)
    features = model.predict(img).flatten()  # features
    eX.append(features)  # append feature extractor

    # Record the name exactly once per image so that filename_heads stays
    # index-aligned with imgs and eX.
    filename_heads.append(head)

X = np.array(eX)  # feature vectors
imgs = np.array(imgs)  # images

# Ask for one neighbour more than the five wanted (presumably because each
# image is returned as its own nearest neighbour - TODO confirm).
n_neighbours = 5 + 1
knn = kNN()  # kNN model
knn.compile(n_neighbors=n_neighbours, algorithm="brute", metric="cosine")
knn.fit(X)
This is my code to query a new image and find similar ones in the original cluster
#previously I read the image from an url and put it in img variable
img = image.load_img('db/temp.jpg', target_size=(224, 224)) # load
img = image.img_to_array(img) # convert to array
# Add a leading axis so the single image forms a batch of one.
img = np.expand_dims(img, axis=0)
img = preprocess_input(img)
img_features = model.predict(img).flatten() # features
# NOTE(review): img_features is 1-D after flatten(), while the model was
# fitted on the 2-D array np.array(eX). predict() presumably expects a 2-D
# batch of feature vectors - TODO confirm against the kNN class.
distances, indices = knn.predict(img_features)
The problem is that I get an "IndexError: tuple index out of range" error when I run knn.predict(img_features). I've already looked at the shape and type of img_features and they're all the same, so I don't really know why this error appears. Maybe the error is because the kNN used here is not a classifier, but I don't know how to adapt it in order to make it work.
Full code link just in case you want to check it out.
The problem was that I had to pass the matrix this way:
# Wrap the single feature vector in a batch of one (shape (1, n_features)).
query_batch = np.asarray(img_features).reshape(1, -1)
distances, indices = knn.predict(query_batch)
I was playing around with Tensorflow for image classification. I used the image_retraining/retrain.py to retrain the inception library with new categories and used it to classify images using label_image.py from https://github.com/llSourcell/tensorflow_image_classifier/blob/master/src/label_image.py as below:
import tensorflow as tf
import sys
# change this as you see fit
image_path = sys.argv[1]

# Read in the image_data
image_data = tf.gfile.FastGFile(image_path, 'rb').read()

# Loads label file, strips off carriage return
labels_file = tf.gfile.GFile("/root/tf_files/output_labels.txt")
label_lines = [entry.rstrip() for entry in labels_file]

# Unpersists graph from file
with tf.gfile.FastGFile("/root/tf_files/output_graph.pb", 'rb') as graph_file:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(graph_file.read())
    tf.import_graph_def(graph_def, name='')

with tf.Session() as sess:
    # Feed the image_data as input to the graph and get first prediction
    softmax_tensor = sess.graph.get_tensor_by_name('final_result:0')
    #predictions = sess.run(softmax_tensor,{'DecodeJpeg/contents:0': image_data})
    predictions = sess.run(softmax_tensor,
                           {'DecodePng/contents:0': image_data})

    # Walk the labels of the first prediction from most to least confident
    scores = predictions[0]
    for node_id in scores.argsort()[::-1]:
        print('%s (score = %.5f)' % (label_lines[node_id], scores[node_id]))
I noticed two issues. When I retrain with new categories, it only trains JPG images. I am a noob in machine learning so not sure whether this is a limitation or is it possible to train other extension images like PNG, GIF?
Another one is when classifying the images the input is again only for JPG. I tried to change DecodeJpeg to DecodePng in label_image.py above but couldn't work. Another way I tried was to convert other formats into JPG before passing them in for classification like:
# Re-encode the GIF as an RGB JPEG and use that file for classification.
image_path1 = '/root/Desktop/test.jpg'
im = Image.open('/root/Desktop/200_s.gif').convert('RGB')
im.save(image_path1, "JPEG")
Is there any other way to do this? Does Tensorflow have functions to handle other image formats other than JPG?
I tried the following, feeding in a parsed image rather than JPEG data, as suggested by @mrry:
import tensorflow as tf
import sys
import numpy as np
from PIL import Image
# change this as you see fit
image_path = sys.argv[1]

# Read in the image_data
image_data = tf.gfile.FastGFile(image_path, 'rb').read()
image = Image.open(image_path)
image_array = np.array(image)[:,:,0:3] # Select RGB channels only.

# Loads label file, strips off carriage return
label_lines = [
    text.rstrip()
    for text in tf.gfile.GFile("/root/tf_files/output_labels.txt")
]

# Unpersists graph from file
with tf.gfile.FastGFile("/root/tf_files/output_graph.pb", 'rb') as pb_file:
    serialized_graph = pb_file.read()
graph_def = tf.GraphDef()
graph_def.ParseFromString(serialized_graph)
tf.import_graph_def(graph_def, name='')

with tf.Session() as sess:
    # Feed the decoded pixel array to the graph and get first prediction
    softmax_tensor = sess.graph.get_tensor_by_name('final_result:0')
    predictions = sess.run(softmax_tensor, {'DecodeJpeg:0': image_array})

    # Sort to show labels of first prediction in order of confidence
    first_prediction = predictions[0]
    ranked_ids = np.argsort(first_prediction)[::-1]
    for node_id in ranked_ids:
        human_string = label_lines[node_id]
        score = first_prediction[node_id]
        print('%s (score = %.5f)' % (human_string, score))
It works for JPEG images but when I use PNG or GIF it throws
Traceback (most recent call last):
File "label_image.py", line 17, in <module>
image_array = np.array(image)[:,:,0:3] # Select RGB channels only.
IndexError: too many indices for array
The model can only train on (and evaluate) JPEG images, because the GraphDef that you've saved in /root/tf_files/output_graph.pb only contains a tf.image.decode_jpeg() op, and uses the output of that op for making predictions. There are at least a couple of options for using other image formats:
Feed in parsed images rather than JPEG data. In the current program, you feed in a JPEG-encoded image as a string value for the tensor "DecodeJpeg/contents:0". Instead, you can feed in a 3-D array of decoded image data for the tensor "DecodeJpeg:0" (which represents the output of the tf.image.decode_jpeg() op), and you can use NumPy, PIL, or some other Python library to create this array.
Remap the image input in tf.import_graph_def(). The tf.import_graph_def() function enables you to connect two different graphs together by remapping individual tensor values. For example, you could do something like the following to add a new image-processing op to the existing graph:
# Placeholder for the PNG-encoded bytes of one image.
image_string_input = tf.placeholder(tf.string)
# Decode to three channels so that grayscale or RGBA PNGs still produce the
# RGB layout the JPEG decoder's output would have had.
image_decoded = tf.image.decode_png(image_string_input, channels=3)

# Unpersists graph from file
with tf.gfile.FastGFile("/root/tf_files/output_graph.pb", 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())

# Replace the JPEG decoder's output with the PNG decoder's output and fetch
# the result tensor. The keyword for requested outputs is `return_elements`.
softmax_tensor, = tf.import_graph_def(
    graph_def,
    input_map={"DecodeJpeg:0": image_decoded},
    return_elements=["final_result:0"])

with tf.Session() as sess:
    # Feed the image_data as input to the graph and get first prediction
    predictions = sess.run(softmax_tensor, {image_string_input: image_data})
    # ...
You should have a look at the tf.image package. It's got good functions to decode / encode JPEGs, GIFs and PNGs.
Following @mrry's suggestion to feed in a parsed image, I converted the image data into an array and converted it to RGB, as shown in the code below. Now I am able to feed in JPG, PNG and GIF.
import tensorflow as tf
import sys
import numpy as np
from PIL import Image
# change this as you see fit
image_path = sys.argv[1]

# Decode the file with PIL and force RGB so that palette-based (GIF/PNG),
# grayscale and RGBA inputs all end up with three colour channels. The
# result is turned into an ndarray explicitly before being fed to the graph,
# and the `with` block closes the file once the pixels are loaded.
with Image.open(image_path) as image:
    image_array = np.array(image.convert('RGB'))

# Loads label file, strips off carriage return
label_lines = [line.rstrip() for line
               in tf.gfile.GFile("/root/tf_files/output_labels.txt")]

# Unpersists graph from file
with tf.gfile.FastGFile("/root/tf_files/output_graph.pb", 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())
    _ = tf.import_graph_def(graph_def, name='')

with tf.Session() as sess:
    # Feed the decoded pixels as input to the graph and get first prediction
    softmax_tensor = sess.graph.get_tensor_by_name('final_result:0')
    predictions = sess.run(softmax_tensor, {'DecodeJpeg:0': image_array})

    # Sort to show labels of first prediction in order of confidence
    top_k = predictions[0].argsort()[::-1]
    for node_id in top_k:
        human_string = label_lines[node_id]
        score = predictions[0][node_id]
        print('%s (score = %.5f)' % (human_string, score))