I am trying to build a website that makes predictions on images using TensorFlow, Flask, and Python.
This is my code:
from flask import Flask, render_template
import os
import numpy as np
import pandas as pd

app = Flask(__name__)

@app.route('/')
def index():
    return render_template('index.html')

import tensorflow as tf
import tensorflow_hub as hub

model = tf.keras.models.load_model(MODEL_PATH)

IMG_SIZE = 224
BATCH_SIZE = 32

custom_path = "http://t1.gstatic.com/licensed-image?q=tbn:ANd9GcQd6lM4HtInRF3cxw6h3MgUZIIiJCdMgFvXKrhaJrbw61tN3aYpMIVBi0dx0KPv1sdCrLk0sBhPeNVt8m0"
custom_data = create_data_batches(custom_path, test_data=True)
custom_preds = model.predict(custom_data)

# Get custom image prediction labels
custom_pred_labels = [get_pred_label(custom_preds[i]) for i in range(len(custom_preds))]
print(custom_pred_labels)

@app.route('/my-link/')
def my_link():
    return f"The predictions are: {custom_pred_labels}"

if __name__ == '__main__':
    app.run(host="localhost", port=3000, debug=True)
The process_image function:
def process_image(image_path, img_size=IMG_SIZE):
    """
    Takes an image file path and turns the image into a Tensor.
    """
    image = tf.io.read_file(image_path)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.convert_image_dtype(image, tf.float32)
    image = tf.image.resize(image, size=[img_size, img_size])
    return image
The needed part of the create_data_batches function:
def create_data_batches(X, y=None, batch_size=BATCH_SIZE, valid_data=False, test_data=False):
    """
    Creates batches of data out of image (X) and label (y) pairs.
    Shuffles the data if it's training data but doesn't shuffle it if it's validation data.
    Also accepts test data as input (no labels).
    """
    if test_data:
        print("Creating test data batches...")
        data = tf.data.Dataset.from_tensor_slices((tf.constant(X)))  # only filepaths (no labels)
        data_batch = data.map(process_image).batch(BATCH_SIZE)
        return data_batch
The get_image_label function:
def get_image_label(image_path, label):
    """
    Takes an image file path name and the associated label, processes the image and returns a tuple of (image, label).
    """
    image = process_image(image_path)
    return image, label
The get_pred_label function:
def get_pred_label(prediction_probabilities):
    """
    Turns an array of prediction probabilities into a label.
    """
    return unique_breeds[np.argmax(prediction_probabilities)]
Now when I run this, I get the following error:
ValueError: Unbatching a tensor is only supported for rank >= 1
I tried turning the path into a list, as one suggested solution recommended (since from_tensor_slices on a bare string produces a rank-0 dataset):
custom_path = ["http://t1.gstatic.com/licensed-image?q=tbn:ANd9GcQd6lM4HtInRF3cxw6h3MgUZIIiJCdMgFvXKrhaJrbw61tN3aYpMIVBi0dx0KPv1sdCrLk0sBhPeNVt8m0"]
But when I run that, I get this error:
UNIMPLEMENTED: File system scheme 'http' not implemented (file: 'http://t1.gstatic.com/licensed-image?q=tbn:ANd9GcQd6lM4HtInRF3cxw6h3MgUZIIiJCdMgFvXKrhaJrbw61tN3aYpMIVBi0dx0KPv1sdCrLk0sBhPeNVt8m0')
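One workaround I'm considering (an untested sketch on my part, based on the assumption that tf.io.read_file only understands local/GCS/HDFS paths, not http): download the image to a local file first, then pass that local path in a list:

import tensorflow as tf

# Untested sketch: fetch the remote image to local disk first, since
# tf.io.read_file has no handler for the 'http' scheme.
local_path = tf.keras.utils.get_file(
    fname="custom_image.jpg",
    origin=custom_path)  # custom_path is the http URL string from above

# Pass a list so the dataset has rank >= 1 (avoids the unbatching error)
custom_data = create_data_batches([local_path], test_data=True)
custom_preds = model.predict(custom_data)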
Any help would be appreciated.
So I am following a tutorial for making a dataloader for images (https://github.com/codebasics/deep-learning-keras-tf-tutorial/blob/master/44_tf_data_pipeline/tf_data_pipeline.ipynb).
The full code is something like this:
images_ds = tf.data.Dataset.list_files("path/class/*")

def get_label(file_path):
    import os
    parts = tf.strings.split(file_path, os.path.sep)
    return parts[-2]

## How the tutorial does it
def process_image(file_path):
    label = get_label(file_path)
    img = tf.io.read_file(file_path)
    img = tf.image.decode_jpeg(img)
    return img, label

## How I want to do it
def process_image(file_path):
    label = get_label(file_path)
    img = np.load(file_path)
    img = tf.convert_to_tensor(img)
    return img, label

train_ds = images_ds.map(process_image)
In the tutorial, the data is a .jpeg. However, my data is a .npy.
Therefore, loading the data with the following code does not work:
img = tf.io.read_file(file_path)
img = tf.image.decode_jpeg(img)
I want to work around this problem, but my solution does not work.
img = np.load(file_path)
img = tf.convert_to_tensor(img)
It does work when I feed the process_image function a single instance. However, when I use the .map function, I get an error.
Error:
TypeError: expected str, bytes or os.PathLike object, not Tensor
Is there an equivalent function to tf.image.decode_image() for decoding a numpy array and/or can someone help me with my current error?
The comment from @André put me in the right direction. The code below works.
def process_image(file_path):
    label = get_label(file_path)
    label = np.uint8(label)
    img = np.load(file_path)
    img = tf.convert_to_tensor(img / 255, dtype=tf.float32)
    return img, label

train_ds = images_ds.map(lambda item: tf.numpy_function(
    process_image, [item], (tf.float32, tf.uint8)))
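A note on why this works, plus one caveat (this part is my own addition, not from André's comment): inside Dataset.map, file_path is a symbolic tf.Tensor, which np.load cannot open; tf.numpy_function runs process_image eagerly on the concrete value instead. The trade-off is that it erases static shape information, so downstream Keras layers may not be able to infer input dimensions. A hedged sketch to restore them, where IMG_SHAPE is a placeholder for your actual .npy array shape:

IMG_SHAPE = (64, 64, 3)  # placeholder: replace with your actual array shape

# Reattach the static shapes that tf.numpy_function discarded.
train_ds = train_ds.map(
    lambda img, label: (tf.ensure_shape(img, IMG_SHAPE),
                        tf.ensure_shape(label, [])))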
I am a beginner in PyTorch. I want to train a network using the NYU dataset, but I am getting an error. The error happens when I use the DataLoader to load my local dataset and try to print the data to verify the code is right:
test = Mydataset(data_root, transforms, 'image_train')
test2 = DataLoader(test, batch_size=4, num_workers=0, shuffle=False)
for idx, data in enumerate(test2):
    print(idx)
Here's the rest of the code with the Mydataset definition:
from __future__ import division, absolute_import, print_function
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import transforms

data_root = 'D:/AuxiliaryDocuments/NYU/'
transforms = transforms.Compose([transforms.ToPILImage(),
                                 transforms.Resize(224, 101),
                                 transforms.ToTensor()])
filename_txt = {'image_train': 'image_train.txt', 'image_test': 'image_test.txt',
                'depth_train': 'depth_train.txt', 'depth_test': 'depth_test.txt'}

class Mydataset(Dataset):
    def __init__(self, data_root, transformation, data_type):
        self.transform = transformation
        self.image_path_txt = filename_txt[data_type]
        self.sample_list = list()
        f = open(data_root + '/' + data_type + '/' + self.image_path_txt)
        lines = f.readlines()
        for line in lines:
            line = line.strip()
            line = line.replace(';', '')
            self.sample_list.append(line)
        f.close()

    def __getitem__(self, index):
        item = self.sample_list[index]
        img = Image.open(item)
        if self.transform is not None:
            img = self.transform(img)
        idx = index
        return idx, img

    def __len__(self):
        return len(self.sample_list)
The error in the title is different from the one in the image (which you should have posted as text, by the way). Assuming the one from the image is correct, your problem is the following:
Your transforms pipeline begins with transforms.ToPILImage(), but the image is already opened as a PIL image in your dataset's __getitem__. If you remove that transformation, the code should run just fine.
# [...]
transforms = transforms.Compose([
    transforms.ToPILImage(),  # <<< remove this
    transforms.Resize(224, 101),
    transforms.ToTensor()
])
# [...]

class Mydataset(Dataset):
    # [...]
    def __getitem__(self, index):
        item = self.sample_list[index]
        img = Image.open(item)  # <<< this image is already a PIL image
        if self.transform is not None:
            img = self.transform(img)
        idx = index
        return idx, img
    # [...]
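One more thing worth flagging (my observation, not part of the original answer): transforms.Resize(224, 101) passes 101 as the second positional argument, which torchvision interprets as the interpolation mode, not a width. If a 224x101 output was intended, the size should be a single tuple:

from torchvision import transforms

# Hedged sketch: pass the target size as one (height, width) tuple,
# otherwise the second positional argument is treated as interpolation.
transform = transforms.Compose([
    transforms.Resize((224, 101)),
    transforms.ToTensor()
])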
I've deployed a custom PyTorch model to Google AI Platform for prediction, but when I try to make a prediction request with image data using gcloud tools, I get the following error in response:
{
"error": "Prediction failed: unknown error."
}
I've tried encoding my image data in b64 format and placing it into a multidimensional Python array, by doing the following:
pil_im = Image.open('Pic512.png')
pil_im = pil_im.resize((224, 224)).convert('RGB')
im_arr = np.asarray(pil_im)
py_arr = im_arr.tolist()
json_instance_1 = {'instances': py_arr}
with open('json_instance_1.json', 'w') as f:
    json.dump(json_instance_1, f)
I converted it into b64 like so, after adjusting my Predictor code accordingly:
with open('Pic512.png', 'rb') as f:
    byte_im = f.read()
json_instance = {'instances': {'b64': base64.b64encode(byte_im).decode()}}
with open('json_instance.json', 'w') as f:
    json.dump(json_instance, f)
I've tried converting with different file formats and similar methods, but all of them give me the same error.
My predictor module:
from facenet_pytorch import MTCNN, InceptionResnetV1, extract_face
import torch
from torchvision import transforms
from torch.nn import functional as F
from PIL import Image
# from sklearn.externals import joblib
import numpy as np
import os
import io
import base64


class MyPredictor(object):
    """An example Predictor for an AI Platform custom prediction routine."""

    def __init__(self, model, preprocessor, device):
        """Stores artifacts for prediction. Only initialized via `from_path`."""
        self._resnet = model
        self._mtcnn_mult = preprocessor
        self._device = device
        self.get_std_tensor = transforms.Compose([
            np.float32,
            np.uint8,
            transforms.ToTensor(),
        ])
        self.tensor2pil = transforms.ToPILImage(mode='RGB')
        self.trans_resnet = transforms.Compose([
            transforms.Resize((100, 100)),
            np.float32,
            transforms.ToTensor()
        ])

    def predict(self, instances, **kwargs):
        pil_transform = transforms.Resize((512, 512))

        imarr = np.uint8(np.array(instances))
        # img_bytes_string = io.BytesIO(base64.b64decode(instances))
        pil_im = Image.fromarray(imarr)
        # pil_im = Image.open(img_bytes_string)
        image = pil_im.convert('RGB')
        pil_im_512 = pil_transform(image)

        boxes, _ = self._mtcnn_mult.detect(pil_im_512)
        box = boxes[0]
        face_tensor = extract_face(pil_im_512, box, margin=40)

        std_tensor = self.get_std_tensor(face_tensor.permute(1, 2, 0))
        cropped_pil_im = self.tensor2pil(std_tensor)
        face_tensor = self.trans_resnet(cropped_pil_im)
        face_tensor4d = face_tensor.unsqueeze(0)
        face_tensor4d = face_tensor4d.to(self._device)

        self._resnet.eval()
        prediction = self._resnet(face_tensor4d)
        preds = F.softmax(prediction, dim=1).detach().numpy().reshape(-1)
        print('probability of (class1, class2) = ({:.4f}, {:.4f})'.format(preds[0], preds[1]))

        return {'probs': preds.tolist()}

    @classmethod
    def from_path(cls, model_dir):
        device_path = os.path.join(model_dir, 'device_cpu.pt')
        device = torch.load(device_path)

        model_path = os.path.join(model_dir, 'FullResNetRefinedExtra_no_norm_100x100_8634.pt')
        classifier = torch.load(model_path, map_location=device)

        mtcnn_path = os.path.join(model_dir, 'mtcnn_mult.pt')
        mtcnn_mult = torch.load(mtcnn_path)

        return cls(classifier, mtcnn_mult, device)
When I test the class locally everything works, so I assume it's a problem related to the serialisation and deserialisation on the Google Platform side. How can I resolve this issue?
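One thing I would double-check (a guess on my part, not a confirmed fix): AI Platform treats each element of "instances" as one prediction instance. With {'instances': py_arr}, every row of the image array becomes its own instance, and the b64 variant has the same nesting issue. A sketch of the nesting I believe is expected, reusing py_arr and byte_im from the snippets above:

import base64

# Single image as a nested-list instance: note the extra outer list.
json_instance_1 = {'instances': [py_arr]}

# Single b64 instance: a one-element list of {'b64': ...} dicts, which
# predict() would then receive as instances[0]['b64'].
json_instance = {'instances': [{'b64': base64.b64encode(byte_im).decode()}]}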
I'm working with a piece of code written by someone else for domain generalization, and as part of it, I have a dataloader set up for loading my training, validation, and test data for one of my datasets. The code works fine when I load in the train or test data, but when I try to load the val data, I get

ValueError: could not broadcast input array from shape (320,371) into shape (320)

in the load_samples function at the images = np.asarray(images) line. I understand what this error is saying, but I can't for the life of me figure out why it's saying it. The code for the val section is identical to the train and test sections, and the csv file I'm reading from has the exact same format as the other two csv files. I'm also calling the get_chexpert function for each of them the exact same way. Additionally, the dataloader for my other dataset has nearly identical code to this one and can create the validation set just fine.

I tried testing whether it was the csv file by replacing the val csv with the test csv, but I still get the same error. Can anyone point out what I'm doing wrong? I feel like it must be some stupidly obvious mistake, but I just can't see it.
import os
import csv
from PIL import Image
import numpy as np
import torch
import torch.utils.data as data
from torchvision import datasets, transforms
import params


class Chexpert(data.Dataset):
    def __init__(self, root, train=True, val=False, transform=None):
        """Init chexpert dataset."""
        # init params
        self.root = os.path.expanduser(root)
        self.train = train
        self.val = val
        self.transform = transform
        self.dataset_size = None

        self.train_data, self.train_labels = self.load_samples()
        if self.train:
            total_num_samples = self.train_labels.shape[0]
            indices = np.arange(total_num_samples)
            np.random.shuffle(indices)
            self.train_data = self.train_data[indices[0:self.dataset_size]]
            self.train_labels = self.train_labels[indices[0:self.dataset_size]]

    def __getitem__(self, index):
        """Get images and target for data loader.

        Args:
            index (int): Index

        Returns:
            tuple: (image, target) where target is index of the target class.
        """
        img, label = self.train_data[index], self.train_labels[index]
        if self.transform is not None:
            img = self.transform(img)
        label = torch.LongTensor([np.int64(label).item()])
        return img, label

    def __len__(self):
        """Return size of dataset."""
        return self.dataset_size

    def load_samples(self):
        """Load sample images from dataset."""
        # some arbitrary limits so I'm not loading 100,000 images while debugging
        numtr = 50
        numts = 20
        numvl = 10
        data_root = os.path.join(self.root, 'CheXpert-v1.0-small')
        images = []
        labels = []
        if self.val:
            val_info = csv.reader(open(os.path.join(data_root, 'effusion-val-split.csv'), 'r'))
            for count, row in enumerate(val_info):
                if count == numvl:
                    break
                image = np.array(Image.open(os.path.join(self.root, row[0])))
                images.append(image)
                labels.append(row[1])
        elif self.train:
            train_info = csv.reader(open(os.path.join(data_root, 'effusion-train-split.csv'), 'r'))
            for count, row in enumerate(train_info):
                if count == numtr:
                    break
                image = np.array(Image.open(os.path.join(self.root, row[0])))
                images.append(image)
                labels.append(row[1])
        elif not self.val and not self.train:
            test_info = csv.reader(open(os.path.join(data_root, 'effusion-test-split.csv'), 'r'))
            for count, row in enumerate(test_info):
                if count == numts:
                    break
                image = np.array(Image.open(os.path.join(self.root, row[0])))
                images.append(image)
                labels.append(row[1])
        images = np.asarray(images)
        labels = np.asarray(labels)
        self.dataset_size = labels.shape[0]
        return images, labels
def get_chexpert(train=True, val=False):
    """Get chexpert dataset loader."""
    # image pre-processing
    pre_process = transforms.Compose([transforms.ToPILImage(),
                                      transforms.Resize((224, 224)),
                                      transforms.ToTensor(),
                                      # transforms.Normalize(
                                      #     mean=params.dataset_mean,
                                      #     std=params.dataset_std)
                                      ])

    # dataset and data loader
    chexpert_dataset = Chexpert(root=params.data_root,
                                train=train,
                                val=val,
                                transform=pre_process)

    chexpert_data_loader = torch.utils.data.DataLoader(
        dataset=chexpert_dataset,
        batch_size=params.batch_size,
        shuffle=True)

    return chexpert_data_loader
if __name__ == '__main__':
    # load dataset
    print("Loading Source Train Data")
    src_data_loader = get_chexpert()
    print("Loading Source Validation Data")
    src_data_loader_val = get_chexpert(train=False, val=True)
    print("Loading Source Test Data")
    src_data_loader_eval = get_chexpert(train=False)
    print("Loading Target Train Data")
    tgt_data_loader = get_nih()
    print("Loading Target Validation Data")
    tgt_data_loader_val = get_nih(train=False, val=True)
    print("Loading Target Test Data")
    tgt_data_loader_eval = get_nih(train=False)
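A guess at what is going on (not a confirmed diagnosis): np.asarray(images) can only build a proper array if every image has the same shape. If one val image is, say, 320x371 while another has different dimensions (or a different number of channels), NumPy falls back to broadcasting and raises exactly this error. A quick diagnostic sketch to drop into load_samples, just before the np.asarray call:

# Print each image's shape; any mismatch (different sizes, or
# grayscale vs RGB) would explain the broadcast error.
for i, image in enumerate(images):
    print(i, image.shape)

# One common remedy is to force a common size/mode at load time, e.g.
# (the (371, 320) target here is a placeholder, not a known-correct size):
image = np.array(Image.open(os.path.join(self.root, row[0]))
                 .convert('L').resize((371, 320)))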
I have deployed my object detection model to Google Kubernetes Engine. My model is trained using the faster_rcnn_resnet101_pets configuration. The inference time of my model is very high (~10 seconds total for a prediction) even though I am using an Nvidia Tesla K80 GPU in my cluster node. I am using gRPC to get predictions from the model. The script for making prediction requests is:
import argparse
import os
import time
import sys
import tensorflow as tf
from PIL import Image
import numpy as np
from grpc.beta import implementations
sys.path.append("..")
from object_detection.core.standard_fields import \
    DetectionResultFields as dt_fields
from object_detection.utils import label_map_util
from argparse import RawTextHelpFormatter
from tensorflow_serving.apis import predict_pb2
from tensorflow_serving.apis import prediction_service_pb2_grpc

tf.logging.set_verbosity(tf.logging.INFO)

WIDTH = 1024
HEIGHT = 768


def load_image_into_numpy_array(input_image):
    image = Image.open(input_image)
    image = image.resize((WIDTH, HEIGHT), Image.ANTIALIAS)
    (im_width, im_height) = image.size
    image_arr = np.array(image.getdata()).reshape(
        (im_height, im_width, 3)).astype(np.uint8)
    image.close()
    return image_arr


def load_input_tensor(input_image):
    image_np = load_image_into_numpy_array(input_image)
    image_np_expanded = np.expand_dims(image_np, axis=0).astype(np.uint8)
    tensor = tf.contrib.util.make_tensor_proto(image_np_expanded)
    return tensor


def main(args):
    start_main = time.time()

    host, port = args.server.split(':')
    channel = implementations.insecure_channel(host, int(port))._channel
    stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)

    request = predict_pb2.PredictRequest()
    request.model_spec.name = args.model_name

    input_tensor = load_input_tensor(args.input_image)
    request.inputs['inputs'].CopyFrom(input_tensor)

    start = time.time()
    result = stub.Predict(request, 60.0)
    end = time.time()

    output_dict = {}
    output_dict[dt_fields.detection_classes] = np.squeeze(
        result.outputs[dt_fields.detection_classes].float_val).astype(np.uint8)
    output_dict[dt_fields.detection_boxes] = np.reshape(
        result.outputs[dt_fields.detection_boxes].float_val, (-1, 4))
    output_dict[dt_fields.detection_scores] = np.squeeze(
        result.outputs[dt_fields.detection_scores].float_val)

    category_index = label_map_util.create_category_index_from_labelmap(
        args.label_map, use_display_name=True)
    classes = output_dict[dt_fields.detection_classes]
    scores = output_dict[dt_fields.detection_scores]
    classes.shape = (1, 300)
    scores.shape = (1, 300)
    print("prediction time : " + str(end - start))

    objects = []
    threshold = 0.5  # in order to get higher percentages you need to lower this number; usually at 0.01 you get 100% predicted objects
    for index, value in enumerate(classes[0]):
        object_dict = {}
        if scores[0, index] > threshold:
            object_dict[(category_index.get(value)).get('name').encode('utf8')] = \
                scores[0, index]
            objects.append(object_dict)
    print(objects)

    end_main = time.time()
    print("Overall Time : " + str(end_main - start_main))


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="Object detection grpc client.",
                                     formatter_class=RawTextHelpFormatter)
    parser.add_argument('--server', type=str, default='localhost:9000',
                        help='PredictionService host:port')
    parser.add_argument('--model_name', type=str, default="my-model",
                        help='Name of the model')
    parser.add_argument('--input_image', type=str, default='./test_images/123.jpg',
                        help='Path to input image')
    parser.add_argument('--output_directory', type=str, default='./',
                        help='Path to output directory')
    parser.add_argument('--label_map', type=str, default="./data/object_detection.pbtxt",
                        help='Path to label map file')
    args = parser.parse_args()
    main(args)
I have used kubectl port forwarding for testing purposes, so the request port is set to localhost:9000.
The output is :
prediction time : 6.690936326980591
[{b'goi_logo': 0.9999970197677612}]
Overall Time : 10.25893259048462
What can I do to make my inference faster? I have seen reported inference times on the order of milliseconds, so in comparison 10 seconds is a very long duration and unfit for production environments. I understand that port forwarding is slow. What other method can I use? I need to make this client available to the world as an API endpoint.
As previous answers stated, you should indeed try to make multiple requests, because TF Serving incurs some warm-up overhead on the first request(s). You can avoid this by using a warm-up script.
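For example, a minimal warm-up sketch (my own illustration, reusing the model name and the load_input_tensor helper from the question's script, so treat those specifics as assumptions):

import time
import grpc
from tensorflow_serving.apis import predict_pb2, prediction_service_pb2_grpc

channel = grpc.insecure_channel("localhost:9000")
stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)

request = predict_pb2.PredictRequest()
request.model_spec.name = "my-model"
request.inputs['inputs'].CopyFrom(load_input_tensor("./test_images/123.jpg"))

# Fire a few throwaway requests so real traffic doesn't pay the
# first-request graph-initialization cost.
for i in range(3):
    t0 = time.time()
    stub.Predict(request, 60.0)
    print("warm-up request %d: %.2fs" % (i, time.time() - t0))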
To add some extra options:
From TF Serving v1.8 you can also use the HTTP REST API. Then you can call the service you created on GKE from a Google Compute Engine instance to reduce connection lag. In my case it gave a big speed-up because my local connection was mediocre at best. Besides the HTTP REST API being easier to debug, it also lets you send much bigger requests: the gRPC limit seems to be 1.5 MB, while the HTTP one is a lot higher.
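As an illustration of the REST route (a hedged sketch: the host, port, and model name are placeholders matching the question's setup, and it assumes your serving graph accepts b64-encoded image strings as described below):

import base64
import json
import requests

# TF Serving's HTTP API listens on port 8501 by default; the endpoint
# format is /v1/models/<name>:predict.
url = "http://localhost:8501/v1/models/my-model:predict"
with open("./test_images/123.jpg", "rb") as f:
    jpeg_b64 = base64.b64encode(f.read()).decode("utf-8")

payload = {"instances": [{"b64": jpeg_b64}]}
response = requests.post(url, data=json.dumps(payload))
print(response.json())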
Are you sending b64-encoded images? Sending the images themselves is a lot slower than sending b64-encoded strings. The way I handled this was to send b64-encoded strings of the images and create some extra layers in front of my network that transform the string back into a jpeg image and then process it through the model. Some code to help you on your way:
from keras.applications.inception_v3 import InceptionV3, preprocess_input
from keras.models import Model
import numpy as np
import cv2
import tensorflow as tf
from keras.layers import Input, Lambda
from keras import backend as K

base_model = InceptionV3(
    weights='imagenet',
    include_top=True)
model = Model(
    inputs=base_model.input,
    outputs=base_model.get_layer('avg_pool').output)


def prepare_image(image_str_tensor):
    # image = tf.squeeze(tf.cast(image_str_tensor, tf.string), axis=[0])
    image_str_tensor = tf.cast(image_str_tensor, tf.string)
    image = tf.image.decode_jpeg(image_str_tensor,
                                 channels=3)
    # image = tf.divide(image, 255)
    # image = tf.expand_dims(image, 0)
    image = tf.image.convert_image_dtype(image, tf.float32)
    return image


def prepare_image_batch(image_str_tensor):
    return tf.map_fn(prepare_image, image_str_tensor, dtype=tf.float32)


# IF BYTE STR
model.layers.pop(0)
print(model.layers[0])

input_img = Input(dtype=tf.string,
                  name='string_input',
                  shape=())
outputs = Lambda(prepare_image_batch)(input_img)
outputs = model(outputs)

inception_model = Model(input_img, outputs)
inception_model.compile(optimizer="sgd", loss='categorical_crossentropy')
weights = inception_model.get_weights()
Apart from that, I would say use a bigger GPU. I have a basic YOLO (Keras implementation) now running on a P100 with about 0.4 s latency when called from a Compute Engine instance. We noticed that the darknet implementation (in C++) is a lot faster than the Keras implementation, though.