Wrong classification with ImageNet - Python

I used VGG16 with ImageNet weights to predict an image, but unfortunately the prediction was wrong.
I don't know where I went wrong.
Code:
from keras.preprocessing import image as image_util
from keras.applications.imagenet_utils import preprocess_input
from keras.applications.imagenet_utils import decode_predictions
from keras.applications import VGG16
import numpy as np
import argparse
import cv2

ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True, help="path of the image")
args = vars(ap.parse_args())

image = image_util.load_img(args["image"], target_size=(224, 224))
image = image_util.img_to_array(image)
print(image.shape)
image = np.expand_dims(image, axis=0)  # (224,224,3) --> (1,224,224,3)
print(image.shape)
image = preprocess_input(image)

# Loading the model
model = VGG16(weights="imagenet")
pred = model.predict(image)
p = decode_predictions(pred)

for (i, (imagenetID, label, prob)) in enumerate(p[0]):
    print("{}. {}: {:.2f}%".format(i + 1, label, prob * 100))

orig = cv2.imread(args["image"])  # OpenCV function to load an image
(imagenetID, label, prob) = p[0][0]
cv2.putText(orig, "Label:{},{:.2f}%".format(label, prob * 100), (10, 30),
            cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
cv2.imshow("classification", orig)
cv2.waitKey(0)
Output:
The output should be an apple, but I got Granny Smith.

Related

I'm trying to extract features from a pretrained ResNet50 using a DataLoader, but it doesn't work

import os
import numpy as np
from google.colab import drive
import csv
import json
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
from torch.autograd import Variable
import torchvision
from torchvision import models
import torchvision.transforms as transforms
import pandas as pd
import cv2
import shutil
import time
from tqdm import tqdm
from PIL import Image

def getVector(img):
    model = models.resnet50(pretrained=True)
    model.eval()
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = Image.fromarray(img)
    preprocess = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])
    input_tensor = preprocess(img)
    input_tensor = input_tensor.unsqueeze(0)
    with torch.no_grad():
        my_output = None

        def my_hook(m, i, o):
            nonlocal my_output
            my_output = o

        a_hook = model.avgpool.register_forward_hook(my_hook)
        model(input_tensor)
        a_hook.remove()
    return my_output

def makeFeature(filePath):
    fpsCount = 0
    if os.path.isfile(filePath):
        getVideo = cv2.VideoCapture(filePath)
    else:
        print("no file || file name: " + str(filePath))
    fps = int(getVideo.get(cv2.CAP_PROP_FPS))
    vidInput = []
    print(fps)
    print(getVideo.isOpened())
    while getVideo.isOpened():
        ret, getImage = getVideo.read()
        if ret == False:
            break
        if int(getVideo.get(1)) % fps == 0:
            feature = getVector(getImage)
            vidInput.append(feature)
            fpsCount = fpsCount + 1
            if fpsCount == 10:
                break
        break
    #name = filePath.replace("mp4","npy")
    #np.save(name, vidInput)
    getVideo.release()
    return vidInput

class CustomDataset(Dataset):  # inherits Dataset
    def __init__(self):
        #self.train_list = glob.glob('*.mp4')
        self.train_list = '/content/drive/MyDrive/abseiling_0.mp4'

    def __len__(self):
        return len(self.train_list)

    def __getitem__(self, idx):
        return makeFeature(self.train_list)

dataset = CustomDataset()
dataloader = DataLoader(dataset, batch_size=1, shuffle=True)
for i in dataloader:
    print(len(i))
    np.save('test_save', i)
The code reads the video, takes one frame per second, extracts the feature vector from the last layer of the pretrained ResNet, appends those features to a list, and then tries to np.save the result.
But when I print len in the last line, not only is that value printed several times, all of the lengths are printed as 0.
Please tell me how to solve it 😥
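For what it's worth, two things in the snippet look suspect: train_list is a single path string, so __len__ returns the number of characters in the path rather than the number of videos, and the stray break at the bottom of the while loop (if it really sits at that indentation) stops reading after the first frame. A minimal sketch of a corrected dataset, assuming makeFeature from above with that stray break removed; the Drive glob pattern is purely illustrative:

import glob
import torch
from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):
    def __init__(self):
        # A list of paths, so __len__ counts videos, not characters.
        self.train_list = glob.glob('/content/drive/MyDrive/*.mp4')

    def __len__(self):
        return len(self.train_list)

    def __getitem__(self, idx):
        # Index into the list instead of always passing the same string.
        features = makeFeature(self.train_list[idx])
        # Stack the per-frame (1, 2048, 1, 1) activations into (n_frames, 2048).
        return torch.stack([f.squeeze() for f in features])

Loading resnet50 once at module level instead of inside getVector would also avoid rebuilding the model for every frame.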

In image processing, after applying a preprocessing technique, why do we need to de-process the images for further computation?

Actually, I was working on a guided project on neural style transfer, in which I came across the concept of preprocessing and then deprocessing an image. It would be great if anyone could help me understand this concept.
from PIL import Image
from torchvision import transforms as T

def preprocess(img_path, max_size=500):
    image = Image.open(img_path).convert('RGB')
    if max(image.size) > max_size:
        size = max_size
    else:
        size = max(image.size)
    img_transform = T.Compose([
        T.Resize(size),
        T.ToTensor(),
        T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])
    image = img_transform(image)
    image = image.unsqueeze(0)
    return image
....
# Deprocess image
import numpy as np
import matplotlib.pyplot as plt

def deprocess(tensor):
    image = tensor.to('cpu').clone()
    image = image.numpy()
    image = image.squeeze(0)          # (1,3,224,224) -> (3,224,224)
    image = image.transpose(1, 2, 0)  # (3,224,224) -> (224,224,3)
    # Undo the normalization: x * std + mean
    image = image * np.array([0.229, 0.224, 0.225]) + np.array([0.485, 0.456, 0.406])
    image = image.clip(0, 1)
    return image

content_d = deprocess(content_p)
style_d = deprocess(style_p)
print("Deprocess content:", content_d.shape)
print("Deprocess style:", style_d.shape)

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))
ax1.imshow(content_d)
ax2.imshow(style_d)
I would like to know: can we perform the further computation on the image using just the preprocessed image, without actually deprocessing it?
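For context, a short sketch of the distinction (the file name is illustrative): the network does all of its computation on the normalized tensor, and deprocess only inverts ToTensor/Normalize so that matplotlib, which expects RGB values in [0, 1], can display the result.

# All optimization/feature extraction happens in normalized space;
# deprocess is only needed for visualization.
content_p = preprocess('content.jpg')  # normalized tensor, shape (1, 3, H, W)

# Further computation (e.g. VGG features for style/content losses)
# consumes the preprocessed tensor directly:
# features = vgg(content_p)

# Deprocessing is required only to *look* at the image, because
# plt.imshow expects un-normalized RGB in [0, 1]:
plt.imshow(deprocess(content_p))
plt.show()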

How to crop the image with the bounding rectangle after detecting it with a YOLO net

I have created a model to recognize objects in an image, and it works fine for me: I have the code that detects the object according to the already-trained weights, and so on. But I need to create a new image containing only what I have detected. For example, if I have an image of a cat in a park, I want to create a new image with only the cat I detected. How could I do that? Here is my current code, in which I detect the object:
from __future__ import division

from models import *
from utils.utils import *
from utils.datasets import *

import os
import sys
import time
import datetime
import argparse
import random

import numpy as np
from PIL import Image

import torch
from torch.utils.data import DataLoader
from torchvision import datasets
from torch.autograd import Variable

import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.ticker import NullLocator

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--image_folder", type=str, default="data/samples", help="path to dataset")
    parser.add_argument("--model_def", type=str, default="config/yolov3.cfg", help="path to model definition file")
    parser.add_argument("--weights_path", type=str, default="weights/yolov3.weights", help="path to weights file")
    parser.add_argument("--class_path", type=str, default="data/coco.names", help="path to class label file")
    parser.add_argument("--conf_thres", type=float, default=0.8, help="object confidence threshold")
    parser.add_argument("--nms_thres", type=float, default=0.5, help="iou threshold for non-maximum suppression")
    parser.add_argument("--batch_size", type=int, default=1, help="size of the batches")
    parser.add_argument("--n_cpu", type=int, default=0, help="number of cpu threads to use during batch generation")
    parser.add_argument("--img_size", type=int, default=416, help="size of each image dimension")
    parser.add_argument("--checkpoint_model", type=str, help="path to checkpoint model")
    opt = parser.parse_args()
    print(opt)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    os.makedirs("output", exist_ok=True)

    # Set up model
    model = Darknet(opt.model_def, img_size=opt.img_size).to(device)
    if opt.weights_path.endswith(".weights"):
        # Load darknet weights
        model.load_darknet_weights(opt.weights_path)
    else:
        # Load checkpoint weights
        model.load_state_dict(torch.load(opt.weights_path))
    model.eval()  # Set in evaluation mode

    dataloader = DataLoader(
        ImageFolder(opt.image_folder, img_size=opt.img_size),
        batch_size=opt.batch_size,
        shuffle=False,
        num_workers=opt.n_cpu,
    )

    classes = load_classes(opt.class_path)  # Extracts class labels from file
    Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor

    imgs = []  # Stores image paths
    img_detections = []  # Stores detections for each image index

    print("\nPerforming object detection:")
    prev_time = time.time()
    for batch_i, (img_paths, input_imgs) in enumerate(dataloader):
        # Configure input
        input_imgs = Variable(input_imgs.type(Tensor))
        # Get detections
        with torch.no_grad():
            detections = model(input_imgs)
            detections = non_max_suppression(detections, opt.conf_thres, opt.nms_thres)
        # Log progress
        current_time = time.time()
        inference_time = datetime.timedelta(seconds=current_time - prev_time)
        prev_time = current_time
        print("\t+ Batch %d, Inference Time: %s" % (batch_i, inference_time))
        # Save image and detections
        imgs.extend(img_paths)
        img_detections.extend(detections)

    # Bounding-box colors
    cmap = plt.get_cmap("tab20b")
    colors = [cmap(i) for i in np.linspace(0, 1, 20)]

    print("\nSaving images:")
    # Iterate through images and save plot of detections
    for img_i, (path, detections) in enumerate(zip(imgs, img_detections)):
        print("(%d) Image: '%s'" % (img_i, path))
        # Create plot
        img = np.array(Image.open(path))
        plt.figure()
        fig, ax = plt.subplots(1)
        ax.imshow(img)
        # Draw bounding boxes and labels of detections
        if detections is not None:
            # Rescale boxes to original image
            detections = rescale_boxes(detections, opt.img_size, img.shape[:2])
            unique_labels = detections[:, -1].cpu().unique()
            n_cls_preds = len(unique_labels)
            bbox_colors = random.sample(colors, n_cls_preds)
            for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections:
                print("\t+ Label: %s, Conf: %.5f" % (classes[int(cls_pred)], cls_conf.item()))
                box_w = x2 - x1
                box_h = y2 - y1
                color = bbox_colors[int(np.where(unique_labels == int(cls_pred))[0])]
                # Create a Rectangle patch
                bbox = patches.Rectangle((x1, y1), box_w, box_h, linewidth=2, edgecolor=color, facecolor="none")
                # Add the bbox to the plot
                ax.add_patch(bbox)
                # Add label
                plt.text(
                    x1,
                    y1,
                    s=classes[int(cls_pred)],
                    color="white",
                    verticalalignment="top",
                    bbox={"color": color, "pad": 0},
                )
        # Save generated image with detections
        plt.axis("off")
        plt.gca().xaxis.set_major_locator(NullLocator())
        plt.gca().yaxis.set_major_locator(NullLocator())
        filename = path.split("/")[-1].split(".")[0]
        plt.savefig(f"output/{filename}.png", bbox_inches="tight", pad_inches=0.0)
        plt.close()
How could I add to this code a fragment to crop out the object that I have detected? Thank you very much. In case it is necessary for the solution, I am using Python 3 and TensorFlow 2. Again, thank you very much.
You can crop the image using this slicing notation:
crop = img[y1:y1+box_height, x1:x1+box_width, :]
As long as the numpy array is numpy.uint8, it should be straightforward to save:
pimg = Image.fromarray(crop)
pimg.save("test.png")
Solved with:
img = cv2.imread('./202.jpg')
crop_img = img[y:y+h, x:x+w]
#cv2.imshow('img', crop_img)
#cv2.waitKey(0)
#cv2.destroyAllWindows()
cv2.imwrite('./pruebas/resultado.png', crop_img)
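To tie the slicing back into the detection script above, a minimal sketch (untested; it reuses the loop's img, detections, and filename variables and assumes rescale_boxes has already run): convert the box tensors to ints, clip them to the image bounds, and save each object separately.

# Inside the per-image loop, after rescale_boxes:
h, w = img.shape[:2]
for det_i, (x1, y1, x2, y2, conf, cls_conf, cls_pred) in enumerate(detections):
    # Box coordinates are 0-dim float tensors; slicing needs ints in bounds.
    x1c, y1c = max(0, int(x1)), max(0, int(y1))
    x2c, y2c = min(w, int(x2)), min(h, int(y2))
    crop = img[y1c:y2c, x1c:x2c]
    Image.fromarray(crop).save(f"output/{filename}_obj{det_i}.png")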

Is there a way to pass an 8-channel image to Keras for a CNN?

The problem: I am unable to train a CNN model on 8-channel .TIF images.
Expected output: map the training data (train_ds) via GDAL and train the model.
data (images):
n = 600
shape = (256, 256, 8)
data structure:
project_photos/
....classes/
......barren/
......agriculture/
......wooded/
import numpy as np
import os
import PIL
import PIL.Image
import tensorflow as tf
import tensorflow_datasets as tfds
import pathlib

>print(tf.__version__)
2.1.0

data_dir = ".\projects\keras\projectA\project_photos\classes"
data_dir = pathlib.Path(data_dir)
image_count = len(list(data_dir.glob('*/*.tif')))
>print(image_count)
600

list_ds = tf.data.Dataset.list_files(str(data_dir/'*/*'), shuffle=False)
list_ds = list_ds.shuffle(image_count, reshuffle_each_iteration=False)

batch_size = 32
img_height = 256
img_width = 256

>for f in list_ds.take(5):
>    print(f.numpy())
b'/home/projects/keras/projectA/project_photos/classes/barren/12345_b0001.tif'
b'/home/projects/keras/projectA/project_photos/classes/wooded/12345_w0001.tif'
b'/home/projects/keras/projectA/project_photos/classes/barren/12345_b0002.tif'
b'/home/projects/keras/projectA/project_photos/classes/agriculture/12345_a0001.tif'
b'/home/projects/keras/projectA/project_photos/classes/wooded/12345_w0002.tif'

# class names from the tree structure
>class_names = np.array(sorted([item.name for item in data_dir.glob('*')]))
>print(class_names)
['barren' 'agriculture' 'wooded']

# train/validation split
val_size = int(image_count * 0.2)
train_ds = list_ds.skip(val_size)
val_ds = list_ds.take(val_size)

def get_label(file_path):
    # Convert the path to a list of path components
    parts = tf.strings.split(file_path, os.path.sep)
    # The second to last is the class directory
    one_hot = parts[-2] == class_names
    # Integer-encode the label
    return tf.argmax(one_hot)

def decode_img(img):
    # Convert the compressed string to a 3D uint8 tensor
    img = tf.image.decode_jpeg(img, channels=3)
    # Resize the image to the desired size
    return tf.image.resize(img, [img_height, img_width])

def process_path(file_path):
    label = get_label(file_path)
    # Load the raw data from the file as a string
    img = tf.io.read_file(file_path)
    img = decode_img(img)
    return img, label

# Set `num_parallel_calls` so multiple images are loaded/processed in parallel.
AUTOTUNE = tf.data.experimental.AUTOTUNE
train_ds = train_ds.map(process_path, num_parallel_calls=AUTOTUNE)
val_ds = val_ds.map(process_path, num_parallel_calls=AUTOTUNE)
I understand that TensorFlow has limited (experimental) support for decode_tiff, and even if that did work, I am unable to use the latest version of TF that has that update.
This leaves me attempting workarounds, such as the following, none of which have succeeded:
"""
Updating decode_img(img) in attempt to process 8-channel .TIF raster
"""
#attempt, adding gdal_Open variable to decode_img
## fails due to image path (train_ds) being stored as byte.
x = gdal.Open(file_path)
Error: Not a string.
#attempt, modifying to extract PATH as str().
def process_path(file_path):
label = get_label(file_path)
# load the raw data from the file as a string
img = ''
for fpath in file_path:
img = fpath.numy()
img = decode_img(img)
return img, label
>train_ds = train_ds.map(process_path, num_parallel_calls=AUTOTUNE)
ValueError: len requires a non-scalar tensor, got one of shape Tensor("Shape:0", shape=(0,), dtype=int32)
# Attempt: processing outside of `.map` works just fine.
imgList = []
for elem in train_ds:
    img = elem.numpy()
    img = img.decode()
    imgList.append(img)

file_path = imgList[0]
raster = gdal.Open(file_path)
bands = [raster.GetRasterBand(k + 1).ReadAsArray() for k in range(raster.RasterCount)]
n_bands = len(bands)
img_array = np.stack(bands, 2)
img = tf.convert_to_tensor(img_array, dtype=tf.float32)
img = tf.image.resize(img, [img_height, img_width])

>print(type(img))
>print(img.numpy().shape)
<class 'tensorflow.python.framework.ops.EagerTensor'>
(256, 256, 8)
So, any ideas on how I can get this to work within the TF framework - getting TF to process the raster via .map?
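One workaround that may fit here, sketched under the assumption that the GDAL snippet above (via the osgeo bindings) works eagerly: wrap the read in tf.py_function, which lets eager Python code run inside .map, at some cost in parallelism.

from osgeo import gdal

def load_tif(path_tensor):
    # Runs eagerly inside tf.py_function, so .numpy() is available.
    path = path_tensor.numpy().decode()
    raster = gdal.Open(path)
    bands = [raster.GetRasterBand(k + 1).ReadAsArray()
             for k in range(raster.RasterCount)]
    return np.stack(bands, 2).astype(np.float32)

def process_path(file_path):
    label = get_label(file_path)
    img = tf.py_function(load_tif, [file_path], tf.float32)
    # py_function loses static shape information; restore it by hand.
    img.set_shape([None, None, 8])
    img = tf.image.resize(img, [img_height, img_width])
    return img, label

train_ds = train_ds.map(process_path, num_parallel_calls=AUTOTUNE)
val_ds = val_ds.map(process_path, num_parallel_calls=AUTOTUNE)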

Call `model.predict()` from an externally attached function

Using this as reference, I came up with the code below:
import tensorflow as tf
from tensorflow.keras.applications.densenet import DenseNet121
from tensorflow.keras.applications.densenet import preprocess_input as densenet_preprocess_input
import inspect, cv2
import numpy as np

#tf.function(input_signature=[tf.TensorSpec([None, None, 3], dtype=tf.uint8)])
def _preprocess(image_array):
    im_arr = tf.image.resize(image_array, (resize_height, resize_width))
    im_arr = densenet_preprocess_input(im_arr)
    input_batch = tf.expand_dims(im_arr, axis=0)
    return input_batch

training_model = DenseNet121(include_top=True, weights='imagenet')

# Assign resize dimensions
resize_height = tf.constant(480, dtype=tf.int64)
resize_width = tf.constant(640, dtype=tf.int64)

# Attach function to Model
training_model.preprocess = _preprocess

# Attach resize dimensions to Model
training_model.resize_height = resize_height
training_model.resize_width = resize_width

training_model.save("saved_model", overwrite=True)
which basically attaches a method called preprocess to the tf.keras.Model defined for DenseNet121, so that later I can use it as follows to make a prediction:
pred_model = tf.keras.models.load_model('saved_model')

# Download image
image_path = tf.keras.utils.get_file("cat.jpg", "https://storage.googleapis.com/download.tensorflow.org/example_images/320px-Felis_catus-cat_on_snow.jpg")

# Load and convert the image to a tf.uint8 numpy array
image_array = np.array(tf.keras.preprocessing.image.load_img(path=image_path))

# Call the custom function bound to the model
preprocessed_image = pred_model.preprocess(image_array)
result = pred_model.predict(preprocessed_image)
print(np.argmax(result, axis=-1), np.amax(result, axis=-1))
My question:
How can I call the model's predict method from the preprocess function, so that
preprocessed_image = pred_model.preprocess(image_array)
result = pred_model.predict(preprocessed_image)
can become
result = pred_model.preprocess_predict(image_array)
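A minimal sketch of one way this could work (untested; preprocess_predict is an illustrative name, not a Keras API): attach a tf.function that chains the preprocessing with a direct model call. Note the call to training_model(batch) rather than model.predict(), since predict() runs its own outer loop and cannot be traced inside a tf.function.

@tf.function(input_signature=[tf.TensorSpec([None, None, 3], dtype=tf.uint8)])
def _preprocess_predict(image_array):
    # Reuse the existing preprocessing, then call the model directly.
    batch = _preprocess(tf.cast(image_array, tf.float32))
    return training_model(batch, training=False)

# Attach before saving; tf.functions assigned as model attributes are
# tracked by the SavedModel machinery and restored on load.
training_model.preprocess_predict = _preprocess_predict
training_model.save("saved_model", overwrite=True)

# After loading:
# pred_model = tf.keras.models.load_model('saved_model')
# result = pred_model.preprocess_predict(image_array).numpy()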
