I am using the following code to generate Grad-CAM maps for the input images. I am not able to resize the subplots to visualise them properly, as shown in the figure below.
Code:
import cv2

def NormalizeData(data):
    return (data - np.min(data)) / (np.max(data) - np.min(data))

for i in range(5):
    img = train_loader.dataset[i][0].to(device)
    mdlOut = model(img)
    #img = torch.tensor(img, requires_grad=True)
    img = img.clone().detach().requires_grad_(True)  # new line
    out = model(img)
    idx = out.argmax()
    yc = out[0, idx]
    yc.backward()
    rawGrad = img.grad.cpu().detach().numpy()
    grad = NormalizeData(rawGrad)
    avgGrad = np.mean(np.abs(grad), axis=(1, 2))
    img = NormalizeData(img.cpu().detach().numpy())
    CAM = NormalizeData(avgGrad[0]*img[0, :, :] + avgGrad[1]*img[1, :, :] + avgGrad[2]*img[2, :, :])
    plt.subplot(20, 2, 2*i + 1)
    plt.imshow(CAM, cmap='jet')
    plt.subplot(20, 2, 2*i + 2)
    plt.imshow(np.einsum('kli->lik', img))
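If the goal is simply to make the panels readable, one option is a sketch along these lines (compute_cam is a hypothetical helper wrapping the loop body above; note that plt.subplot(20, 2, ...) reserves 20 rows, so each panel ends up tiny):

fig, axes = plt.subplots(5, 2, figsize=(8, 20))  # explicit figure size, 5x2 grid for 5 image pairs
for i in range(5):
    CAM, img = compute_cam(i)  # hypothetical: returns the CAM and normalized image computed as above
    axes[i, 0].imshow(CAM, cmap='jet')
    axes[i, 0].axis('off')
    axes[i, 1].imshow(np.einsum('kli->lik', img))
    axes[i, 1].axis('off')
fig.tight_layout()
plt.show()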
First of all, I already loaded my model to predict on an inference set that I had prepared, but I got an error when trying to predict and show the result.
So here is my code:
def load_img(filename):
    img = read_file(filename)                         # load data
    img = decode_image(img, channels=3)               # convert to RGB
    img = resize(img, size=[img_height, img_height])  # resize image
    img = np.array(img)[:,:,1]                        # take one channel; result is 2-D (H, W)
    img = img/255.                                    # rescale images
    return img
inf1 = load_img(r'ML2\COVID-19\inf_set\covid\covid - 1.jpeg')
inf2 = load_img(r'ML2\COVID-19\inf_set\covid\covid - 2.jpeg')
inf3 = load_img(r'ML2\COVID-19\inf_set\normal\Normal - 1.jpeg')
inf4 = load_img(r'ML2\COVID-19\inf_set\normal\Normal - 2.jpeg')
inf5 = load_img(r'ML2\COVID-19\inf_set\pneumonia\Pneumonia - 1.jpeg')
inf6 = load_img(r'ML2\COVID-19\inf_set\pneumonia\Pneumonia - 2.jpeg')
plt.figure(figsize=(35, 5))
plt.suptitle('Prediction Results', fontsize=15)
counter = 1
for i in [inf1, inf2, inf3, inf4, inf5, inf6]:
    plt.subplot(1, 6, counter)
    res = int(tf.round(model.predict(x=expand_dims(i, axis=0))))
    plt.imshow(i)
    plt.title(f"Prediction: {label_data[res]}")
    plt.axis('off')
    counter += 1
plt.show()
And here is the error notification.
So, I need help to solve this problem. Thank you in advance.
You are squashing the image dimensions in this line:
img = np.array(img)[:,:,1]
This is why your image has shape (220, 220) instead of (220, 220, 1); the latter, after expand_dims, would have the proper input shape (1, 220, 220, 1).
You could change the load_img function, or you could solve this by doing:
res = int(tf.round(model.predict(x=expand_dims(i, axis=[0,3]))))
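For the first option, a minimal sketch of a changed load_img (keeping the question's read_file/decode_image/resize helpers and img_height as defined elsewhere in the asker's code) preserves the channel axis when slicing, so expand_dims(i, axis=0) alone yields (1, 220, 220, 1):

def load_img(filename):
    img = read_file(filename)                         # load data
    img = decode_image(img, channels=3)               # convert to RGB
    img = resize(img, size=[img_height, img_height])  # resize image
    img = np.array(img)[:, :, 1:2]                    # slice with 1:2 to keep shape (H, W, 1)
    img = img / 255.                                  # rescale
    return img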
I'm training my neural network using the PyTorch framework. The data consists of full HD images (1920x1080), but in each iteration I only need to crop out a random 256x256 patch from these images. My network is relatively small (5 conv layers), and hence the bottleneck is loading the data. I've provided my current code below. Is there any way to optimize loading the data and speed up the training?
Code:
from pathlib import Path
import numpy
import skimage.io
import torch.utils.data as data
import Imath
import OpenEXR
class Ours(data.Dataset):
    """
    Loads patches of resolution 256x256. Patches are selected such that they contain at least 1 unknown pixel.
    """

    def __init__(self, data_dirpath, split_name, patch_size):
        super(Ours, self).__init__()
        self.dataroot = Path(data_dirpath) / split_name
        self.video_names = []
        for video_path in sorted(self.dataroot.iterdir()):
            for i in range(4):
                for j in range(11):
                    view_num = i * 12 + j
                    self.video_names.append((video_path.stem, view_num))
        self.patch_size = patch_size
        return

    def __getitem__(self, index):
        video_name, view_num = self.video_names[index]
        patch_start_pt = (numpy.random.randint(1080), numpy.random.randint(1920))

        frame1_path = self.dataroot / video_name / f'render/rgb/{view_num + 1:04}.png'
        frame2_path = self.dataroot / video_name / f'render/rgb/{view_num + 2:04}.png'
        depth_path = self.dataroot / video_name / f'render/depth/{view_num + 1:04}.exr'
        mask_path = self.dataroot / video_name / f'render/masks/{view_num + 1:04}.png'

        frame1 = self.get_image(frame1_path, patch_start_pt)
        frame2 = self.get_image(frame2_path, patch_start_pt)
        mask = self.get_mask(mask_path, patch_start_pt)
        depth = self.get_depth(depth_path, patch_start_pt, mask)

        data_dict = {
            'frame1': frame1,
            'frame2': frame2,
            'mask': mask,
            'depth': depth,
        }
        return data_dict

    def __len__(self):
        return len(self.video_names)

    def get_mask(self, path: Path, patch_start_point: tuple):
        h, w = patch_start_point
        mask = skimage.io.imread(path.as_posix())[h:h + self.patch_size, w:w + self.patch_size][None]
        return mask

    def get_image(self, path: Path, patch_start_point: tuple):
        h, w = patch_start_point
        image = skimage.io.imread(path.as_posix())
        image = image[h:h + self.patch_size, w:w + self.patch_size, :3]
        image = image.astype(numpy.float32) / 255 * 2 - 1
        image_cf = numpy.moveaxis(image, [0, 1, 2], [1, 2, 0])
        return image_cf

    def get_depth(self, path: Path, patch_start_point: tuple, mask: numpy.ndarray):
        h, w = patch_start_point
        exrfile = OpenEXR.InputFile(path.as_posix())
        raw_bytes = exrfile.channel('B', Imath.PixelType(Imath.PixelType.FLOAT))
        depth_vector = numpy.frombuffer(raw_bytes, dtype=numpy.float32)
        height = exrfile.header()['displayWindow'].max.y + 1 - exrfile.header()['displayWindow'].min.y
        width = exrfile.header()['displayWindow'].max.x + 1 - exrfile.header()['displayWindow'].min.x
        depth = numpy.reshape(depth_vector, (height, width))
        depth = depth[h:h + self.patch_size, w:w + self.patch_size]
        depth = depth[None]
        depth = depth.astype(numpy.float32)
        depth = depth * mask
        return depth
Finally, I'm creating a DataLoader as follows:
train_data_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True, num_workers=4)
What I've tried so far:
I've searched for whether it is possible to read only part of an image. Unfortunately, I didn't get any leads; it looks like the Python libraries read the full image.
I'm planning to read more patches from a single image so that I need to read fewer images. But in the PyTorch framework, the __getitem__() function has to return a single sample, not a batch. So, in each __getitem__() call I can read only one patch.
I'm planning to circumvent this as follows: read 4 patches in __getitem__() and return patches of shape (4,3,256,256) instead of (3,256,256). Later, when I read a batch using the DataLoader, I'll get a batch of shape (BS,4,3,256,256) instead of (BS,3,256,256). I can then reshape the data to merge the first two dimensions, converting (BS,4,3,256,256) to (BS*4,3,256,256). Thus I can reduce the batch_size (BS) by a factor of 4, and hopefully this will speed up data loading 4 times.
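A minimal sketch of that reshaping step (it assumes __getitem__() has been changed to return 4 stacked patches per key; the shapes in the comments are for the plan above):

for batch in train_data_loader:
    frames = batch['frame1']                      # shape: (BS, 4, 3, 256, 256)
    bs, n, c, h, w = frames.shape
    frames = frames.reshape(bs * n, c, h, w)      # shape: (BS*4, 3, 256, 256)
    # repeat for batch['frame2'], batch['mask'], batch['depth'], then run the training step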
Are there any other options? I'm open to all kinds of suggestions. Thanks!
I'm currently making an object detection app that can detect whether tires are damaged or not. For this I'm using Google's AutoML Edge, which exports a TFLite model. Now I want to implement this model in my code, but apparently the coordinates it predicts are normalized, and I'm stuck on denormalizing them.
Have a look at my code here:
import tensorflow as tf
import numpy as np
import cv2

MODEL_PATH = 'Resources/model_v1_OD.tflite'
LABEL_PATH = 'Resources/model_v1_OD.txt'

class TFTireModel():
    labels = []
    interpreter = None
    input_details = []
    output_details = []
    height = 0
    width = 0

    def __init__(self):
        with open(LABEL_PATH, 'r') as f:
            self.labels = [line.strip() for line in f.readlines()]
        # Init TFLite interpreter
        self.interpreter = tf.lite.Interpreter(model_path=MODEL_PATH)
        self.interpreter.allocate_tensors()
        # Get input and output tensors.
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()
        # Get input dimensions
        self.height = self.input_details[0]['shape'][1]
        self.width = self.input_details[0]['shape'][2]

    def predict(self, img, threshold=0.3):
        # Resize image to input dimensions
        img = cv2.resize(img, (self.width, self.height))
        img = np.expand_dims(img, axis=0)
        img = (2.0 / 255.0) * img - 1.0
        img = img.astype('uint8')
        # Predict image
        self.interpreter.set_tensor(self.input_details[0]['index'], img)
        self.interpreter.invoke()
        # Get results
        boxes = self.interpreter.get_tensor(self.output_details[0]['index'])
        print(f"boxes: {boxes}")
        classes = self.interpreter.get_tensor(self.output_details[1]['index'])
        scores = self.interpreter.get_tensor(self.output_details[2]['index'])
        num = self.interpreter.get_tensor(self.output_details[3]['index'])
        # Get output
        output = self._boxes_coordinates(boxes=np.squeeze(boxes[0]),
                                         classes=np.squeeze(classes[0] + 1).astype(np.int32),
                                         scores=np.squeeze(scores[0]),
                                         im_width=self.width,
                                         im_height=self.height,
                                         min_score_thresh=threshold)
        print(f"output: {output}")
        # Format output
        return output

    def _boxes_coordinates(self,
                           boxes,
                           classes,
                           scores,
                           im_width,
                           im_height,
                           max_boxes_to_draw=4,
                           min_score_thresh=0.4):
        print(f"width: {im_width}, height {im_height}")
        if not max_boxes_to_draw:
            max_boxes_to_draw = boxes.shape[0]
        number_boxes = min(max_boxes_to_draw, boxes.shape[0])
        tire_boxes = []
        # person_labels = []
        for i in range(number_boxes):
            if scores is None or scores[i] > min_score_thresh:
                box = tuple(boxes[i].tolist())
                ymin, xmin, ymax, xmax = box
                xmin, ymin, xmax, ymax = (int(xmin * im_width), int(xmax * im_width), int(ymin * im_height), int(ymax * im_height))  # TODO: DO A LOOP
                # tire_boxes.append([(ymin, xmin, ymax, xmax), scores[i], self.labels[classes[i]]])  # More complete
                tire_boxes.append((xmin, ymin, xmax, ymax))
        return tire_boxes
Things go wrong at:
boxes = self.interpreter.get_tensor(self.output_details[0]['index'])
print(f"boxes: {boxes}")
boxes: [[[ 0.00263482 0.50020593 0.3734043 0.83953816]
[ 0.12580797 0.14952084 0.65327024 0.61710536]
[ 0.13584864 0.38896233 0.6485662 0.85324436]
[ 0.31914377 0.3945622 0.87147605 0.8458656 ]
[ 0.01334581 0.03666234 0.46443292 0.55461186]
[ 0.1018104 -0.08279537 0.6541427 0.37984413]
The output here is normalized, and I don't know how to denormalize it. The desired output is in pixels, i.e. the normalized values scaled by the width and height, as attempted in the _boxes_coordinates function.
The outputs from TFLite's Object Detection models are in the format:
[top/Height, left/Width, bottom/Height, right/Width]
So, if you know the dimensions of your image, you can compute the pixel boundaries of each detected object.
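A minimal sketch of that conversion (the 640x480 dimensions are hypothetical; also note that in the predict method above, im_width/im_height are the model's input size, so the result maps to the resized image rather than the original):

def denormalize_box(box, im_width, im_height):
    # box is normalized [ymin, xmin, ymax, xmax], as in the printout above
    ymin, xmin, ymax, xmax = box
    left = int(xmin * im_width)
    top = int(ymin * im_height)
    right = int(xmax * im_width)
    bottom = int(ymax * im_height)
    return (left, top, right, bottom)

# Example with the first box printed above and a hypothetical 640x480 image:
print(denormalize_box([0.00263482, 0.50020593, 0.3734043, 0.83953816], 640, 480))
# -> (320, 1, 537, 179)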
I have a problem with preprocessing a data set for deep learning.
I am using U-Net.
I have training data, label data, and test data of size 512x512.
I want patch-based learning, so I am trying to split the 512x512 slices into multiple 64x64 slices so that I can train on 64x64 patches. In my case I want to make 64x64 patches with a stride of 32 pixels from the original 512x512 images.
For example, the first patch is formed from (0,0) ~ (64,64) and the next patch is formed from (32,0) ~ (64+32, 64).
This is the full code of data.py:
# from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
import numpy as np
import os
import glob
import cv2
from os.path import *
# from libtiff import TIFF
import matplotlib.pyplot as plt

class dataProcess(object):
    def __init__(self, out_rows, out_cols):
        """
        """
        self.out_rows = out_rows
        self.out_cols = out_cols

    def load_train_data(self):
        imgs_row, imgs_col = 512, 512
        train_list = []
        train_img = []
        label_list = []
        label_img = []
        train_path = 'C:\\Users\\Lee Doyle\\unet\\data\\Train'
        label_path = 'C:\\Users\\Lee Doyle\\unet\\data\\Label'
        ###################### Train data ################################
        print('-' * 30)
        print('load train images...')
        print('-' * 30)
        for i in glob.glob(train_path + '/*.[tT][iI][fF]'):
            train_list.append(abspath(i))
        print(len(train_list))
        for i in train_list:
            # print(i)
            img = cv2.imread(i, cv2.IMREAD_GRAYSCALE)
            train_img.append(img.astype(np.float32) / 255.0)
        train_img = np.array(train_img)
        print(train_img)
        ###################### Label data ################################
        for i in glob.glob(label_path + '/*.[tT][iI][fF]'):
            label_list.append(abspath(i))
        for i in label_list:
            img = cv2.imread(i, cv2.IMREAD_GRAYSCALE)
            label_img.append(img.astype(np.float32) / 255.0)
        label_img = np.array(label_img)
        fig = plt.figure()
        a = fig.add_subplot(1, 2, 1)
        plt.imshow(train_img[0], cmap='gray')
        a.set_title('train image')
        a = fig.add_subplot(1, 2, 2)
        plt.imshow(label_img[0] + train_img[0], cmap='gray')
        plt.imshow(label_img[0], cmap='gray')
        a.set_title('Image with GT SEG')
        plt.show()
        print(train_img.shape)
        train_img = train_img.reshape(train_img.shape[0], imgs_row, imgs_col, 1)
        label_img = label_img.reshape(label_img.shape[0], imgs_row, imgs_col, 1)
        return train_img, label_img

    def load_test_data(self):
        imgs_row, imgs_col = 512, 512
        test_list = []
        test_img = []
        test_path = 'C:\\Users\\Lee Doyle\\unet\\data\\Test'
        ###################### Test data #################################
        print('-' * 30)
        print('load test images...')
        print('-' * 30)
        for i in glob.glob(test_path + '/*.[tT][iI][fF]'):
            test_list.append(abspath(i))
        for i in test_list:
            img = cv2.imread(i, cv2.IMREAD_GRAYSCALE)
            # img = cv2.resize(img, (512, 512))
            test_img.append(img.astype(np.float32) / 255.0)
        test_img = np.array(test_img)
        # mean = test_img.mean(axis=0)
        # test_img -= mean
        test_img = test_img.reshape(test_img.shape[0], imgs_row, imgs_col, 1)
        print(test_img.shape)
        return test_img

if __name__ == "__main__":
    mydata = dataProcess(512, 512)
    mydata.load_train_data()
    mydata.load_test_data()
I think I need to add the patch-making code near this code; a sketch of one way to do it follows after the snippet below.
for i in glob.glob(train_path + '/*.[tT][iI][fF]'):
    train_list.append(abspath(i))
print(len(train_list))
for i in train_list:
    # print(i)
    img = cv2.imread(i, cv2.IMREAD_GRAYSCALE)
    train_img.append(img.astype(np.float32)/255.0)
train_img = np.array(train_img)
print(train_img)
I appreciate your help.
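A minimal sliding-window sketch of that idea (my assumption of one way to do it, operating on the (N, 512, 512, 1) arrays returned by load_train_data):

def extract_patches(images, patch_size=64, stride=32):
    # Split (N, H, W, C) images into (M, patch_size, patch_size, C) patches.
    patches = []
    for img in images:
        h, w = img.shape[:2]
        for y in range(0, h - patch_size + 1, stride):
            for x in range(0, w - patch_size + 1, stride):
                patches.append(img[y:y + patch_size, x:x + patch_size])
    return np.array(patches)

# For 512x512 inputs, a 64x64 window at stride 32 gives 15x15 = 225 patches per image.
train_patches = extract_patches(train_img)
label_patches = extract_patches(label_img)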
While I was following the DeepDream IPython notebook here: https://github.com/google/deepdream/blob/master/dream.ipynb, I successfully ran the code and initialized the network, until I got this error:
I0218 20:53:01.108750 12174 net.cpp:283] Network initialization done.
I0218 20:53:06.017426 12174 net.cpp:816] Ignoring source layer data
I0218 20:53:06.139768 12174 net.cpp:816] Ignoring source layer loss
Traceback (most recent call last):
File "/home/andrew/PycharmProjects/deepmeme/deepmeme.py", line 122, in <module>
    frame = deepdream(net, frame)
  File "/home/andrew/PycharmProjects/deepmeme/deepmeme.py", line 78, in deepdream
    octaves = [preprocess(net, base_img)]
  File "/home/andrew/PycharmProjects/deepmeme/deepmeme.py", line 43, in preprocess
    return np.float32(np.rollaxis(img, 2)[::-1]) - net.transformer.mean['data']
KeyError: 'data'
This is my code for the python file:
import sys
sys.path.append("/home/andrew/caffe/python")

from cStringIO import StringIO
import numpy as np
import scipy.ndimage as nd
import PIL.Image
from IPython.display import clear_output, Image, display
from google.protobuf import text_format
import caffe

# If your GPU supports CUDA and Caffe was built with CUDA support,
# uncomment the following to run Caffe operations on the GPU.
# caffe.set_mode_gpu()
# caffe.set_device(0) # select GPU device if multiple devices exist

def showarray(a, fmt='jpeg'):
    a = np.uint8(np.clip(a, 0, 255))
    f = StringIO()
    PIL.Image.fromarray(a).save(f, fmt)
    display(Image(data=f.getvalue()))

model_path = '/home/andrew/caffe/models/bvlc_reference_caffenet/' # substitute your path here
net_fn = model_path + 'deploy.prototxt'
param_fn = model_path + 'caffe_train_iter_500.caffemodel'

# Patching model to be able to compute gradients.
# Note that you can also manually add "force_backward: true" line to "deploy.prototxt".
model = caffe.io.caffe_pb2.NetParameter()
text_format.Merge(open(net_fn).read(), model)
model.force_backward = True
open('deploy.prototxt', 'w').write(str(model))

net = caffe.Classifier('/home/andrew/caffe/models/bvlc_reference_caffenet/deploy.prototxt', '/home/andrew/caffe/models/bvlc_reference_caffenet/caffenet_train_iter_500.caffemodel', caffe.TEST)

# a couple of utility functions for converting to and from Caffe's input image layout
def preprocess(net, img):
    return np.float32(np.rollaxis(img, 2)[::-1]) - net.transformer.mean['data']

def deprocess(net, img):
    return np.dstack((img + net.transformer.mean['data'])[::-1])

def objective_L2(dst):
    dst.diff[:] = dst.data

def make_step(net, step_size=1.5, end='inception_4c/output',
              jitter=32, clip=True, objective=objective_L2):
    '''Basic gradient ascent step.'''
    src = net.blobs['data'] # input image is stored in Net's 'data' blob
    dst = net.blobs[end]
    ox, oy = np.random.randint(-jitter, jitter+1, 2)
    src.data[0] = np.roll(np.roll(src.data[0], ox, -1), oy, -2) # apply jitter shift
    net.forward(end=end)
    objective(dst) # specify the optimization objective
    net.backward(start=end)
    g = src.diff[0]
    # apply normalized ascent step to the input image
    src.data[:] += step_size/np.abs(g).mean() * g
    src.data[0] = np.roll(np.roll(src.data[0], -ox, -1), -oy, -2) # unshift image
    if clip:
        bias = net.transformer.mean['data']
        src.data[:] = np.clip(src.data, -bias, 255-bias)

def deepdream(net, base_img, iter_n=10, octave_n=4, octave_scale=1.4,
              end='inception_4c/output', clip=True, **step_params):
    # prepare base images for all octaves
    octaves = [preprocess(net, base_img)]
    for i in xrange(octave_n-1):
        octaves.append(nd.zoom(octaves[-1], (1, 1.0/octave_scale, 1.0/octave_scale), order=1))
    src = net.blobs['data']
    detail = np.zeros_like(octaves[-1]) # allocate image for network-produced details
    for octave, octave_base in enumerate(octaves[::-1]):
        h, w = octave_base.shape[-2:]
        if octave > 0:
            # upscale details from the previous octave
            h1, w1 = detail.shape[-2:]
            detail = nd.zoom(detail, (1, 1.0*h/h1, 1.0*w/w1), order=1)
        src.reshape(1, 3, h, w) # resize the network's input image size
        src.data[0] = octave_base+detail
        for i in xrange(iter_n):
            make_step(net, end=end, clip=clip, **step_params)
            # visualization
            vis = deprocess(net, src.data[0])
            if not clip: # adjust image contrast if clipping is disabled
                vis = vis*(255.0/np.percentile(vis, 99.98))
            showarray(vis)
            print octave, i, end, vis.shape
            clear_output(wait=True)
        # extract details produced on the current octave
        detail = src.data[0]-octave_base
    # returning the resulting image
    return deprocess(net, src.data[0])

img = np.float32(PIL.Image.open('/home/andrew/caffe/examples/images/cat.jpg'))
showarray(img)
net.blobs.keys()

frame = img
frame_i = 0
h, w = frame.shape[:2]
s = 0.05 # scale coefficient
for i in xrange(100):
    frame = deepdream(net, frame)
    PIL.Image.fromarray(np.uint8(frame)).save("frames/%04d.jpg" % frame_i)
    frame = nd.affine_transform(frame, [1-s, 1-s, 1], [h*s/2, w*s/2, 0], order=1)
    frame_i += 1

Image(filename='frames/0029.jpg')
Does anybody know what's happening? I am using my own data that I successfully trained a model with.
From the deepdream iPython notebook:
net = caffe.Classifier('tmp.prototxt', param_fn,
                       mean = np.float32([104.0, 116.0, 122.0]), # ImageNet mean, training set dependent
                       channel_swap = (2,1,0)) # the reference model has channels in BGR order instead of RGB
versus yours:
net = caffe.Classifier('/home/andrew/caffe/models/bvlc_reference_caffenet/deploy.prototxt', '/home/andrew/caffe/models/bvlc_reference_caffenet/caffenet_train_iter_500.caffemodel', caffe.TEST)
You do not seem to include a mean when you create a caffe.Classifier.
See the definition of caffe.Classifier.
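For example, with your paths, a call including the mean might look like this sketch (the mean values here are the ImageNet defaults from the notebook and are training-set dependent, so they may not be right for a model trained on your own data):

net = caffe.Classifier('/home/andrew/caffe/models/bvlc_reference_caffenet/deploy.prototxt',
                       '/home/andrew/caffe/models/bvlc_reference_caffenet/caffenet_train_iter_500.caffemodel',
                       mean=np.float32([104.0, 116.0, 122.0]),  # ImageNet mean, training set dependent
                       channel_swap=(2, 1, 0))  # the reference model uses BGR channel order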
If you don't have a mean, you could probably just remove the mention of mean from preprocess/deprocess:
def preprocess(net, img):
    return np.float32(np.rollaxis(img, 2)[::-1])

def deprocess(net, img):
    return np.dstack((img)[::-1])