I am using a custom dataset for image segmentation. While visualizing some of the images and masks i found an error. The problem for me know is, how to find the name of the image. The code i use for the pytorch datasetset creation is:
class SegmentationDataset(Dataset):
def __init__(self, df, augmentations):
self.df = df
self.augmentations = augmentations
def __len__(self):
return len(self.df)
def __getitem__(self, idx):
row = self.df.iloc[idx]
image_path = DATA_DIR + row.images
mask_path = DATA_DIR + row.masks
image = skimage.io.imread(image_path)
mask = skimage.io.imread(mask_path)
mask = np.expand_dims(mask, axis = -1)
if self.augmentations:
data = self.augmentations(image = image, mask = mask)
image = data['image']
mask = data['mask']
image = np.transpose(image, (2, 0, 1)).astype(np.float32)
mask = np.transpose(mask, (2, 0, 1)).astype(np.float32)
image = torch.Tensor(image) / 255.0
mask = torch.round(torch.Tensor(mask) / 255.0)
return image, mask
trainset = SegmentationDataset(train_df, get_train_augs())
validset = SegmentationDataset(valid_df, get_valid_augs())
When i then print one specific image, i see that the mask is not available/wrong:
idx = 9
print('Drawn sample ID:', idx)
image, mask = validset[idx]
show_image(image, mask)
How do i now get the image name of this idx = 9?
I'd imagine you could print out one of the following, under this line image = skimage.io.imread(image_path), it should help lead you to your answer:
print(row)
print(row.images)
print(images)
print(image_path)
To get the file name after you have parsed the fully quaified path above:
my_str = '/my/data/path/images/wallpaper.jpg'
result = my_str.rsplit('/', 1)[1]
print(result) # 'wallpaper.jpg'
with_slash = '/' + my_str.rsplit('/', 1)[1]
print(with_slash) # '/wallpaper.jpg'
['/my/data/path/images/', 'wallpaper.jpg']
print(my_str.rsplit('/', 1)[1])
Related
I am getting TypeError: get_train_augs() got an unexpected keyword argument 'image', I have my augmentation functions as follows
Augmentation functions
def get_train_augs():
return A.Compose([
A.Resize(IMAGE_SIZE,IMAGE_SIZE),
A.HorizontalFlip(p = 0.5),
A.VerticalFlip(p = 0.5),
])
def get_valid_augs():
return A.Compose([
A.Resize(IMAGE_SIZE,IMAGE_SIZE),
])
Custom segmentation dataset class
class SegmentationDataset(Dataset):
def __init__(self, df, augmentations=None):
self.df = df
self.augmentations = augmentations
def __len__(self):
return len(self.df)
def __getitem__(self,idx):
row = self.df.iloc[idx]
image_path = row.images
mask_path = row.masks
image = cv2.imread(image_path)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE) # (h, w, c)
mask = np.expand_dims(mask, axis = -1)
if self.augmentations is not None:
data = self.augmentations(image = image, mask = mask)
image = data['image']
mask = data['mask']
# (h, w, c) -> (c, h, w)
image = np.transpose(image,(2,0,1)).astype(np.float32)
mask = np.transpose(mask,(2,0,1)).astype(np.float32)
image = torch.Tensor(image)/255.0
mask = torch.round(torch.Tensor(mask)/255.0)
return image, mask
when I call trainset like this I am getting an error:
trainset = SegmentationDataset(train_df, get_train_augs)
validset = SegmentationDataset(valid_df, get_valid_augs)
calling a random index
idx = 3
image, mask = trainset[idx]
The error I am getting is:
TypeError Traceback (most recent call last)
<ipython-input-28-9b83781b7e3d> in <module>()
1 idx = 3
2
----> 3 image, mask = trainset[idx]
4
5 helper.show(image, mask)
<ipython-input-25-39872478644d> in __getitem__(self, idx)
20
21 if self.augmentations is not None:
---> 22 data = self.augmentations(image = image, mask = mask)
23
24 image = data['image']
TypeError: get_train_augs() got an unexpected keyword argument 'image'
try
trainset = SegmentationDataset(train_df, get_train_augs())
validset = SegmentationDataset(valid_df, get_valid_augs())
I want to calculate variance, gabor and entropy filters to some images, but the images have blank areas that I donĀ“t want to apply the filters. I try to use a np.ma.array option but return this error: "'MaskedArray' object is not callable"
this is the code:
def bandas_img (image, array1, array2):
imagenRGB = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
return cv2.inRange(imagenRGB, array1, array2)
def rescale_by_width(image, target_width, method=cv2.INTER_LANCZOS4):
h = int(round(target_width * image.shape[0] / image.shape[1]))
return cv2.resize(image, (target_width, h), interpolation=method)
#Resized image by width
target_width = 400
#To mask null values
mask_image = True
hue = 20
sat = 57
value = 116
toleranciaH = 150
toleranciaS = 150
toleranciaV = 150
lower = np.array ([hue - toleranciaH, sat - toleranciaS, value - toleranciaV])
upper = np.array ([hue + toleranciaH, sat + toleranciaS, value + toleranciaV])
#working directory where the csv files are
os.chdir("C:/Users/Mariano/Documents/3 - Visual studio code/Prueba filtrar mascara/filtrada") ##ojoooo las barras van /// y no D:/OMAN/BHI TEXTURES/U-2
file_extension = '.png' #Check Extension
all_filenames = [i for i in glob.glob(f"*{file_extension}")]
for f in all_filenames:
image = cv2.imread(f,1)
#resized Image
resized1 = rescale_by_width(image, target_width)
#Set f value (image name)
f = f.replace(".png", "")
#Save Image
plt.imsave(f+"_resized.png", resized1)
#Create mask for null values
if mask_image == True:
mask = bandas_img(resized1, lower, upper)
cv2.imwrite(f+"_mask.png", mask)
resized2 = io.imread(f+"_resized.png", as_gray=True)
resized3 = resized2.copy()
#First Try
resized3[mask == 0] = np.nan
resized3[mask != 0] = resized2[mask != 0]
#Second Try
mask1 = (resized3 == np.nan)
resized_Mask = np.ma.array(resized3, mask = mask1)
#Varianza
k=6
img_mean = ndimage.uniform_filter(resized_Mask, (k, k))
img_sqr_mean = ndimage.uniform_filter(resized_Mask**2, (k, k))
img_var = img_sqr_mean - img_mean**2
img_var[mask == 0] = 1
plt.imsave(f+"_varianza.png", img_var)
I've been trying to run a detection model on a raspberry pi but when I try I get the error that:
could not broadcast input array from shape (320,320,3) into shape (640,640,3)
when I run this
import re
import cv2
from tflite_runtime.interpreter import Interpreter
import numpy as np
CAMERA_WIDTH = 640
CAMERA_HEIGHT = 480
def load_labels(path='labels.txt'):
"""Loads the labels file. Supports files with or without index numbers."""
with open(path, 'r', encoding='utf-8') as f:
lines = f.readlines()
labels = {}
for row_number, content in enumerate(lines):
pair = re.split(r'[:\s]+', content.strip(), maxsplit=1)
if len(pair) == 2 and pair[0].strip().isdigit():
labels[int(pair[0])] = pair[1].strip()
else:
labels[row_number] = pair[0].strip()
return labels
def set_input_tensor(interpreter, image):
"""Sets the input tensor."""
tensor_index = interpreter.get_input_details()[0]['index']
input_tensor = interpreter.tensor(tensor_index)()[0]
input_tensor[:, :] = np.expand_dims((image-255)/255, axis=0)
def get_output_tensor(interpreter, index):
"""Returns the output tensor at the given index."""
output_details = interpreter.get_output_details()[index]
tensor = np.squeeze(interpreter.get_tensor(output_details['index']))
return tensor
def detect_objects(interpreter, image, threshold):
"""Returns a list of detection results, each a dictionary of object info."""
set_input_tensor(interpreter, image)
interpreter.invoke()
# Get all output details
boxes = get_output_tensor(interpreter, 0)
classes = get_output_tensor(interpreter, 1)
scores = get_output_tensor(interpreter, 2)
count = int(get_output_tensor(interpreter, 3))
results = []
for i in range(count):
if scores[i] >= threshold:
result = {
'bounding_box': boxes[i],
'class_id': classes[i],
'score': scores[i]
}
results.append(result)
return results
def main():
labels = load_labels()
interpreter = Interpreter('detect.tflite')
interpreter.allocate_tensors()
_, input_height, input_width, _ = interpreter.get_input_details()[0]['shape']
cap = cv2.VideoCapture(0)
while cap.isOpened():
ret, frame = cap.read()
img = cv2.resize(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), (320,320))
res = detect_objects(interpreter, img, 0.8)
print(res)
for result in res:
ymin, xmin, ymax, xmax = result['bounding_box']
xmin = int(max(1,xmin * CAMERA_WIDTH))
xmax = int(min(CAMERA_WIDTH, xmax * CAMERA_WIDTH))
ymin = int(max(1, ymin * CAMERA_HEIGHT))
ymax = int(min(CAMERA_HEIGHT, ymax * CAMERA_HEIGHT))
cv2.rectangle(frame,(xmin, ymin),(xmax, ymax),(0,255,0),3)
cv2.putText(frame,labels[int(result['class_id'])],(xmin, min(ymax, CAMERA_HEIGHT-20)), cv2.FONT_HERSHEY_SIMPLEX, 0.5,(255,255,255),2,cv2.LINE_AA)
cv2.imshow('Pi Feed', frame)
if cv2.waitKey(10) & 0xFF ==ord('q'):
cap.release()
cv2.destroyAllWindows()
if __name__ == "__main__":
main()
the model is an SSD Mobilenet 640x640 and the images for the model were taken on the raspberry pi as 1028x720 but were downscaled during model training. But I still get this error and I'm not sure how to fix it.
i used this code below to extract patches from a image.
extract code:
import os
import glob
from PIL import Image
Image.MAX_IMAGE_PIXELS = None # to avoid image size warning
imgdir = "/path/to/image/folder"
filelist = [f for f in glob.glob(imgdir + "**/*.png", recursive=True)]
savedir = "/path/to/image/folder/output"
start_pos = start_x, start_y = (0, 0)
cropped_image_size = w, h = (256, 256)
for file in filelist:
img = Image.open(file)
width, height = img.size
frame_num = 1
for col_i in range(0, width, w):
for row_i in range(0, height, h):
crop = img.crop((col_i, row_i, col_i + w, row_i + h))
name = os.path.basename(file)
name = os.path.splitext(name)[0]
save_to= os.path.join(savedir, name+"_{:03}.png")
crop.save(save_to.format(frame_num))
frame_num += 1
Now i want to reconstruct this imagem from all those patches extracted before, i've tried 2 diffenret codes
so my DB is 120x256x256x3 extracted patches, theres 120 patches to fit in 3840x2048 shape..:
patches = []
for directory_path in glob.glob('D:\join_exemplo'):
for img_path in glob.glob(os.path.join(directory_path, "*.png")):
img = cv2.imread(img_path,1)
patches.append(img)
input_patches = np.array(patches)
first i've tried sklearn.feature_extraction.image importing reconstruct_from_patches_2d, but got a black image:
reconstruct = reconstruct_from_patches_2d(input_patches, input_image)
reconstruct = reconstruct.astype(np.uint8)
Image.fromarray(reconstruct, 'RGB').save(r'D:\join_exemplo\re\re3.png')
also tried, this below but got a grayscale tone pattern image
input_image = (3840,2048,3)
reconstructed_arr = np.zeros(shape=(3840,2048,3))
#%%
>>> step = 256
>>> for x in range(img.shape[0]):
for y in range(img.shape[1]):
x_pos, y_pos = x * step, y * step
reconstructed_arr[x_pos:x_pos + 512, y_pos:y_pos + 512] = img[x, y, 0, ...]
>>> (input_image == reconstructed_arr).all()
True
cv2.imwrite(r'D:\join_exemplo\re\re.png',reconstructed_arr)
Can someone see whats wrong? sorry about my bad english
IMG_SIZE = 50
traing_data =[]
def create_training_data():
for c in categories:
path = os.path.join(dir,c)
class_num = categories.index(c) # dog =0, cat =1
for img in os.listdir(path):
try:
img_array = cv2.imread(os.path.join(path,img),cv2.IMREAD_GRAYSCALE)
new_array = cv2.resize(img_array,(IMG_SIZE,IMG_SIZE))
traing_data.append([new_array,class_num])
except Exception as e:
pass # Some photos are broken
create_training_data()
X = np.array(X).reshape(-1,IMG_SIZE,IMG_SIZE,1)
y = np.array(y)
Hi guys! I was studying convolutional neural network from youtube and I was wondering why do I have to resize the image array ( new_array = cv2.resize(img_array,(IMG_SIZE,IMG_SIZE)) ) and the X array ( X = np.array(X).reshape(-1,IMG_SIZE,IMG_SIZE,1) ) ?