I have several folders containing images with identical file names (a.png, etc.). I want to modify the code below so that it reads the identically named files from the different directories and shows their OpenCV/YOLO output at the same time. To be more specific, I have 10 folders that contain the same images exported in different categories, e.g. one folder contains the RGB files and another contains the grayscale files. To compare their outputs, I want to show the images that have the same name but live in different folders. I know it should not be that hard, but I am pretty confused. Thanks in advance!
import cv2
import numpy as np
import os
import matplotlib.pyplot as plt
import tkinter
from tkinter import filedialog

def cal_alpB(minMax):
    minD = minMax[0]
    maxD = minMax[1]
    alpha = 255/(maxD-minD)
    beta = -alpha*minD
    return [alpha, beta]

def getMinMax(path):
    with open(path+'/config') as f:
        minMax = f.read().splitlines()
        minMax = minMax[0].split(',')
        minMax = [eval(x) for x in minMax]
    return minMax

def normalizeData(minMax, img):
    alpB = cal_alpB(minMax)
    img[img>minMax[1]] = minMax[1]
    img[img<0] = 0
    return alpB

def boxDrawing(layerOutput, frameWidth, frameHeight, class_ids, confidences, boxes, img):
    for output in layerOutput:
        for detection in output:
            score = detection[5:]
            class_id = np.argmax(score)
            confidence = score[class_id]
            if confidence > 0.5:
                center_x = int(detection[0] * frameWidth)
                center_y = int(detection[1] * frameHeight)
                width = int(detection[2] * frameWidth)
                height = int(detection[3] * frameHeight)
                left = int(center_x - width / 2)
                top = int(center_y - height / 2)
                class_ids.append(class_id)
                confidences.append(float(confidence))
                boxes.append([left, top, width, height])
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.8, 0.7)
    font = cv2.FONT_HERSHEY_PLAIN
    colors = np.random.uniform(0, 255, size=(len(boxes), 3))
    for i in range(len(boxes)):
        if i in indexes:
            x, y, w, h = boxes[i]
            label = str(classes[class_ids[i]])
            confi = str(round(confidences[i], 2))
            color = colors[i]
            cv2.rectangle(img, (x, y), (x+w, y+h), color, 1)
            cv2.putText(img, label + " " + confi, (x, y+20), font, 1, (255, 255, 255), 1)

def algorythmYolo():
    tkinter.Tk().withdraw()
    folder = filedialog.askdirectory()
    minMax = getMinMax(folder)
    for filename in sorted(os.listdir(folder)):
        img = cv2.imread(os.path.join(folder, filename), -1)
        if img is not None:
            alpB = normalizeData(minMax, img)
            img = cv2.convertScaleAbs(img, alpha=alpB[0], beta=alpB[1])
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
            frameHeight, frameWidth, channels = img.shape
            blob = cv2.dnn.blobFromImage(img, 1/255, (frameWidth, frameHeight), (0, 0, 0), swapRB=True, crop=False)
            yolo.setInput(blob)
            layerOutput = yolo.forward(outputLayers)
            boxes = []
            confidences = []
            class_ids = []
            boxDrawing(layerOutput, frameWidth, frameHeight, class_ids, confidences, boxes, img)
            cv2.imshow("window", img)
            cv2.setWindowTitle('window', folder)
            cv2.waitKey(1)
        else:
            break
    cv2.destroyAllWindows()

yolo = cv2.dnn.readNet("./yolov3.weights", "./yolov3.cfg")
with open("./coco.names", "r") as f:
    classes = f.read().splitlines()
layers_names = yolo.getLayerNames()
outputLayers = [layers_names[i-1] for i in yolo.getUnconnectedOutLayers()]
cv2.namedWindow("window", cv2.WINDOW_NORMAL)
algorythmYolo()
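For reference, a minimal sketch of the pairing idea described above (the folder list, window names and the q-key handling are assumptions, not the original code): take the file names from one reference folder, look the same name up in every other folder, and show one window per folder so the outputs can be compared side by side.

import os
import cv2

folders = ["./rgb", "./gray"]              # hypothetical category folders
reference = folders[0]                     # folder whose file names drive the loop
for filename in sorted(os.listdir(reference)):
    for folder in folders:
        path = os.path.join(folder, filename)
        img = cv2.imread(path, -1)
        if img is None:                    # name missing from this folder, or not an image
            continue
        # run the existing normalization + YOLO pipeline on img here,
        # then show it in a window named after its folder
        cv2.imshow(folder, img)
    if cv2.waitKey(0) & 0xFF == ord('q'):  # any other key advances to the next set of images
        break
cv2.destroyAllWindows()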
I am trying to detect objects in a certain area using YOLOv7 and the DeepSORT algorithm, but in the results I get I see that the IDs keep changing. I am leaving 3 photos so you can see the problem.
As you can see, the IDs are different in every frame.
# class-based virtual zone tracking
import random
import torch
import numpy as np
from models.experimental import attempt_load
from utils.torch_utils import TracedModel
from utils.datasets import letterbox
from utils.plots import plot_one_box, plot_one_box_center
from utils.general import check_img_size, non_max_suppression, scale_coords
import cv2
import time
from google.colab.patches import cv2_imshow
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon

# deep sort
import os
import tensorflow as tf
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if len(physical_devices) > 0:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)
from tensorflow.compat.v1 import ConfigProto
from deep_sort.tracker import Tracker
from deep_sort.detection import Detection
import matplotlib.pyplot as plt
from deep_sort import preprocessing, nn_matching
from tracking_helpers import read_class_names, create_box_encoder
from detection_helpers import *

class YOLOv7:
    def __init__(self, weights: str, image_size: int, device: str):
        self.device = device
        self.weights = weights
        self.model = attempt_load(self.weights, map_location=self.device)  # Model load FP32
        self.stride = int(self.model.stride.max())
        self.image_size = check_img_size(image_size, self.stride)
        if self.device != 'cpu':
            self.half = True
        else:
            self.half = False
        if self.half:
            self.model.half()  # FP16
        self.names = self.model.module.names if hasattr(self.model, 'module') else self.model.names
        color_values = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(self.names))]
        self.colors = {i: color_values[i] for i in range(len(self.names))}

    def detect(self, raw_image: np.ndarray, conf_thresh=0.45, iou_thresh=0.45, classes=[0]):  # default class: people
        # Run inference
        if self.device != 'cpu':
            self.model(torch.zeros(1, 3, self.image_size, self.image_size).to(self.device).type_as(next(self.model.parameters())))
        with torch.no_grad():
            image = letterbox(raw_image, self.image_size, stride=self.stride)[0]
            image = image[:, :, ::-1].transpose(2, 0, 1)
            image = np.ascontiguousarray(image)
            image = torch.from_numpy(image).to(self.device)
            image = image.half() if self.half else image.float()
            image /= 255.0
            if image.ndimension() == 3:
                image = image.unsqueeze(0)
            # Inference
            detections = self.model(image, augment=False)[0]
            # Apply NMS
            detections = non_max_suppression(detections, conf_thresh, iou_thresh, classes=classes, agnostic=False)[0]
            # Rescale boxes from img_size to raw image size
            detections[:, :4] = scale_coords(image.shape[2:], detections[:, :4], raw_image.shape).round()
        return detections

    def tracking(self, video_frame, yolo_dets, inside_poly=True, count_objects: bool = False, verbose=False, reID_model_path="./deep_sort/model_weights/mars-small128.pb", nms_max_overlap: float = 1.0, max_cosine_distance: float = 0.4, nn_budget: float = None):
        class_names = read_class_names()
        encoder = create_box_encoder(reID_model_path, batch_size=1)
        nms_max_overlap = nms_max_overlap
        metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
        tracker = Tracker(metric)
        *xyxy, conf, cls = yolo_dets
        frame = cv2.cvtColor(video_frame, cv2.COLOR_BGR2RGB)
        if yolo_dets is None:
            bboxes = []
            scores = []
            classes = []
            num_objects = 0
        else:
            bboxes = yolo_dets[:, :4]
            bboxes[:, 2] = bboxes[:, 2] - bboxes[:, 0]  # convert from xyxy to xywh
            bboxes[:, 3] = bboxes[:, 3] - bboxes[:, 1]
            scores = yolo_dets[:, 4]
            classes = yolo_dets[:, -1]
            num_objects = bboxes.shape[0]
        # how many objects you track
        names = []
        for i in range(num_objects):  # loop through objects and use class index to get class name
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            names.append(class_name)
        names = np.array(names)
        count = len(names)
        if count_objects:
            cv2.putText(frame, "both inside and outside the polygon detection: {}".format(count), (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (0, 0, 0), 2)
        # DeepSORT tracker work starts here
        features = encoder(frame, bboxes)  # encode detections and feed to tracker. [No of BB / detections per frame, embed_size]
        detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip(bboxes, scores, names, features)]  # [No of BB per frame] deep_sort.detection.Detection object
        cmap = plt.get_cmap('tab20b')  # initialize color map
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]
        boxs = np.array([d.tlwh for d in detections])  # run non-maxima suppression below
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]
        tracker.predict()  # Call the tracker
        tracker.update(detections)  # update using Kalman gain
        for track in tracker.tracks:  # update new findings AKA tracks
            #if not track.is_confirmed() or track.time_since_update > 1:
            #    continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            color = colors[int(track.track_id) % len(colors)]  # draw bbox on screen
            color = [i * 255 for i in color]
            #drawing poly
            #pts = np.array([[6,449], [1052, 2], [1914, 6], [1766, 1074], [2, 1076]])
            #frame = cv2.polylines(frame, [pts], True, (0,0,255), 5)
            #creating poly
            #poli = Polygon([(6,449), (1052, 2), (1914, 6), (1766, 1074), (2, 1076)])
            #center = (int((bbox[0] + bbox[2]) / 2), int((bbox[1] + bbox[3]) / 2)) #center point ( (x1 + x2) / 2, (y1 + y2) / 2 )
            #point = Point(center)
            if inside_poly:
                #drawing poly
                pts = np.array([[6,449], [1052, 2], [1914, 6], [1766, 1074], [2, 1076]])
                frame = cv2.polylines(frame, [pts], True, (0,0,255), 5)
                #creating poly
                poli = Polygon([(6,449), (1052, 2), (1914, 6), (1766, 1074), (2, 1076)])
                center = (int((bbox[0] + bbox[2]) / 2), int((bbox[1] + bbox[3]) / 2))  #center point ( (x1 + x2) / 2, (y1 + y2) / 2 )
                point = Point(center)
                if poli.contains(point):
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1]-30)), (int(bbox[0])+(len(class_name)+len(str(track.track_id)))*17, int(bbox[1])), color, -1)
                    cv2.putText(frame, class_name + " : " + str(track.track_id), (int(bbox[0]), int(bbox[1]-11)), 0, 0.6, (255,255,255), 1, lineType=cv2.LINE_AA)
                    cv2.putText(frame, "0", center, 0, 0.6, (255,255,255), 1, lineType=cv2.LINE_AA)
            else:
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1]-30)), (int(bbox[0])+(len(class_name)+len(str(track.track_id)))*17, int(bbox[1])), color, -1)
                cv2.putText(frame, class_name + " : " + str(track.track_id), (int(bbox[0]), int(bbox[1]-11)), 0, 0.6, (255,255,255), 1, lineType=cv2.LINE_AA)
            if verbose == 2:
                print("Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}".format(str(track.track_id), class_name, (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))
        result = np.asarray(frame)
        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        return result

if __name__ == '__main__':
    yolov7 = YOLOv7(weights='yolov7x.pt', device='cpu', image_size=800)
    cap = cv2.VideoCapture('street5sn.mp4')
    torch.cuda.empty_cache()
    # writer
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))  # by default VideoCapture returns float instead of int
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    codec = cv2.VideoWriter_fourcc(*"DIVX")
    out = cv2.VideoWriter("./output/video_out_track5sn-d2.mp4", codec, fps, (width, height))
    while True:
        t1 = time.time()
        ret, frame = cap.read()
        if not ret:
            break
        detections = yolov7.detect(frame)
        vir = yolov7.tracking(frame, detections, count_objects=True, inside_poly=False)
        out.write(vir)
        cv2_imshow(vir)  # colab imshow
        print("add frame ...")
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    out.release()
    cap.release()
    cv2.destroyAllWindows()
I am using this repo. I did not make any changes to any of the other files.
I'm working on the code below. I have a problem with the size of the windows showing 'lane1' and 'lane2': they are so big that they are uncomfortable to work with. Does OpenCV have a function that makes it possible to control the size of the windows?
PS: the code does object detection on multiple ROIs. I'd like to count the number of vehicles appearing in each lane (which means I still have to add some more steps for tracking).
import cv2
import numpy as np

def detection1(y2, y1, x2, x1, name):
    roi = img[y2:y1, x2:x1]
    blob = cv2.dnn.blobFromImage(roi, 1/255, (416, 416), (0, 0, 0), swapRB=True, crop=False)
    net.setInput(blob)
    output_layers_names = net.getUnconnectedOutLayersNames()
    layerOutputs = net.forward(output_layers_names)
    # showing information on the screen
    boxes = []
    confidences = []
    class_ids = []
    for output in layerOutputs:
        for detection in output:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if classes[class_id] in allowed_objects:
                if confidence > 0.2:
                    center_x = int(detection[0]*(x1-x2))  #(x1-x2)=width
                    center_y = int(detection[1]*(y1-y2))  #(y1-y2)=height
                    w = int(detection[2]*(x1-x2))
                    h = int(detection[3]*(y1-y2))
                    x = int(center_x - w/2)
                    y = int(center_y - h/2)
                    boxes.append([x, y, w, h])
                    confidences.append((float(confidence)))
                    class_ids.append(class_id)
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.2, 0.4)
    if len(indexes) > 0:
        for i in indexes.flatten():
            x, y, w, h = boxes[i]
            label = str(classes[class_ids[i]])
            confidence = str(round(confidences[i], 2))
            cv2.rectangle(roi, (x, y), (x+w, y+h), (0, 0, 0), 1)
            cv2.putText(roi, label + " " + confidence, (x, y-15), font, 1, (255, 255, 255), 1)
    cv2.imshow(name, roi)

# Load Yolo
net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg')
classes = []
with open("coco.names", "r") as f:
    classes = f.read().splitlines()
allowed_objects = ['car', 'truck', 'motorbike', 'bicycle', 'bus']

# loading video
cap = cv2.VideoCapture('Traffic_Trim.mp4')
font = cv2.FONT_HERSHEY_PLAIN

# reading frames from video
while True:
    _, img = cap.read()
    detection1(y2=216, y1=1080, x2=1008, x1=1560, name='lane1')
    detection1(y2=216, y1=1080, x2=72, x1=984, name='lane2')
    key = cv2.waitKey(1)
    if key == 27:
        break
cap.release()
cv2.destroyAllWindows()
You can use cv.resizeWindow, coupled with the WINDOW_NORMAL flag to cv.namedWindow.
That spares you from having to resize the image itself (with cv.resize), and it allows you to resize the window with your mouse!
import cv2 as cv
some_image = cv.imread(cv.samples.findFile("lena.jpg"))
cv.namedWindow("some window", cv.WINDOW_NORMAL) # explicit window creation
cv.imshow("some window", some_image)
cv.resizeWindow("some window", 200, 400)
cv.waitKey() # to run the GUI
The available cv::WindowFlags are listed in the official documentation.
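Applied to the windows from the question above (window names taken from that post; the initial sizes are arbitrary examples), the same idea looks roughly like this, placed before the while loop:

import cv2 as cv

cv.namedWindow('lane1', cv.WINDOW_NORMAL)   # create resizable windows up front
cv.namedWindow('lane2', cv.WINDOW_NORMAL)
cv.resizeWindow('lane1', 640, 480)          # initial size; can still be changed with the mouse
cv.resizeWindow('lane2', 640, 480)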
I'm using keypoint detection to find text within a game.
The background in the images below is dynamic; it's always a vaguely moving, star-lit sky that you can barely see.
The detection works well when the text is white:
However, when the text is purple (it is unpredictable when this happens), the detection fails entirely:
The object I'm looking for and the image I'm running detection on are otherwise identical: the needle screenshots are taken directly from within the game (i.e. of the text above), and detection is then run on the exact same location the original screenshots were taken from.
I wrote the code below using the official documentation I found here and here as a guide, but it is very light on explanation.
Question: Is this an inherent limitation, or is there something I can do so that keypoints are also detected in the purple image?
import cv2 as cv
import win32gui, win32con, win32ui
import numpy as np
import glob

def get_haystack_image():
    w, h = 1920, 1080
    hwnd = None
    wDC = win32gui.GetWindowDC(hwnd)
    dcObj = win32ui.CreateDCFromHandle(wDC)
    cDC = dcObj.CreateCompatibleDC()
    dataBitMap = win32ui.CreateBitmap()
    dataBitMap.CreateCompatibleBitmap(dcObj, w, h)
    cDC.SelectObject(dataBitMap)
    cDC.BitBlt((0, 0), (w, h), dcObj, (0, 0), win32con.SRCCOPY)
    signedIntsArray = dataBitMap.GetBitmapBits(True)
    img = np.frombuffer(signedIntsArray, dtype='uint8')
    img.shape = (h, w, 4)
    dcObj.DeleteDC()
    cDC.DeleteDC()
    win32gui.ReleaseDC(hwnd, wDC)
    win32gui.DeleteObject(dataBitMap.GetHandle())
    img = img[..., :3]
    img = np.ascontiguousarray(img)
    return img

def loadImages(directory):
    # Initialise empty array
    image_list = []
    # Add images to array
    for i in directory:
        img = cv.imread(i, cv.IMREAD_UNCHANGED)
        image_list.append((img, i))
    return image_list

def preProcessNeedle(image_list):
    needle_kp1_desc = []
    for i in image_list:
        img = i[0]
        orb = cv.ORB_create(edgeThreshold=0, patchSize=32)
        keypoint_needle, descriptors_needle = orb.detectAndCompute(img, None)
        needle_kp1_desc.append((keypoint_needle, descriptors_needle, img))
    return needle_kp1_desc

def match_keypoints(descriptors_needle, keypoint_haystack, min_match_count):
    orbHaystack = cv.ORB_create(edgeThreshold=0, patchSize=32, nfeatures=3000)
    keypoints_haystack, descriptors_haystack = orbHaystack.detectAndCompute(keypoint_haystack, None)
    FLANN_INDEX_LSH = 6
    index_params = dict(algorithm=FLANN_INDEX_LSH, table_number=6, key_size=12, multi_probe_level=1)
    search_params = dict(checks=50)
    try:
        flann = cv.FlannBasedMatcher(index_params, search_params)
        matches = flann.knnMatch(descriptors_needle, descriptors_haystack, k=2)
    except cv.error:
        return None, None, [], []
    good = []
    points = []
    for pair in matches:
        if len(pair) == 2:
            if pair[0].distance < 0.7*pair[1].distance:
                good.append(pair[0])
    if len(good) > min_match_count:
        for match in good:
            points.append(keypoints_haystack[match.trainIdx].pt)
    return keypoints_haystack, good, points

def shipDetection(needle_kp1_desc):
    res = False
    # Object Detection
    for i, img in enumerate(needle_kp1_desc):
        kp1 = img[0]
        descriptors_needle = img[1]
        needle_img = img[2]
        # get an updated image of the screen & crop it
        keypoint_haystack = get_haystack_image()
        keypoint_haystack = keypoint_haystack[40:110, 850:1000]
        kp2, matches, match_points, ship_avoided = match_keypoints(kp1, descriptors_needle, keypoint_haystack, min_match_count=40)
        # display the matches
        match_image = cv.drawMatches(needle_img, kp1, keypoint_haystack, kp2, matches, None)
        cv.imshow('Keypoint Search', match_image)
        cv.moveWindow("Keypoint Search", 1940, 30)
        cv.waitKey(1)
        if match_points:
            # removed code as irrelevant to detection but left comments in
            # find the center point of all the matched features
            # account for the width of the needle image that appears on the left
            # drawn the found center point on the output image
            # display the processed image
            cv.imshow('Keypoint Search', match_image)
            cv.waitKey(1)
            res = True
            break
    return res

ships_to_avoid = loadImages(glob.glob(r"C:\Users\*.png"))
needle_kp1_desc = preProcessNeedle(ships_to_avoid)
if shipDetection(needle_kp1_desc):
    # do something with the output
Isolating the red channel, converting to grayscale, and applying binary thresholding has normalised the results: they are all now a consistent "white", which my detection successfully identifies.
apply_thresholding performs this pre-processing on a folder: it reads the images from image_dir, writes the processed copies to output_dir, and then deletes the unprocessed originals from image_dir.
import os  # needed for os.walk / os.path / os.remove below

def apply_thresholding():
    # get directory path where the images are stored
    image_dir = r"C:\Users\pre"
    # get directory path where you want to save the images
    output_dir = r"C:\Users\post"
    # iterate through all the files in the image directory
    for _, _, image_names in os.walk(image_dir):
        # iterate through all the files in the image_dir
        for image_name in image_names:
            # check for extension .png
            if '.png' in image_name:
                # get image read path (path should not contain spaces)
                filepath = os.path.join(image_dir, image_name)
                # get image write path
                dstpath = os.path.join(output_dir, image_name)
                print(filepath, dstpath)
                # read the image
                image = cv.imread(filepath)
                r = image.copy()
                # set blue and green channels to 0
                r[:, :, 0] = 0
                r[:, :, 1] = 0
                # convert to grayscale now we've dropped b and g channels
                gray = cv.cvtColor(r, cv.COLOR_BGR2GRAY)
                # Apply binary thresholding
                (T, thresh) = cv.threshold(gray, 40, 255, cv.THRESH_BINARY)
                # write the image in a different path with the same name
                cv.imwrite(dstpath, thresh)
    files = glob.glob(r"C:\Users\pre\*")
    for f in files:
        os.remove(f)
I then applied the same channel isolation, grayscale conversion and binary thresholding to my detection area.
def get_haystack_image():
    w, h = 1920, 1080
    hwnd = None
    wDC = win32gui.GetWindowDC(hwnd)
    dcObj = win32ui.CreateDCFromHandle(wDC)
    cDC = dcObj.CreateCompatibleDC()
    dataBitMap = win32ui.CreateBitmap()
    dataBitMap.CreateCompatibleBitmap(dcObj, w, h)
    cDC.SelectObject(dataBitMap)
    cDC.BitBlt((0, 0), (w, h), dcObj, (0, 0), win32con.SRCCOPY)
    signedIntsArray = dataBitMap.GetBitmapBits(True)
    img = np.frombuffer(signedIntsArray, dtype='uint8')
    img.shape = (h, w, 4)
    dcObj.DeleteDC()
    cDC.DeleteDC()
    win32gui.ReleaseDC(hwnd, wDC)
    win32gui.DeleteObject(dataBitMap.GetHandle())
    img = img[..., :3]
    img = np.ascontiguousarray(img)
    r = img.copy()
    # set blue and green channels to 0
    r[:, :, 0] = 0
    r[:, :, 1] = 0
    # convert to grayscale now we've dropped b and g channels
    gray = cv.cvtColor(r, cv.COLOR_BGR2GRAY)
    # Apply binary thresholding
    (T, img) = cv.threshold(gray, 40, 255, cv.THRESH_BINARY)
    return img
I have a collection of images as below -
Example 1
Example 2
Example 3
These represent dates in DDMMYYYY format. For each of these images, I want to save each digit as a separate image. For Example 1, I wish to save 7, 9, 0, 8, 5, 8, 7, 1 as separate images sliced from the original. So far I have tried various methods described in different Stack Overflow answers and blog posts, but none of them seems to work.
Code to extract boxes surrounding dates -
from glob import glob
import cv2 as cv
import numpy as np
from tqdm import tqdm

class ExtractRectangle:
    def __init__(self):
        super().__init__()
        self.minLinLength_h = 70
        self.minLinLength_v = 5
        self.maxLineGap = 20

    def is_horizontal(self, line, thresh=5):
        return abs(line[1] - line[3]) <= thresh

    def is_vertical(self, line, thresh=5):
        return abs(line[0] - line[2]) <= thresh

    def get_lines(self, canny, horizontal=True):
        lines = []
        if horizontal:
            linesP = cv.HoughLinesP(
                canny,
                rho=1,
                theta=np.pi / 180,
                threshold=10,
                lines=None,
                minLineLength=self.minLinLength_h,
                maxLineGap=20,
            )
        else:
            linesP = cv.HoughLinesP(
                canny,
                rho=1,
                theta=np.pi / 180,
                threshold=10,
                lines=None,
                minLineLength=self.minLinLength_v,
                maxLineGap=20,
            )
        if linesP is not None:
            for i in range(0, len(linesP)):
                l = linesP[i][0]
                if self.is_horizontal(l, 3) and horizontal:
                    lines.append(l)
                elif self.is_vertical(l, 3):
                    lines.append(l)
        return lines

    def remove_whitespace(self, img):
        # https://stackoverflow.com/questions/48395434/how-to-crop-or-remove-white-background-from-an-image
        gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
        th, threshed = cv.threshold(gray, 127, 255, cv.THRESH_BINARY_INV)
        kernel = cv.getStructuringElement(cv.MORPH_ELLIPSE, (11, 11))
        morphed = cv.morphologyEx(threshed, cv.MORPH_CLOSE, kernel)
        cnts = cv.findContours(morphed, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)[-2]
        cnt = sorted(cnts, key=cv.contourArea)[-1]
        x, y, w, h = cv.boundingRect(cnt)
        dst = img[y : y + h, x : x + w]
        return dst

    def process_image(self, filename, path):
        errenous = False
        img = cv.imread(cv.samples.findFile(filename))
        img = self.remove_whitespace(img)
        cImage = np.copy(img)
        gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
        canny = cv.Canny(gray, 100, 200)
        horizontal_lines = self.get_lines(canny)
        horizontal_lines = sorted(horizontal_lines, key=lambda a_entry: a_entry[..., 1])
        vertical_lines = self.get_lines(canny, horizontal=False)
        vertical_lines = sorted(vertical_lines, key=lambda a_entry: a_entry[..., 0])
        if len(horizontal_lines) > 0:
            initial_line = horizontal_lines[0]
            final_line = horizontal_lines[-1]
            # LeftTop(x1, y1) -> RightTop(x2, y1) -> RightBottom(x2, y2) -> LeftBottom(x1, y2)
            y1 = initial_line[1]
            y2 = final_line[1]
            bottom = min(y1, y2)
            top = max(y1, y2)
            # post whitespace removal, dates should only be the major component
            if (top-bottom) / img.shape[0] < 0.6:
                errenous = True
        else:
            errenous = True
        if len(vertical_lines) > 0:
            initial_line = vertical_lines[0]
            final_line = vertical_lines[-1]
            x1 = initial_line[0]
            x2 = final_line[0]
            left = min(x1, x2)
            right = max(x1, x2)
            # as dates occupy majority of the horizontal space
            if (right-left) / img.shape[1] < 0.95:
                errenous = True
        else:
            errenous = True
        if not errenous:
            # cImage = cv.rectangle(cImage, (left, bottom), (right, top), (255, 0, 0), 2)
            cImage = cImage[
                bottom : bottom + (top - bottom), left : left + (right - left)
            ]
        cv.imwrite(f"{path}/{filename.split('/')[-1]}", cImage)

if __name__ == "__main__":
    extract = ExtractRectangle()
    test_files = glob("data/raw/test/*.png")
    test_path = "data/processed/test/"
    for path in tqdm(test_files):
        extract.process_image(path, test_path)
    train_files = glob("data/raw/train/*.png")
    train_path = "data/processed/train/"
    for path in tqdm(train_files):
        extract.process_image(path, train_path)
Resultant detection for above images -
Example 1
Example 2
Example 3
Some other samples
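For completeness, a hedged sketch of one common way to go from the cropped date box to per-digit images (the split_digits helper, the thresholds, and the digits output folder are assumptions, not part of the original post): threshold the crop, find external contours, and save each bounding box from left to right.

import cv2 as cv

def split_digits(cropped_path, out_dir="digits"):
    # hypothetical helper: threshold the cropped date box and save one image per digit
    img = cv.imread(cropped_path)
    gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    _, thresh = cv.threshold(gray, 0, 255, cv.THRESH_BINARY_INV + cv.THRESH_OTSU)
    cnts, _ = cv.findContours(thresh, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)
    boxes = sorted((cv.boundingRect(c) for c in cnts), key=lambda b: b[0])  # left to right
    for i, (x, y, w, h) in enumerate(boxes):
        if w * h < 50:  # skip tiny specks; tune for the real images
            continue
        cv.imwrite(f"{out_dir}/digit_{i}.png", img[y:y + h, x:x + w])  # out_dir must already exist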
The code below is able to detect objects without issue; however, towards the end there is the line "cv2.imshow("demo", img)".
I would expect this window to show the image with the generated bounding boxes and labels, but all I get is a blank window. I originally got this code from some examples on the internet, so I'm a bit lost as to how to position that line, or why it isn't showing the image.
import cv2
import numpy as np

def take_pic(output_filename):
    import os
    capture_img = "ffmpeg -y -rtsp_transport udp -i rtsp://mycamera:apassword#172.16.66.106/live -vframes 1 " + output_filename

net = cv2.dnn.readNet("yolov3.weights", "./darknet/cfg/yolov3.cfg")
classes = []
with open("./darknet/data/coco.names", "r") as f:
    classes = [line.strip() for line in f.readlines()]
layer_names = net.getLayerNames()
output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]
colors = np.random.uniform(0, 255, size=(len(classes), 3))
output_filename = "/tmp/camera.jpeg"
cap = cv2.imread(output_filename)
j = 0
if j == 0:
    cv2.namedWindow("demo", cv2.WINDOW_AUTOSIZE)
    while True:
        take_pic(output_filename)
        cap = cv2.imread(source)
        j = j + 1
        print("j= " + str(j))
        img = cap
        img = cv2.resize(img, None, fx=0.4, fy=0.4)
        height, width, channels = img.shape
        blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
        net.setInput(blob)
        outs = net.forward(output_layers)
        class_ids = []
        confidences = []
        boxes = []
        for out in outs:
            for detection in out:
                scores = detection[5:]
                class_id = np.argmax(scores)
                confidence = scores[class_id]
                if confidence > 0.5:
                    # Object detected
                    center_x = int(detection[0] * width)
                    center_y = int(detection[1] * height)
                    print(str(center_x) + " " + str(center_y))
                    w = int(detection[2] * width)
                    h = int(detection[3] * height)
                    # Rectangle coordinates
                    x = int(center_x - w / 2)
                    y = int(center_y - h / 2)
                    boxes.append([x, y, w, h])
                    confidences.append(float(confidence))
                    class_ids.append(class_id)
        indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
        font = cv2.FONT_HERSHEY_PLAIN
        for i in range(len(boxes)):
            if i in indexes:
                x, y, w, h = boxes[i]
                label = str(classes[class_ids[i]])
                print("label :" + str(label) + "x: " + str(x) + " y: " + str(y))
                color = colors[i]
                cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
                cv2.putText(img, label, (x, y + 30), font, 3, color, 3)
        cv2.imshow("demo", img)
else:
    print("camera open failed")
cv2.destroyAllWindows()
With OpenCV, imshow has to be accompanied by a waitKey call in order to display an image.
Paste something similar to this towards the end of your loop, after you call cv2.imshow:
if cv2.waitKey(0) == ord('q'):
    print('exiting loop')
    break
If the image shows up blank in imshow, you might need to multiply the pixel values by 255. For instance, in Matlab images are normalized between 0 and 1.
Try:
cv2.imshow("demo", img * 255)
cv2.waitKey(0)