I wrote a code which runs the object detection(yolo) against bunch of images and writes output of the object detection along with the image into different folders based on the predicted classes. It basically classifies images based on the content into different folders. I created the folders based on the classID. Here is the code:
files=glob.glob(data_path)
for f1 in files:
image=cv2.imread(f1)
# construct a blob from the input image and then perform a forward
# pass of the YOLO object detector, giving us our bounding boxes and
# associated probabilities
blob = cv2.dnn.blobFromImage(image, 1 / 255.0, (416, 416),
swapRB=True, crop=False)
net.setInput(blob)
layerOutputs = net.forward(ln)
# loop over each of the layer outputs
for output in layerOutputs:
# loop over each of the detections
for detection in output:
# extract the class ID and confidence (i.e., probability) of
# the current object detection
scores = detection[5:]
classID = np.argmax(scores)
confidence = scores[classID]
box = detection[0:4]
(centerX, centerY, width, height) = box.astype("int")
# get upper left corner
x = int(centerX - (width / 2))
y = int(centerY - (height / 2))
# update our list of bounding box coordinates, confidences,
# and class IDs
boxes.append([x, y, int(width), int(height)])
confidences.append(float(confidence))
classIDs.append(classID)
# filter out weak predictions by ensuring the detected
# probability is greater than the minimum probability
if confidence > args["confidence"]:
# extract the bounding box coordinates
(x, y) = (boxes[i][0], boxes[i][1])
(w, h) = (boxes[i][2], boxes[i][3])
# draw a bounding box rectangle and label on the image
color = [int(c) for c in COLORS[classIDs[i]]]
cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
text = "{}: {:.4f}".format(LABELS[classIDs[i]], confidences[i])
cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX,
0.5, color, 2)
# write output files
class_dir = os.path.join('/path to folder/', LABELS[classID])
if not os.path.exists(class_dir):
os.makedirs(class_dir)
path = os.path.join(class_dir, f1.split('/')[-1][:-4])
cv2.imwrite(path + '.jpeg', image)
with open((path + '.txt'), 'a+') as f:
f.write(str(classID) + ' ' + str(box[0]) + ' ' + str(box[1]) + ' ' + str(box[2]) + ' ' + str(box[3]) + '\n')
I would like to know which images did not have any object of interest to be detected.
My question: sometimes object detection does not detect any objects in the image,so it just ignores those images and does not save them. How do I modify this code to write those images into a "unclassified" folder?
Related
im trying to capture position of license plate with webcam feed using YOLOv4 tiny then input the result to easyOCR to extract the characters. The detection works well in real time, however when i apply the OCR the webcam stream become really laggy. Is there anyway i can improve this code to make it make it less laggy?
my YOLOv4 detection
#detection
while 1:
#_, pre_img = cap.read()
#pre_img= cv2.resize(pre_img, (640, 480))
_, img = cap.read()
#img = cv2.flip(pre_img,1)
hight, width, _ = img.shape
blob = cv2.dnn.blobFromImage(img, 1 / 255, (416, 416), (0, 0, 0), swapRB=True, crop=False)
net.setInput(blob)
output_layers_name = net.getUnconnectedOutLayersNames()
layerOutputs = net.forward(output_layers_name)
boxes = []
confidences = []
class_ids = []
for output in layerOutputs:
for detection in output:
score = detection[5:]
class_id = np.argmax(score)
confidence = score[class_id]
if confidence > 0.7:
center_x = int(detection[0] * width)
center_y = int(detection[1] * hight)
w = int(detection[2] * width)
h = int(detection[3] * hight)
x = int(center_x - w / 2)
y = int(center_y - h / 2)
boxes.append([x, y, w, h])
confidences.append((float(confidence)))
class_ids.append(class_id)
indexes = cv2.dnn.NMSBoxes(boxes, confidences, .5, .4)
boxes = []
confidences = []
class_ids = []
for output in layerOutputs:
for detection in output:
score = detection[5:]
class_id = np.argmax(score)
confidence = score[class_id]
if confidence > 0.5:
center_x = int(detection[0] * width)
center_y = int(detection[1] * hight)
w = int(detection[2] * width)
h = int(detection[3] * hight)
x = int(center_x - w / 2)
y = int(center_y - h / 2)
boxes.append([x, y, w, h])
confidences.append((float(confidence)))
class_ids.append(class_id)
indexes = cv2.dnn.NMSBoxes(boxes, confidences, .8, .4)
font = cv2.FONT_HERSHEY_PLAIN
colors = np.random.uniform(0, 255, size=(len(boxes), 3))
if len(indexes) > 0:
for i in indexes.flatten():
x, y, w, h = boxes[i]
label = str(classes[class_ids[i]])
confidence = str(round(confidences[i], 2))
color = colors[i]
cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
# detection= cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
detected_image = img[y:y+h, x:x+w]
cv2.putText(img, label + " " + confidence, (x, y + 400), font, 2, color, 2)
#print(detected_image)
cv2.imshow('detection',detected_image)
cv2.imwrite('lp5.jpg',detected_image)
cropped_image = cv2.imread('lp5.jpg')
cv2.waitKey(5000)
print("system is waiting")
result = OCR(cropped_image)
print(result)
easy OCR function
def OCR(cropped_image):
reader = easyocr.Reader(['en'], gpu=False) # what the reader expect from the image
result = reader.readtext(cropped_image)
text = ''
for result in result:
text += result[1] + ' '
spliced = (remove(text))
return spliced
There are several points.
cv2.waitKey(5000) in your loop causes some delay even though you keep pressing a key. So remove it if you are not debugging.
You are saving a detected region into a JPEG image file and loading it each time. So pass the region(Numpy array) on the OCR module directly.
EasyOCR is a DNN model based on ResNet, but you are not using a GPU(gpu=False). So use GPU.(See this benchmark by Liao.)
You are creating an easyocr.Reader instance each time in a loop. Creating it requires to load and initialize a DNN model. This is a huge workload causing the major bottleneck. So create only single instance before the loop and reuse it inside a loop.
You are essentially saying "the while loop must be fast."
And of course the OCR() call is a bit slow.
Ok, good.
Don't call OCR() from within the loop.
Rather, enqueue a request,
and let another thread / process / host
worry about the OCR computation,
while the loop quickly continues
upon its merry way.
You could use a threaded Queue,
or a subprocess,
or blast it over to RabbitMQ or Kafka.
The simplest approach would be to
simply overwrite /tmp/cropped_image.png
within the loop,
and have another process notice such
updates and (slowly) call OCR(),
appending the results to a log file.
There might be a couple of updates
to the image file while a single
OCR call is in progress, and that's fine.
The two are decoupled from one another,
each progressing at their own pace.
Downside of a queue would be OCR
sometimes falling behind -- you actually
want to shed load by skipping some
(redundant) cropped images.
The two are racing, and that's fine.
But take care to do things in atomic
fashion -- you wouldn't want to OCR
an image that starts with one frame
and ends with part of a subsequent
frame.
Write to a temp file and, after close(),
use os.rename() to atomically
make those pixels available under
the name that the OCR daemon
will read from.
Once it has a file descriptor
open for read, it will have no
problem reading to EOF without
interference, the kernel takes
care of that for us.
I am currently working on creating a python software that tracks players on a soccer field. I got the player detection working with YoloV3 and was able to output quite a nice result with players centroids and boxes drawn. What i want to do now is translate the players position and project their centroids onto a png/jpg of a soccerfield. For this I inteded to use two arrays with refrence points one for the soccerfield-image and one for the source video. But my question now is how do I translate the coordinates of the centroids to the soccerfield image.
Similiar example:
Example
How the boxes and Markers are drawn:
def draw_labels_and_boxes(img, boxes, confidences, classids, idxs, colors, labels):
# If there are any detections
if len(idxs) > 0:
for i in idxs.flatten():
# Get the bounding box coordinates
x, y = boxes[i][0], boxes[i][1]
w, h = boxes[i][2], boxes[i][3]
# Draw the bounding box rectangle and label on the image
cv.rectangle(img, (x, y), (x + w, y + h), (255, 255, 255), 2)
cv.drawMarker (img, (int(x + w / 2), int(y + h / 2)), (x, y), 0, 20, 3)
return img
Boxes are generated like this:
def generate_boxes_confidences_classids(outs, height, width, tconf):
boxes = []
confidences = []
classids = []
for out in outs:
for detection in out:
# print (detection)
# a = input('GO!')
# Get the scores, classid, and the confidence of the prediction
scores = detection[5:]
classid = np.argmax(scores)
confidence = scores[classid]
# Consider only the predictions that are above a certain confidence level
if confidence > tconf:
# TODO Check detection
box = detection[0:4] * np.array([width, height, width, height])
centerX, centerY, bwidth, bheight = box.astype('int')
# Using the center x, y coordinates to derive the top
# and the left corner of the bounding box
x = int(centerX - (bwidth / 2))
y = int(centerY - (bheight / 2))
# Append to list
boxes.append([x, y, int(bwidth), int(bheight)])
confidences.append(float(confidence))
classids.append(classid)
return boxes, confidences, classids
Assuming a stationary camera,
Find the coordinates of the four corners of the field.
Find the corresponding four corners in the top-view image that you want to create.
Find a homography matrix using these two sets of points. You can use OpenCV's findHomography for this.
Transform all the centroids using this homography matrix and that should give you your coordinates in the new image space. You can use warpPerspective for doing this.
Recently during COVID19 pandemic many developers have developed "social-distancing-monitoring-system". There a few of them also developed "Bird's Eye View" of the system. Your problem is just similar. As external links are not accepted here, so I am not able to post the exact link(s). Please check their codes in GitHub.
I want to crop my image based on the coordinate boxes of detected objects, the one with classID=1.
There might be multiple objects with the same id or other classes as well.
My problem is that my code only returns one cropped image, How could I return all cropped images with ClassID=1?
I have totall of 6 classes in which I am interested in ClassID=1.
# initializing bounding boxes, confidences, and classIDs.
boxes = []
confidences = []
classIDs = []
for output in layersOutputs:
# loop over each of the detections
for detection in output:
# extract the class ID and confidence
scores = detection[5:]
classID = np.argmax(scores)
confidence = scores[classID]
# filter out weak predictions
if confidence > c_threshold:
box = detection[0:4] * np.array([W, H, W, H])
(centerX, centerY, width, height) = box.astype("int")
#coordinates
x = int(centerX - (width / 2))
y = int(centerY - (height / 2))
# update bounding box coordinates, confidences, classIDs
boxes.append([x, y, int(width), int(height)])
confidences.append(float(confidence))
classIDs.append(classID)
# applying non maximum suppression
ind = cv.dnn.NMSBoxes(boxes, confidences, c_threshold, nms)
if len(ind) > 0:
# loop over the indexes that we want to keep
for i in ind.flatten():
# extract the bounding box coordinates
(x, y) = (boxes[1][0], boxes[1][1])
(w, h) = (boxes[1][2], boxes[1][3])
for i in classIDs:
if i != 1:
continue
# extract the bounding box coordinates
(x, y) = (boxes[i][0], boxes[i][1])
(w, h) = (boxes[i][2], boxes[i][3])
# crop that part of image which contains desired object
image = image[y:y + h, x:x + w]
cv.imshow("Image", image)
path = '/path to folder'
cv.imwrite(os.path.join(path, 'PImage.jpg'), image)
#
cv.waitKey(0)
Edited: As you can see there are many types of animals in this picture, I am trying to crop part of image that has dogs in it. I already got the coordinate bounding boxes related to dog parts(which means that I know where is the location of the rectangle that has dog in it as indicated in the photo)
I want to crop those rectangles that I indicated in the image. Dog has class id=1. I have class cat and other animals with different indexes.
Your loop is incorrect.
classID= [1]
for i in classID:
Here you basically say for i in [1]:, which crops only for your first detection. Instead, you should loop over all detections. Assuming the rest of your code is correct, the following loops over all detections logged in classID, and only crops if it belongs to class 1.
for i in classID:
if i!=1:
continue
I did type print(type(classID)) it tells that
0
<class 'numpy.int64'>
1
<class 'numpy.int64'>
1
<class 'numpy.int64'>
0
<class 'numpy.int64'>
As far as I know I can iterate on iterable objects, such as lists, single values such as int64 are not iterable.
How could I solve this problem?
The problem is you are losing the reference to the original image in the following line.
image = image[y:y + h, x:x + w]
instead you can create new variable for each dog image
dog_img = image[y:y + h, x:x + w]
Also while writing, you are writing by the same name, so it will overwrite the previous instance of the image, so try to make the name of the image to be dynamic like dog1.jpg, dog2.jpg ...
c = 0
for i in classIDs:
if i != 1:
continue
# extract the bounding box coordinates
(x, y) = (boxes[i][0], boxes[i][1])
(w, h) = (boxes[i][2], boxes[i][3])
# crop that part of image which contains desired object
dog_img = image[y:y + h, x:x + w]
cv.imshow("Image", dog_img )
path = '/path to folder'
c +=1
cv.imwrite(os.path.join(path, 'PImage'+str(c)+'.jpg'), dog_img )
#
cv.waitKey(0)
the full error says:
cv2.error: OpenCV(4.1.0) C:\projects\opencv-python\opencv\modules\dnn\src\caffe\caffe_io.cpp:1132: error: (-2:Unspecified error) FAILED: fs.is_open(). Can't open "frozen_east_text_detection.pb" in function 'c
here i have the code:
import time
from imutils.object_detection import non_max_suppression
import numpy as np
import cv2
import argparse
imagePath = "C:/xampp/htdocs/Tensorflow/TextDetection-master/images/tabla1.jpg"
east = "frozen_east_text_detection.pb"
newW = 640
newH = 480
min_confidence = 0.5
image = cv2.imread(imagePath) # it is working variable
orig = image.copy()
(H,W) = image.shape[:2]
set the new width an height and then determine the ratio in change
rW = W / float(newW)
rH = H / float(newH)
resize the image and grab the new image dimensions
image = cv2.resize(image, (newW, newH))
(H,W) = image.shape[:2]
"""
In order to perform text detection using OpenCV and the EAST deep learning model,
we need to extract the output feature maps of TWO LAYERS
"""
# Define the TWO output layer names ofr the EAST detector model
# FIRST LAYER : output probabilities of a region containing text or not.
# SECOND LAYER: bounding box coordinates of text.
layerNames = [
"feature_fusion/Conv_7/Sigmoid",
"feature_fusion/concat_3"
]
print("[INFO] loading EAST detector")
net = cv2.dnn.readNet( east ) #Load the Nerual Network into memory
# construct a blob from the image and then perform a forward pass of
# the model to obtain the two output layer sets
blob = cv2.dnn.blobFromImage( #https://www.pyimagesearch.com/2017/11/06/deep-learning-opencvs-blobfromimage-works/
image , 1.0 , (W,H) , (123.68, 116.78, 103.94) , swapRB=True, crop=False
)
start = time.time()
net.setInput(blob)
(scores, geometry) = net.forward(layerNames)
end = time.time()
print("[INFO] text detection took {:.6f} seconds".format(end-start))
# grab the umber of rows and columns from the scores volume, then
# initialize aour set of bounding box rectagles and corresponding
# cofidence scores
(numRows, numCols) = scores.shape[2:4]
rects = []
confidences = []
# loop over the number of rows
for y in range(0,numRows):
#extract the scores (probabilities), followed by the geometrical
#data used to derive potential bounding box coordinates that
#surround text
scoresData = scores[0, 0, y]
xData0 = geometry[0, 0, y]
xData1 = geometry[0, 1, y]
xData2 = geometry[0, 2, y]
xData3 = geometry[0, 3, y]
anglesData = geometry[0, 4, y]
#loop over the number of columns
for x in range(0, numCols):
#i our score does not have sufficient probability, ignore it
if (scoresData[x] < min_confidence):
continue
# compute the offset factor as our resulting feature maps will
# be 4x smaller than the input image
(offsetX, offsetY) = (x * 4.0 , y * 4.0)
# extract the rotation angle for the prediction and then
# compute the sin and cosine
angle = anglesData[x]
cos = np.cos(angle)
sin = np.sin(angle)
# use the geometry volume to derive the width and height of
# the bounding box
h = xData0[x] + xData2[x]
w = xData1[x] + xData3[x]
# compute both the starting and ending (x,y)-coordinates for
# the text prediction bounding box
endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]) )
endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]) )
startX = int(endX - w)
startY = int(endY - h)
#add the bounding box coordinates and probability score to
#our respective lists
rects.append( (startX, startY, endX, endY) )
confidences.append( scoresData[x] )
#apply non-maxima suppression to suppress weak, overlapping bounding boxes
boxes = non_max_suppression(np.array(rects) , probs=confidences) #imutils-> https://github.com/jrosebr1/imutils/blob/master/imutils/object_detection.py#L4
#loop over the bounding boxes
for (startX, startY, endX, endY) in boxes:
# scale the bounding box coordinates based on the respective ratios
startX = int(startX * rW)
startY = int(startY * rH)
endX = int(endX * rW)
endY = int(endY *rH)
#draw the bounding box on the image
cv2.rectangle( orig , (startX,startY) , (endX,endY) , (0,255,0) , 2 )
#show the outut image
cv2.imshow("Text detection",orig)
cv2.waitKey(0)
cv2.destroyAllWindows()
you can fix this by providing the absolute filepath of frozen_east_text_detection.pb from the main function of your code
Use this link to download model
https://www.dropbox.com/s/r2ingd0l3zt8hxs/frozen_east_text_detection.tar.gz?dl=1
you can further refer to OpenCV official blog
https://learnopencv.com/deep-learning-based-text-detection-using-opencv-c-python/
i am using yolo - python to detect object from multiple images.
i need to loop through result (describe detected object) to write that result in multiple text files (same name with name of image).
Example: 1.jpg detected 1 car & 1 dog => 1.txt contain car & dog.
i tried create multiple text file with glob(input image folder)
i tried loop through text (describe detected object) but i can't write result of each image to each text file. Something wrong with loop.
Please check the code in bottom, part of cv2.putText.
import numpy as np
import argparse
import time
import cv2
import os, os.path
import glob
# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--input", required=True,
help="path to input image")
ap.add_argument("-y", "--yolo", required=True,
help="base path to YOLO directory")
ap.add_argument("-c", "--confidence", type=float, default=0.5,
help="minimum probability to filter weak detections")
ap.add_argument("-t", "--threshold", type=float, default=0.3,
help="threshold when applyong non-maxima suppression")
args = vars(ap.parse_args())
num = 1
text_path_list = []
def create_result_text(textDir = "input"):
for i in glob.glob("input/*.jpg"):
f = open(i.rsplit( ".", 1 )[ 0 ] + ".txt", "w+")
f.close()
temp = os.listdir(textDir)
for i in temp:
if (i.endswith("txt")):
text_path_list.append(i)
create_result_text()
print(text_path_list)
# load the COCO class labels our YOLO model was trained on
labelsPath = os.path.sep.join([args["yolo"], "yolov3.txt"])
LABELS = open(labelsPath).read().strip().split("\n")
# initialize a list of colors to represent each possible class label
np.random.seed(42)
COLORS = np.random.randint(0, 255, size=(len(LABELS), 3),
dtype="uint8")
# derive the paths to the YOLO weights and model configuration
weightsPath = os.path.sep.join([args["yolo"], "yolov3.weights"])
configPath = os.path.sep.join([args["yolo"], "yolov3.cfg"])
# load our YOLO object detector trained on COCO dataset (80 classes)
print("[INFO] loading YOLO from disk...")
net = cv2.dnn.readNetFromDarknet(configPath, weightsPath)
# read input image
imageDir = "input" #specify your path here
image_path_list = []
valid_image_extensions = [".jpg", ".jpeg", ".png", ".tif", ".tiff"] #specify your vald extensions here
valid_image_extensions = [item.lower() for item in valid_image_extensions]
for file in os.listdir(imageDir):
extension = os.path.splitext(file)[1]
if extension.lower() not in valid_image_extensions:
continue
image_path_list.append(os.path.join(imageDir, file))
# load input image and grab its spatial dimensions
for imagePath in image_path_list:
image = cv2.imread(imagePath)
(H, W) = image.shape[:2]
# determine only the *output* layer names that we need from YOLO
ln = net.getLayerNames()
ln = [ln[i[0] - 1] for i in net.getUnconnectedOutLayers()]
# construct a blob from the input image and then perform a forward
# pass of the YOLO object detector, giving us our bounding boxes and
# associated probabilities
blob = cv2.dnn.blobFromImage(image, 1 / 255.0, (416, 416),
swapRB=True, crop=False)
net.setInput(blob)
start = time.time()
layerOutputs = net.forward(ln)
end = time.time()
# show timing information on YOLO
print("[INFO] YOLO took {:.6f} seconds".format(end - start))
# initialize our lists of detected bounding boxes, confidences, and
# class IDs, respectively
boxes = []
confidences = []
classIDs = []
# loop over each of the layer outputs
for output in layerOutputs:
# loop over each of the detections
for detection in output:
# extract the class ID and confidence (i.e., probability) of
# the current object detection
scores = detection[5:]
classID = np.argmax(scores)
confidence = scores[classID]
# filter out weak predictions by ensuring the detected
# probability is greater than the minimum probability
if confidence > args["confidence"]:
# scale the bounding box coordinates back relative to the
# size of the image, keeping in mind that YOLO actually
# returns the center (x, y)-coordinates of the bounding
# box followed by the boxes' width and height
box = detection[0:4] * np.array([W, H, W, H])
(centerX, centerY, width, height) = box.astype("int")
# use the center (x, y)-coordinates to derive the top and
# and left corner of the bounding box
x = int(centerX - (width / 2))
y = int(centerY - (height / 2))
# update our list of bounding box coordinates, confidences,
# and class IDs
boxes.append([x, y, int(width), int(height)])
confidences.append(float(confidence))
classIDs.append(classID)
# apply non-maxima suppression to suppress weak, overlapping bounding
# boxes
idxs = cv2.dnn.NMSBoxes(boxes, confidences, args["confidence"],
args["threshold"])
result_texts = []
# ensure at least one detection exists
if len(idxs) > 0:
# loop over the indexes we are keeping
for i in idxs.flatten():
# extract the bounding box coordinates
(x, y) = (boxes[i][0], boxes[i][1])
(w, h) = (boxes[i][2], boxes[i][3])
# draw a bounding box rectangle and label on the image
color = [int(c) for c in COLORS[classIDs[i]]]
cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
text = "{}: {:.4f}".format(LABELS[classIDs[i]], confidences[i])
cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX,
0.5, color, 2)
result_texts.append(text)
print(result_texts)
for text in result_texts:
np.savetxt("input/" + str(num) + ".txt", np.array(result_texts), fmt="%s")
num += 1
i expect each of text to has name of object of each image
but all text have name of object of last image.