i wrote a program to capture the position of license plate with my webcam feed using YOLOv4. The result of the detection is then passed to easyOCR to do character identification. Right now, im calling the OCR function in the while loop everytime a detection occured. Is there a way to call the OCR function outside the loop without stopping the webcam feed ? some people suggested me to use queue or sub process but im not quite familiar with the concept. Any help would be very appreciated
while 1:
#_, pre_img = cap.read()
#pre_img= cv2.resize(pre_img, (640, 480))
_, img = cap.read()
#img = cv2.flip(pre_img,1)
hight, width, _ = img.shape
blob = cv2.dnn.blobFromImage(img, 1 / 255, (416, 416), (0, 0, 0), swapRB=True, crop=False)
output_layers_name = net.getUnconnectedOutLayersNames()
layerOutputs = net.forward(output_layers_name)
boxes = []
confidences = []
class_ids = []
for output in layerOutputs:
for detection in output:
score = detection[5:]
class_id = np.argmax(score)
confidence = score[class_id]
if confidence > 0.7:
center_x = int(detection[0] * width)
center_y = int(detection[1] * hight)
w = int(detection[2] * width)
h = int(detection[3] * hight)
x = int(center_x - w / 2)
y = int(center_y - h / 2)
boxes.append([x, y, w, h])
indexes = cv2.dnn.NMSBoxes(boxes, confidences, .5, .4)
for output in layerOutputs:
for detection in output:
score = detection[5:]
class_id = np.argmax(score)
confidence = score[class_id]
if confidence > 0.5:
center_x = int(detection[0] * width)
center_y = int(detection[1] * hight)
w = int(detection[2] * width)
h = int(detection[3] * hight)
x = int(center_x - w / 2)
y = int(center_y - h / 2)
boxes.append([x, y, w, h])
indexes = cv2.dnn.NMSBoxes(boxes, confidences, .8, .4)
colors = np.random.uniform(0, 255, size=(len(boxes), 3))
if len(indexes) > 0:
for i in indexes.flatten():
x, y, w, h = boxes[i]
label = str(classes[class_ids[i]])
confidence = str(round(confidences[i], 2))
color = colors[i]
cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
detected_image = img[y:y+h, x:x+w]
cv2.putText(img, label + " " + confidence, (x, y + 400), font, 2, color, 2)
result = OCR(detected_image)
Function for OCR
def OCR(cropped_image):
result = reader.readtext(cropped_image)
text = ''
for result in result:
text += result[1] + ' '
spliced = (remove(text)).upper()
return spliced
You could run the OCR function on an other thread with the thread library like so:
import time # not necessary only to simulate work time
import _thread as thread # in python 3 the name has changed to _thread
def OCR(cropped_image):
result = reader.readtext(cropped_image)
text = ''
for result in result:
text += result[1] + ' '
spliced = (remove(text)).upper()
print(spliced) # you would have to print the result in the OCR function because you can't easily return stuff
while 1:
time.sleep(5) # simulating some work time
detected_image = 1
thread.start_new_thread(OCR, (detected_image,)) # calling the OCR function on a new thread.
I hope it will help you...
I have a problem with this coding, because on my device it doesn't run with an error code
layerOutputs = net. forward(output_layers_names)
cv2.error: Unknown C++ exception from OpenCV code
Here's my coding
import cv2
import numpy as np
net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg')
classes = []
with open("coco.txt", "r") as f:
classes = f.read().splitlines()
cap = cv2.VideoCapture(0)
colors = np.random.uniform(0, 255, size=(100, 3))
while True:
_, img = cap.read()
height, width, _ = img.shape
blob = cv2.dnn.blobFromImage(img, 1/255, (416, 416), (0,0,0), swapRB=True, crop=False)
output_layers_names = net.getUnconnectedOutLayersNames()
layerOutputs = net.forward(output_layers_names)
boxes = []
confidences = []
class_ids = []
for output in layerOutputs:
for detection in output:
scores = detection[5:]
class_id = np.argmax(scores)
confidence = scores[class_id]
if confidence > 0.3:
center_x = int(detection[0]*width)
center_y = int(detection[1]*height)
w = int(detection[2]*width)
h = int(detection[3]*height)
x = int(center_x - w/2)
y = int(center_y - h/2)
boxes.append([x, y, w, h])
indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.2, 0.4)
if len(indexes)>0:
for i in indexes.flatten():
x, y, w, h = boxes[i]
label = str(classes[class_ids[i]])
confidence = str(round(confidences[i],2))
color = colors[i]
cv2.rectangle(img, (x,y), (x+w, y+h), color, 2)
cv2.putText(img, label + " " + confidence, (x, y+20), font, 2, (255,255,255), 2)
cv2.imshow('Image', img)
key = cv2.waitKey(1)
if key==27:
This my problem
I have tried several versions of OpenCV but it still doesn't work and there is the same error. i hope you guys can help me, thanks
I couldn't find a solution to use YOLOv3 for single-class. I want to detect just for motorbikes. I edited the coco.names just for motorbikes, and edited the filters, classes in cfg file.
But whenever i run my code it errors as
line 48, in <module>
for i in indexes.flatten():
AttributeError: 'tuple' object has no attribute 'flatten'". Here is my code.
import cv2
import numpy as np
net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg')
classes = []
with open('coco.names', 'r') as f:
classes = f.read().splitlines()
cap = cv2.VideoCapture('test.mp4')
#img = cv2.imread('image.jpg')
while True:
_, img = cap.read()
height, width, _ = img.shape
blob = cv2.dnn.blobFromImage(img, 1/255, (416, 416), (0,0,0), swapRB=True, crop=False)
output_layers_names = net.getUnconnectedOutLayersNames()
layerOutputs = net.forward(output_layers_names)
boxes = []
confidences = []
class_ids = []
for output in layerOutputs:
for detection in output:
scores = detection[5:]
class_id = np.argmax(scores)
confidence = scores[class_id]
if confidence > 0.5:
center_x = int(detection[0]*width)
center_y = int(detection[1]*height)
w = int(detection[2]*width)
h = int(detection[3]*height)
x=int(center_x - w/2)
y=int(center_y - h/2)
boxes.append([x, y, w, h])
indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
colors = np.random.uniform(0, 255, size=(len(boxes), 3))
for i in indexes.flatten():
x, y, w, h = boxes[i]
label = str(classes[class_ids[i]])
confidence = str(round(confidences[i], 2))
color = colors[i]
cv2.rectangle(img, (x,y), (x+w, y+h), color, 2)
cv2.putText(img, label + " " + confidence, (x, y+20), font, 2, (0, 0, 0), 2)
cv2.imshow('Image', img)
key = cv2.waitKey(1)
if key==27:
You must train your model again for the desired class, you can refer to this question for details.
The code below is able to detect objects without issue, however, towards the end there is the line "cv2.imshow("demo", img)"
I would expect this window to show the image with the generated bounding boxes and labels, but all I get is a blank window. I got this code originally from some examples on the internet so I'm a bit lost as to how to position that line, or why it's not generating the image.
import cv2
import numpy as np
def take_pic(output_filename):
import os
capture_img="ffmpeg -y -rtsp_transport udp -i rtsp://mycamera:apassword# -vframes 1 " + output_filename
net = cv2.dnn.readNet("yolov3.weights", "./darknet/cfg/yolov3.cfg")
classes = []
with open("./darknet/data/coco.names", "r") as f:
classes = [line.strip() for line in f.readlines()]
layer_names = net.getLayerNames()
output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]
colors = np.random.uniform(0, 255, size=(len(classes), 3))
output_filename = "/tmp/camera.jpeg"
cap = cv2.imread(output_filename)
j = 0
if j==0:
cv2.namedWindow("demo", cv2.WINDOW_AUTOSIZE)
while True:
cap = cv2.imread(source)
j = j + 1
print("j= " + str(j))
img = cap
img = cv2.resize(img, None, fx=0.4, fy=0.4)
height, width, channels = img.shape
blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
outs = net.forward(output_layers)
class_ids = []
confidences = []
boxes = []
for out in outs:
for detection in out:
scores = detection[5:]
class_id = np.argmax(scores)
confidence = scores[class_id]
if confidence > 0.5:
# Object detected
center_x = int(detection[0] * width)
center_y = int(detection[1] * height)
print(str(center_x)+" "+str(center_y))
w = int(detection[2] * width)
h = int(detection[3] * height)
# Rectangle coordinates
x = int(center_x - w / 2)
y = int(center_y - h / 2)
boxes.append([x, y, w, h])
indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
for i in range(len(boxes)):
if i in indexes:
x, y, w, h = boxes[i]
label = str(classes[class_ids[i]])
print("label :"+str(label)+"x: "+str(x)+" y: " + str(y))
color = colors[i]
cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
cv2.putText(img, label, (x, y + 30), font, 3, color, 3)
cv2.imshow("demo", img)
print("camera open failed")
With opencv, a imshow is required to be accompanied with a waitKey method in order to display an image.
Paste something similar to this towards the end of your loop, after you call cv2.imshow:
if cv2.waitKey(0) == ord('q'):
print('exitting loop')
If the image shows blank during imshow method, then you might need to multiply pixels with 255. For instance, in Matlab, the images are normalized between 0 - 1.
cv2.imshow("demo", img * 255)
Hello I am trying to identify the odometer reading from the image attached using open CV and EAST model along with Pyteserract.
Following is my code :
import cv2
import numpy as np
import matplotlib.pyplot as plt
# assuming you have the result image store in median
median = cv2.imread("odo_4.jpg", 0)
image_gray = median
binary = cv2.bitwise_not(image_gray)
blur = cv2.GaussianBlur(image_gray,(5,5),0)
ret2,th2 = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
edged = cv2.Canny(th2, 50, 80, 255)
#threshold = cv2.adaptiveThreshold(edged,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY,11,2)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,5))
close = cv2.morphologyEx(edged, cv2.MORPH_CLOSE, kernel, iterations=1)
contours = cv2.findContours(close, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
rect_cnts = []
for cnt in contours:
peri = cv2.arcLength(cnt, True)
approx = cv2.approxPolyDP(cnt, 0.04 * peri, True)
(x, y, w, h) = cv2.boundingRect(cnt)
ar = w / float(h)
if (len(approx) == 4) & (ar >= 0.95 and ar <= 1.05) : # shape filtering condition
else :
max_area = 0
football_square = None
for cnt in rect_cnts:
(x, y, w, h) = cv2.boundingRect(cnt)
if max_area < w*h:
max_area = w*h
football_square = cnt
image = cv2.cvtColor(image_gray, cv2.COLOR_GRAY2RGB)
(x, y, w, h) = cv2.boundingRect(football_square)
new_image = image[y:y+h, x:x+w]
new = new_image
import cv2 as cv
orig = new.copy()
(origH, origW) = new.shape[:2]
rW = origW / 320.0
rH = origH / 320.0
# resize the original image to new dimensions
new = cv.resize(new, (320, 320))
(H, W) = new.shape[:2]
# construct a blob from the image to forward pass it to EAST model
blob = cv.dnn.blobFromImage(new, 1.0, (W, H),
(123.68, 116.78, 103.94), swapRB=True, crop=False)
net = cv.dnn.readNet('frozen_east_text_detection.pb')
layerNames = [
(scores, geometry) = net.forward(layerNames)
def predictions(prob_score, geo):
(numR, numC) = prob_score.shape[2:4]
boxes = []
confidence_val = []
# loop over rows
for y in range(0, numR):
scoresData = prob_score[0, 0, y]
x0 = geo[0, 0, y]
x1 = geo[0, 1, y]
x2 = geo[0, 2, y]
x3 = geo[0, 3, y]
anglesData = geo[0, 4, y]
# loop over the number of columns
for i in range(0, numC):
if scoresData[i] < 0.5:
(offX, offY) = (i * 4.0, y * 4.0)
# extracting the rotation angle for the prediction and computing the sine and cosine
angle = anglesData[i]
cos = np.cos(angle)
sin = np.sin(angle)
# using the geo volume to get the dimensions of the bounding box
h = x0[i] + x2[i]
w = x1[i] + x3[i]
# compute start and end for the text pred bbox
endX = int(offX + (cos * x1[i]) + (sin * x2[i]))
endY = int(offY - (sin * x1[i]) + (cos * x2[i]))
startX = int(endX - w)
startY = int(endY - h)
boxes.append((startX, startY, endX, endY))
# return bounding boxes and associated confidence_val
return (boxes, confidence_val)
(boxes, confidence_val) = predictions(scores, geometry)
boxes = non_max_suppression(np.array(boxes), probs=confidence_val)
# initialize the list of results
results = []
# loop over the bounding boxes to find the coordinate of bounding boxes
for (startX, startY, endX, endY) in boxes:
# scale the coordinates based on the respective ratios in order to reflect bounding box on the original image
startX = int(startX * rW)
startY = int(startY * rH)
endX = int(endX * rW)
endY = int(endY * rH)
#extract the region of interest
r = orig[startY:endY, startX:endX]
#configuration setting to convert image to string.
configuration = ("-l eng --oem 1 --psm 7")
##This will recognize the text from the image of bounding box
text = pytesseract.image_to_string(r, config=configuration)
# append bbox coordinate and associated text to the list of results
results.append(((startX, startY, endX, endY), text))
The results are bad - but my EAST model is identify the contour ( area) where the digits are present. Can you please help me ? I have tried different psm values in config for image_to_string.
Use InRange() for selection. See example:
import cv2 as cv
low_H = 80
low_S = 160
low_V = 200
high_H = 100
high_S = 255
high_V = 255
frame = cv.imread('OAPgE.jpg')
frame_HSV = cv.cvtColor(frame, cv.COLOR_BGR2HSV)
frame_threshold = cv.inRange(frame_HSV, (low_H, low_S, low_V), (high_H, high_S, high_V))
cv.imwrite('out_36.png', frame_threshold)
I am trying to fill the rectangle but even after changing the code(chaning thickness to -10) there is no effect. I feel that the global has something to do with this.
I have attached the code below.
import cv2
import os
import numpy as np
from .utils import download_file
initialize = True
net = None
dest_dir = os.path.expanduser('~') + os.path.sep + '.cvlib' + os.path.sep + 'object_detection' + os.path.sep + 'yolo' + os.path.sep + 'yolov3'
classes = None
COLORS = np.random.uniform(0, 255, size=(80, 3))
def draw_bbox(img, bbox, labels, confidence, colors=None, write_conf=False):
global COLORS
global classes
if classes is None:
classes = populate_class_labels()
for i, label in enumerate(labels):
if colors is None:
color = COLORS[classes.index(label)]
color = colors[classes.index(label)]
if write_conf:
label += ' ' + str(format(confidence[i] * 100, '.2f')) + '%'
cv2.rectangle(img, (bbox[i][0],bbox[i][1]), (bbox[i][2],bbox[i][3]), color,-1)
cv2.putText(img, label, (bbox[i][0],bbox[i][1]-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
return img
def detect_common_objects(image):
Height, Width = image.shape[:2]
scale = 0.00392
global classes
global dest_dir
config_file_name = 'yolov3.cfg'
config_file_abs_path = dest_dir + os.path.sep + config_file_name
weights_file_name = 'yolov3.weights'
weights_file_abs_path = dest_dir + os.path.sep + weights_file_name
url = 'https://github.com/arunponnusamy/object-detection-opencv/raw/master/yolov3.cfg'
if not os.path.exists(config_file_abs_path):
download_file(url=url, file_name=config_file_name, dest_dir=dest_dir)
url = 'https://pjreddie.com/media/files/yolov3.weights'
if not os.path.exists(weights_file_abs_path):
download_file(url=url, file_name=weights_file_name, dest_dir=dest_dir)
global initialize
global net
if initialize:
classes = populate_class_labels()
net = cv2.dnn.readNet(weights_file_abs_path, config_file_abs_path)
initialize = False
blob = cv2.dnn.blobFromImage(image, scale, (416,416), (0,0,0), True, crop=False)
outs = net.forward(get_output_layers(net))
class_ids = []
confidences = []
boxes = []
conf_threshold = 0.5
nms_threshold = 0.4
for out in outs:
for detection in out:
scores = detection[5:]
class_id = np.argmax(scores)
confidence = scores[class_id]
if confidence > 0.5 and class_id=='person':
center_x = int(detection[0] * Width)
center_y = int(detection[1] * Height)
w = int(detection[2] * Width)
h = int(detection[3] * Height)
x = center_x - w / 2
y = center_y - h / 2
boxes.append([x, y, w, h])
indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)
bbox = []
label = []
conf = []
for i in indices:
i = i[0]
box = boxes[i]
x = box[0]
y = box[1]
w = box[2]
h = box[3]
if str(classes[class_ids[i]])=='person':
bbox.append([round(x), round(y), round(x+w), round(y+h)])
return bbox, label, conf
The entire code is the above. It is an object detection program using Yolo and opencv. I have also added few lines in the last line to enable only the person class but it seems to detect all classes. I have also tried to modify the thickness of the rectangles but changing the values had no effect.
You just have to change -10 with -1. after change your code will look like
def draw_bbox(img, bbox, labels, confidence, colors=None, write_conf=False):
global COLORS
global classes
if classes is None:
classes = populate_class_labels()
for i, label in enumerate(labels):
if colors is None:
color = COLORS[classes.index(label)]
color = colors[classes.index(label)]
if write_conf:
label += ' ' + str(format(confidence[i] * 100, '.2f')) + '%'
cv2.rectangle(img, (bbox[i][0],bbox[i][1]), (bbox[i][2],bbox[i][3]), color,-1)
cv2.putText(img, label, (bbox[i][0],bbox[i][1]-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
return img
I have a boundary of rectangles and for filling these rectangles. Put "-1"
cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 255, 255), -1)
I was indeed making a dumb mistake. I was changing the object/detection.py file in my Github Folder. However, when I saw this everything made sense.
File "/Users/dukeglacia/anaconda3/lib/python3.6/site-packages/cvlib/object_detection.py"
I was in fact changing the wrong file(exactly the same originally though).