opencv show rendered image with bounding boxes and labels - python

The code below detects objects without issue; however, toward the end there is the line "cv2.imshow("demo", img)".
I would expect this window to show the image with the generated bounding boxes and labels, but all I get is a blank window. I originally took this code from examples on the internet, so I'm a bit lost as to where that line should go, or why it isn't rendering the image.
import cv2
import numpy as np

def take_pic(output_filename):
    import os
    capture_img = "ffmpeg -y -rtsp_transport udp -i rtsp://mycamera:apassword#172.16.66.106/live -vframes 1 " + output_filename

net = cv2.dnn.readNet("yolov3.weights", "./darknet/cfg/yolov3.cfg")
classes = []
with open("./darknet/data/coco.names", "r") as f:
    classes = [line.strip() for line in f.readlines()]
layer_names = net.getLayerNames()
output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]
colors = np.random.uniform(0, 255, size=(len(classes), 3))
output_filename = "/tmp/camera.jpeg"
cap = cv2.imread(output_filename)
j = 0
if j == 0:
    cv2.namedWindow("demo", cv2.WINDOW_AUTOSIZE)
    while True:
        take_pic(output_filename)
        cap = cv2.imread(output_filename)
        j = j + 1
        print("j= " + str(j))
        img = cap
        img = cv2.resize(img, None, fx=0.4, fy=0.4)
        height, width, channels = img.shape
        blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
        net.setInput(blob)
        outs = net.forward(output_layers)
        class_ids = []
        confidences = []
        boxes = []
        for out in outs:
            for detection in out:
                scores = detection[5:]
                class_id = np.argmax(scores)
                confidence = scores[class_id]
                if confidence > 0.5:
                    # Object detected
                    center_x = int(detection[0] * width)
                    center_y = int(detection[1] * height)
                    print(str(center_x) + " " + str(center_y))
                    w = int(detection[2] * width)
                    h = int(detection[3] * height)
                    # Rectangle coordinates
                    x = int(center_x - w / 2)
                    y = int(center_y - h / 2)
                    boxes.append([x, y, w, h])
                    confidences.append(float(confidence))
                    class_ids.append(class_id)
        indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
        font = cv2.FONT_HERSHEY_PLAIN
        for i in range(len(boxes)):
            if i in indexes:
                x, y, w, h = boxes[i]
                label = str(classes[class_ids[i]])
                print("label :" + str(label) + " x: " + str(x) + " y: " + str(y))
                color = colors[i]
                cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
                cv2.putText(img, label, (x, y + 30), font, 3, color, 3)
        cv2.imshow("demo", img)
else:
    print("camera open failed")
cv2.destroyAllWindows()

In OpenCV, imshow must be accompanied by a waitKey call in order to display an image.
Paste something similar to this toward the end of your loop, after you call cv2.imshow:
if cv2.waitKey(0) == ord('q'):
    print('exiting loop')
    break

If the image shows up blank during imshow, you might need to multiply the pixel values by 255. In MATLAB, for instance, images are normalized between 0 and 1.
Try:
cv2.imshow("demo", img * 255)
cv2.waitKey(0)
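Putting both suggestions together, here is a minimal sketch of how the end of the loop could look (the * 255 scaling is only needed if your pixels are floats in the 0-1 range, which is an assumption to verify against your data):

cv2.imshow("demo", img)
# waitKey services the GUI event loop; without it the window never repaints
if cv2.waitKey(1) == ord('q'):
    break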

Related

layerOutputs = net.forward(output_layers_names) cv2.error: Unknown C++ exception from OpenCV code

I have a problem with this code; on my device it fails with the error
layerOutputs = net.forward(output_layers_names)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
cv2.error: Unknown C++ exception from OpenCV code
Here's my code:
import cv2
import numpy as np

net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg')
classes = []
with open("coco.txt", "r") as f:
    classes = f.read().splitlines()
cap = cv2.VideoCapture(0)
font = cv2.FONT_HERSHEY_PLAIN
colors = np.random.uniform(0, 255, size=(100, 3))
while True:
    _, img = cap.read()
    height, width, _ = img.shape
    blob = cv2.dnn.blobFromImage(img, 1/255, (416, 416), (0,0,0), swapRB=True, crop=False)
    net.setInput(blob)
    output_layers_names = net.getUnconnectedOutLayersNames()
    layerOutputs = net.forward(output_layers_names)
    boxes = []
    confidences = []
    class_ids = []
    for output in layerOutputs:
        for detection in output:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.3:
                center_x = int(detection[0]*width)
                center_y = int(detection[1]*height)
                w = int(detection[2]*width)
                h = int(detection[3]*height)
                x = int(center_x - w/2)
                y = int(center_y - h/2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.2, 0.4)
    if len(indexes) > 0:
        for i in indexes.flatten():
            x, y, w, h = boxes[i]
            label = str(classes[class_ids[i]])
            confidence = str(round(confidences[i], 2))
            color = colors[i]
            cv2.rectangle(img, (x,y), (x+w, y+h), color, 2)
            cv2.putText(img, label + " " + confidence, (x, y+20), font, 2, (255,255,255), 2)
    cv2.imshow('Image', img)
    key = cv2.waitKey(1)
    if key == 27:
        break
cap.release()
cv2.destroyAllWindows()
That's my problem. I have tried several versions of OpenCV, but it still fails with the same error. I hope you can help me, thanks.

Running task / function in the background

I wrote a program to capture the position of a license plate from my webcam feed using YOLOv4. The result of the detection is then passed to easyOCR for character identification. Right now, I'm calling the OCR function inside the while loop every time a detection occurs. Is there a way to call the OCR function outside the loop without stopping the webcam feed? Some people suggested using a queue or a subprocess, but I'm not familiar with those concepts. Any help would be very appreciated.
# detection
while 1:
    #_, pre_img = cap.read()
    #pre_img = cv2.resize(pre_img, (640, 480))
    _, img = cap.read()
    #img = cv2.flip(pre_img, 1)
    hight, width, _ = img.shape
    blob = cv2.dnn.blobFromImage(img, 1 / 255, (416, 416), (0, 0, 0), swapRB=True, crop=False)
    net.setInput(blob)
    output_layers_name = net.getUnconnectedOutLayersNames()
    layerOutputs = net.forward(output_layers_name)
    boxes = []
    confidences = []
    class_ids = []
    for output in layerOutputs:
        for detection in output:
            score = detection[5:]
            class_id = np.argmax(score)
            confidence = score[class_id]
            if confidence > 0.7:
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * hight)
                w = int(detection[2] * width)
                h = int(detection[3] * hight)
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, .5, .4)
    boxes = []
    confidences = []
    class_ids = []
    for output in layerOutputs:
        for detection in output:
            score = detection[5:]
            class_id = np.argmax(score)
            confidence = score[class_id]
            if confidence > 0.5:
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * hight)
                w = int(detection[2] * width)
                h = int(detection[3] * hight)
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, .8, .4)
    font = cv2.FONT_HERSHEY_PLAIN
    colors = np.random.uniform(0, 255, size=(len(boxes), 3))
    if len(indexes) > 0:
        for i in indexes.flatten():
            x, y, w, h = boxes[i]
            label = str(classes[class_ids[i]])
            confidence = str(round(confidences[i], 2))
            color = colors[i]
            cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
            detected_image = img[y:y+h, x:x+w]
            cv2.putText(img, label + " " + confidence, (x, y + 400), font, 2, color, 2)
            #print(detected_image)
            cv2.imshow('detection', detected_image)
            result = OCR(detected_image)
            print(result)
Function for OCR
def OCR(cropped_image):
    result = reader.readtext(cropped_image)
    text = ''
    for result in result:
        text += result[1] + ' '
    spliced = (remove(text)).upper()
    return spliced
You could run the OCR function on another thread with the _thread library, like so:
import time  # not necessary, only to simulate work time
import _thread as thread  # in Python 3 the module is named _thread

def OCR(cropped_image):
    result = reader.readtext(cropped_image)
    text = ''
    for result in result:
        text += result[1] + ' '
    spliced = (remove(text)).upper()
    print(spliced)  # print the result inside OCR, since you can't easily return a value from a thread

while 1:
    time.sleep(5)  # simulating some work time
    print("main")
    detected_image = 1
    thread.start_new_thread(OCR, (detected_image,))  # calling the OCR function on a new thread
I hope it will help you...
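Since the question also mentions queues: a worker thread fed by a queue lets you hand frames to OCR without blocking the capture loop and still collect the results, instead of printing from inside the thread. A minimal sketch, assuming the OCR function and reader from the question are defined (the queue names are mine):

import queue
import threading

ocr_queue = queue.Queue()      # frames waiting for OCR
results_queue = queue.Queue()  # finished text, read back in the main loop

def ocr_worker():
    # runs in the background forever, pulling cropped plates off the queue
    while True:
        cropped = ocr_queue.get()
        results_queue.put(OCR(cropped))  # OCR() is the function defined above
        ocr_queue.task_done()

threading.Thread(target=ocr_worker, daemon=True).start()

Inside the detection loop, replace the direct call with ocr_queue.put(detected_image), and drain any finished results without blocking:

while not results_queue.empty():
    print(results_queue.get())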

How To Draw a Triangle-Arrow With The Positions of Detected Objects

I am making an object detection project.
I wrote my code by following a tutorial. In the tutorial, the author drew a rectangle in OpenCV for every single detected object.
But I want to change the rectangle to a triangle or an arrow.
Let me explain with code:
In my function, I detect objects.
And here I draw a rectangle for the detected objects:
cv2.rectangle(img, (x, y), (x+w,y+h), (255, 0 , 255), 2)
But I want to change this rectangle to a triangle, and I want to position the triangle above the object.
Just like in these images:
This is the object detection with a triangle:
[image]
This is what I want to draw instead of the rectangle:
[image]
How can I draw a triangle/arrow at the positions of my detected objects?
All of my code is here:
from os.path import sep
import cv2 as cv2
import numpy as np
import json

# Camera feed
cap_cam = cv2.VideoCapture(0)
ret, frame_cam = cap_cam.read()
hey = 0
print(cv2.__version__)
whT = 320
confThreshold = 0.5
nmsThreshold = 0.2
classesFile = "coco.names"
classNames = []
with open(classesFile, 'rt') as f:
    classNames = f.read().rstrip('\n').split('\n')
print(classNames)

## Model Files
modelConfiguration = "custom-yolov4-tiny-detector.cfg"
modelWeights = "custom-yolov4-tiny-detector_last.weights"
net = cv2.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

def findObjects(outputs, img):
    global hey
    global previousHey
    hT, wT, cT = img.shape
    bbox = []
    classIds = []
    confs = []
    for output in outputs:
        for det in output:
            scores = det[5:]
            classId = np.argmax(scores)
            confidence = scores[classId]
            if confidence > confThreshold:
                w, h = int(det[2]*wT), int(det[3]*hT)
                x, y = int((det[0]*wT)-w/2), int((det[1]*hT)-h/2)
                bbox.append([x, y, w, h])
                classIds.append(classId)
                confs.append(float(confidence))
    global indicates
    indices = cv2.dnn.NMSBoxes(bbox, confs, confThreshold, nmsThreshold)
    hey = 0
    for i in indices:
        i = i[0]
        box = bbox[i]
        x, y, w, h = box[0], box[1], box[2], box[3]
        # print(x,y,w,h)
        cv2.rectangle(img, (x, y), (x+w, y+h), (255, 0, 255), 2)
        #cv2.line(img, (350,400), (x, y), (255,0,0), 4)
        #cv2.line(img, (400,400), (x + 50, y), (255,0,0), 4)
        #cv2.putText(img, f'{classNames[classIds[i]].upper()} {int(confs[i]*100)}%',
        #            (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 255), 2)
        print('success')
        hey = 1

video_frame_counter = 0
while cap_cam.isOpened():
    img = cv2.imread('photos' + sep + 'lutfen.jpg')
    # THE ARROW VIDEO WILL BE PLAYED HERE
    # tomorrow: try an "if not detection" check here
    blob = cv2.dnn.blobFromImage(img, 1 / 255, (whT, whT), [0, 0, 0], 1, crop=False)
    net.setInput(blob)
    layersNames = net.getLayerNames()
    outputNames = [(layersNames[i[0] - 1]) for i in net.getUnconnectedOutLayers()]
    outputs = net.forward(outputNames)
    findObjects(outputs, img)
    cv2.imshow('Image', img)
    # Video feed
    if hey == 1:
        filename = 'photos' + sep + 'Baslksz-3.mp4'
        cap_vid = cv2.VideoCapture(filename)
    if hey == 0:
        filename = 'photos' + sep + 'vid2.mp4'
        cap_vid = cv2.VideoCapture(filename)
    print(hey)
    ret, frame_vid = cap_vid.read()
    #cap_cam.set(cv2.CAP_PROP_FRAME_WIDTH, 1920)
    #cap_cam.set(cv2.CAP_PROP_FRAME_HEIGHT, 1080)
    # Resize the camera frame to the size of the video
    height = int(cap_vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    width = int(cap_vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    # Capture the next frame from camera
    ret, frame_cam = cap_cam.read()
    video_frame_counter += 1
    if video_frame_counter == cap_vid.get(cv2.CAP_PROP_FRAME_COUNT):
        video_frame_counter = 0
        cap_vid.set(cv2.CAP_PROP_POS_FRAMES, 0)
    frame_cam = cv2.resize(frame_cam, (width, height), interpolation=cv2.INTER_AREA)
    #ret = cap_vid.set(cv2.CAP_PROP_POS_MSEC, time_passed)
    ret, frame_vid = cap_vid.read()
    if not ret:
        print('Cannot read from video stream')
        break
    # Blend the two images and show the result
    tr = 0.4  # transparency between 0-1, show camera if 0
    frame = ((1-tr) * frame_cam.astype(float) + tr * frame_vid.astype(float)).astype(np.uint8)
    cv2.imshow('Transparent result', frame)
    if cv2.waitKey(1) == 27:  # ESC is pressed
        break

cap_cam.release()
cap_vid.release()
cv2.destroyAllWindows()
The easy way
You can use the cv2.arrowedLine() function, which draws something similar to what you want. For example, to draw a red arrow above your rectangle:
center_x = x + w//2
cv2.arrowedLine(img, (center_x, y-50), (center_x, y-5), (0,0,255), 2, 8, 0, 0.5)
which should give a result similar to the image below. Take a look at the OpenCV documentation for the description of the parameters of the function. You can change its size, thickness, color, etc.
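Note that the trailing 0.5 in the call above is the tipLength parameter (the 8 and 0 before it are line_type and shift); passing it by keyword may read more clearly:

cv2.arrowedLine(img, (center_x, y-50), (center_x, y-5), (0,0,255), 2, tipLength=0.5)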
Custom arrow shape
If you want more control over the shape of your arrow, you can define a contour (vertex by vertex) and use cv2.drawContours() to render it. For example:
# define the arrow shape
shape = np.array([[[0,0],[-25,-25],[-10,-25],[-10,-50],
                   [10,-50],[10,-25],[25,-25]]])
# move it to the desired position
cx = x + w // 2
cy = y - 5
shape[:,:,0] += cx
shape[:,:,1] += cy
# draw it
cv2.drawContours(img, shape, -1, (0, 255, 0), -1)
This snippet will give you the image below. You can adjust the shape by altering the vertices in the shape array, or look at the documentation to change the way OpenCV draws it.
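If it helps, the two steps above can be wrapped into a small helper (a sketch; the function name draw_arrow_above is mine) so it can be called from findObjects in place of cv2.rectangle:

def draw_arrow_above(img, x, y, w, color=(0, 255, 0)):
    # arrow contour pointing down, anchored at the top-center of the box
    shape = np.array([[[0,0],[-25,-25],[-10,-25],[-10,-50],
                       [10,-50],[10,-25],[25,-25]]])
    shape[:,:,0] += x + w // 2
    shape[:,:,1] += y - 5
    cv2.drawContours(img, shape, -1, color, -1)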

using YOLOv3 for single-class

I couldn't find a solution for using YOLOv3 with a single class. I want to detect only motorbikes, so I edited coco.names to contain just motorbikes and edited the filters and classes values in the cfg file.
But whenever I run my code, it fails with
line 48, in <module>
    for i in indexes.flatten():
AttributeError: 'tuple' object has no attribute 'flatten'
Here is my code.
import cv2
import numpy as np

net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg')
classes = []
with open('coco.names', 'r') as f:
    classes = f.read().splitlines()
cap = cv2.VideoCapture('test.mp4')
#img = cv2.imread('image.jpg')
while True:
    _, img = cap.read()
    height, width, _ = img.shape
    blob = cv2.dnn.blobFromImage(img, 1/255, (416, 416), (0,0,0), swapRB=True, crop=False)
    net.setInput(blob)
    output_layers_names = net.getUnconnectedOutLayersNames()
    layerOutputs = net.forward(output_layers_names)
    boxes = []
    confidences = []
    class_ids = []
    for output in layerOutputs:
        for detection in output:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.5:
                center_x = int(detection[0]*width)
                center_y = int(detection[1]*height)
                w = int(detection[2]*width)
                h = int(detection[3]*height)
                x = int(center_x - w/2)
                y = int(center_y - h/2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
    font = cv2.FONT_HERSHEY_PLAIN
    colors = np.random.uniform(0, 255, size=(len(boxes), 3))
    for i in indexes.flatten():
        x, y, w, h = boxes[i]
        label = str(classes[class_ids[i]])
        confidence = str(round(confidences[i], 2))
        color = colors[i]
        cv2.rectangle(img, (x,y), (x+w, y+h), color, 2)
        cv2.putText(img, label + " " + confidence, (x, y+20), font, 2, (0, 0, 0), 2)
    cv2.imshow('Image', img)
    key = cv2.waitKey(1)
    if key == 27:
        break
cap.release()
cv2.destroyAllWindows()
You must retrain your model for the desired class; you can refer to this question for details.
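As a side note on the immediate AttributeError: cv2.dnn.NMSBoxes returns an empty tuple when no detections pass the thresholds, and a tuple has no .flatten() method. Guarding the drawing loop the way the earlier webcam example does avoids the crash (a sketch using the question's variable names):

indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
if len(indexes) > 0:  # empty tuple when nothing was detected
    for i in np.array(indexes).flatten():
        x, y, w, h = boxes[i]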

Bad character recognition with Pytesseract OCR for images with table structure

I use code to locate text boxes and draw a rectangle around each one. This allows me to rebuild the grid of the table structure in the image.
However, even though the text box detection works very well, when I try to identify the characters inside each rectangle, pytesseract does not recognize them well and cannot recover the original text.
Here is my Python code :
import os
import cv2
import imutils
import argparse
import numpy as np
import pytesseract

# This only works if there's only one table on a page
# Important parameters:
#  - morph_size
#  - min_text_height_limit
#  - max_text_height_limit
#  - cell_threshold
#  - min_columns

def pre_process_image(img, save_in_file, morph_size=(8, 8)):
    # get rid of the color
    pre = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    def img_estim(img, threshold=127):
        is_dark = np.mean(img) < threshold
        return True if is_dark else False

    # Negative
    if img_estim(pre):
        print("non")
        pre = cv2.bitwise_not(pre)

    # Contrast & Brightness control
    contrast = 2.0   # 0 to 3
    brightness = 0   # 0 to 100
    for y in range(pre.shape[0]):
        for x in range(pre.shape[1]):
            pre[y, x] = np.clip(contrast*pre[y, x] + brightness, 0, 255)

    # Otsu threshold
    pre = cv2.threshold(pre, 250, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
    # dilate the text to make it solid spot
    cpy = pre.copy()
    struct = cv2.getStructuringElement(cv2.MORPH_RECT, morph_size)
    cpy = cv2.dilate(~cpy, struct, anchor=(-1, -1), iterations=1)
    pre = ~cpy
    if save_in_file is not None:
        cv2.imwrite(save_in_file, pre)
    return pre

def find_text_boxes(pre, min_text_height_limit=15, max_text_height_limit=40):
    # Looking for the text spots contours
    # OpenCV 3
    # img, contours, hierarchy = cv2.findContours(pre, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    # OpenCV 4
    contours, hierarchy = cv2.findContours(pre, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    # Getting the texts bounding boxes based on the text size assumptions
    boxes = []
    for contour in contours:
        box = cv2.boundingRect(contour)
        h = box[3]
        if min_text_height_limit < h < max_text_height_limit:
            boxes.append(box)
    return boxes

def find_table_in_boxes(boxes, cell_threshold=10, min_columns=2):
    rows = {}
    cols = {}
    # Clustering the bounding boxes by their positions
    for box in boxes:
        (x, y, w, h) = box
        col_key = x // cell_threshold
        row_key = y // cell_threshold
        cols[row_key] = [box] if col_key not in cols else cols[col_key] + [box]
        rows[row_key] = [box] if row_key not in rows else rows[row_key] + [box]
    # Filtering out the clusters having less than 2 cols
    table_cells = list(filter(lambda r: len(r) >= min_columns, rows.values()))
    # Sorting the row cells by x coord
    table_cells = [list(sorted(tb)) for tb in table_cells]
    # Sorting rows by the y coord
    table_cells = list(sorted(table_cells, key=lambda r: r[0][1]))
    return table_cells

def build_lines(table_cells):
    if table_cells is None or len(table_cells) <= 0:
        return [], []
    max_last_col_width_row = max(table_cells, key=lambda b: b[-1][2])
    max_x = max_last_col_width_row[-1][0] + max_last_col_width_row[-1][2]
    max_last_row_height_box = max(table_cells[-1], key=lambda b: b[3])
    max_y = max_last_row_height_box[1] + max_last_row_height_box[3]
    hor_lines = []
    ver_lines = []
    for box in table_cells:
        x = box[0][0]
        y = box[0][1]
        hor_lines.append((x, y, max_x, y))
    for box in table_cells[0]:
        x = box[0]
        y = box[1]
        ver_lines.append((x, y, x, max_y))
    (x, y, w, h) = table_cells[0][-1]
    ver_lines.append((max_x, y, max_x, max_y))
    (x, y, w, h) = table_cells[0][0]
    hor_lines.append((x, max_y, max_x, max_y))
    return hor_lines, ver_lines

if __name__ == "__main__":
    ap = argparse.ArgumentParser()
    ap.add_argument("-i", "--image", required=True,
                    help="path to input image to be OCR'd")
    # ap.add_argument("-east", "--east", type=str,
    #                 help="path to input EAST text detector")
    args = vars(ap.parse_args())
    in_file = os.path.join("images", args["image"])
    pre_file = os.path.join("images", "pre.png")
    out_file = os.path.join("images", "out.png")
    img = cv2.imread(os.path.join(in_file))
    top, bottom, left, right = [25]*4
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_REPLICATE)
    orig = img.copy()
    pre_processed = pre_process_image(img, pre_file)
    text_boxes = find_text_boxes(pre_processed)
    cells = find_table_in_boxes(text_boxes)
    hor_lines, ver_lines = build_lines(cells)
    # (H, W) = img.shape[:2]
    # net = cv2.dnn.readNet(args["east"])
    # blob = cv2.dnn.blobFromImage(img, 1.0, (W, H), (123.68, 116.78, 103.94), swapRB=True, crop=False)
    # net.setInput(blob)
    # Visualize the result
    vis = img.copy()
    results = []
    for box in text_boxes:
        (x, y, w, h) = box
        startX = x - 2
        startY = y - 2
        endX = x + w
        endY = y + h
        cv2.rectangle(vis, (startX, startY), (endX, endY), (0, 255, 0), 1)
        roi = orig[startX:endX, startY:endY]
        config = ("-l eng --psm 6")
        pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files (x86)\Tesseract-OCR\tesseract.exe'
        text = pytesseract.image_to_string(roi, config=config)
        results.append(((startX, startY, (endX), (endY)), text))
    results = sorted(results, key=lambda r: r[0][1])
    output = orig.copy()
    for ((startX, startY, endX, endY), text) in results:
        print("{}\n".format(text))
        text = "".join([c if ord(c) < 128 else "" for c in text]).strip()
        cv2.rectangle(output, (startX, startY), (endX, endY), (0, 0, 255), 1)
        cv2.putText(output, text, (startX, startY - 20), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 3)
    # for line in hor_lines:
    #     [x1, y1, x2, y2] = line
    #     cv2.line(vis, (x1, y1), (x2, y2), (0, 0, 255), 1)
    # for line in ver_lines:
    #     [x1, y1, x2, y2] = line
    #     cv2.line(vis, (x1, y1), (x2, y2), (0, 0, 255), 1)
    cv2.imwrite(out_file, vis)
    cv2.imshow("Text Detection", output)
    cv2.waitKey(0)
Initial image:
[image]
Preprocessed image with detection of text outlines to define the dimensions of the rectangles:
[image]
Final image:
[image]
Result obtained by OCR:
"
a
ra
at
12
1
"
Thank you in advance for your help; I hope my description is clear enough.
When performing OCR, it is extremely important to preprocess the image so the foreground text is black on a white background. In addition, enlarging the image can help improve the detection results. I've also found that adding a slight Gaussian blur before passing the image to Pytesseract improves accuracy. Here are the results with --psm 6, which treats the image as a single block of text. Look here for more configuration options.
Preprocessed enlarged, thresholded, and slightly blurred image
Results from Pytesseract OCR
Series Type Scan Range CTDIvol DLP Phantom
(mm) (mGy) — (mGy-cm) cm
1 Scout - - - -
1 Scout - - - -
2 Axial = 113.554-1272.929 11.22 269.35 Body 32
Total Exam DLP: = 269.35
1/1
Code
import cv2
import pytesseract
import imutils
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
image = cv2.imread('1.jpg')
image = imutils.resize(image, width=700)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
thresh = cv2.GaussianBlur(thresh, (3,3), 0)
data = pytesseract.image_to_string(thresh, lang='eng', config='--psm 6')
print(data)
cv2.imshow('thresh', thresh)
cv2.imwrite('thresh.png', thresh)
cv2.waitKey()
