I'm trying to detect the region here (circled in red) more effectively. As it currently stands, I have a few steps to get the area:
Brighten the input image to increase contrast and the likelihood of picking up edges, which gives this image:
Crop and threshold the region of interest, and add Gaussian blur to get this image:
Use OpenCV to detect Hough Circles on the thresholded image
Select the top 10 largest circles found, then choose the one closest to the grid intersection (in the code as fv_cx and fv_cy) vertically and closest to the edge of the image horizontally.
While this works well in general, it often misses the right circle or encircles an area that is too small.
Using these input images, is there a better way to work on this problem?
This is my code so far:
from __future__ import print_function
import pandas as pd
from pandas.api.types import is_numeric_dtype
import os
from PIL import Image, ImageDraw, ImageFont
import math
import cv2
import matplotlib.pyplot as plt
import time
import re
import csv
from skimage import data, color, io, img_as_ubyte
from skimage.transform import hough_circle, hough_circle_peaks, hough_ellipse
from skimage.feature import canny
from skimage.draw import circle_perimeter, ellipse_perimeter
from skimage.util import img_as_ubyte
from builtins import input
import numpy as np
def append_list_as_row(file_name, list_of_elem):
    """Append ``list_of_elem`` as a single row to the CSV file ``file_name``.

    The file is opened in append mode as UTF-8 and the row is written with
    the ``excel`` dialect.
    """
    # newline='' leaves line-ending handling to the csv writer.
    with open(file_name, 'a+', newline='', encoding='utf-8') as handle:
        csv.writer(handle, dialect='excel').writerow(list_of_elem)
def round_up_to_odd(f):
    """Return the smallest odd integer that is not less than ``f``."""
    ceiling = np.ceil(f)
    # Count the whole pairs below the ceiling, then step to the odd number
    # just above them.
    pairs = ceiling // 2
    return int(2 * pairs + 1)
# Folder path here
folder = r""
csv_file = folder + os.sep + "Measurements.csv"
csv_file2 = folder + os.sep + "Measurements2.csv"
# Collects one result row per processed image; saved by the to_csv call at the bottom.
df2 = pd.DataFrame(columns = ["filepath","od_cx","od_cy", "fv_x", "fv_y"])
# Walk the folder tree and process every matching image file.
for subdir, dirs, files in os.walk(folder):
for file in files:
#print os.path.join(subdir, file)
filepath = subdir + os.sep + file
# NOTE(review): `and` binds tighter than `or`, so the suffix exclusions only
# apply to the ".tiff" branch of this condition.
if filepath.endswith(".jpeg") or filepath.endswith(".tiff") and not filepath.endswith("_OD.tiff") and not filepath.endswith("_bright.tiff") and not filepath.endswith("_FV.tiff") and not filepath.endswith("_mask.tiff"):
og_cv = cv2.imread(filepath, cv2.IMREAD_COLOR)
# NOTE(review): `od` is not read again in the visible code and stays unset
# when the path contains neither "left" nor "right".
if "left" in str(filepath):
od = "left"
elif "right" in str(filepath):
od = "right"
# Output locations: an "OD" folder for annotated results and a "Temp" folder
# for intermediate images, both created next to the input file.
OD_path = subdir + os.sep + "OD"
if not os.path.exists(str(OD_path)):
os.mkdir(str(OD_path))
OD = OD_path + os.sep + str(os.path.splitext(file)[0]) + "_OD.tiff"
fovea_path = OD_path + os.sep + str(os.path.splitext(file)[0]) + "_FV.tiff"
temp_path = subdir + os.sep + "Temp"
if not os.path.exists(str(temp_path)):
os.mkdir(str(temp_path))
bright = temp_path + os.sep + str(os.path.splitext(file)[0]) + "_bright.tiff"
thresholded_od = temp_path + os.sep + str(os.path.splitext(file)[0]) + "_thresholded_OD.tiff"
thresholded_fv = temp_path + os.sep + str(os.path.splitext(file)[0]) + "_thresholded_FV.tiff"
mask_file = temp_path + os.sep + str(os.path.splitext(file)[0]) + "_mask.tiff"
## Fovea
image = cv2.imread(filepath)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
h = image.shape[0]
w = image.shape[1]
# loop over the image
# Collect every pixel that is (255, 0, 255) with (0, 255, 255) pixels three
# columns to its left and right; these mark the grid intersection.
fv_cx = []
fv_cy = []
for y in range(0, h):
for x in range(0, w):
# threshold the pixel
# NOTE(review): x+3 can index past the right edge (IndexError) and x-3 wraps
# around to the other side for x < 3.
if np.all(image[y, x] == (255, 0, 255)) and np.all(image[y, x+3] == (0, 255, 255)) and np.all(image[y, x-3] == (0, 255, 255)):
print("Found fovea")
fv_cx.append(x)
fv_cy.append(y)
# Draw them
# fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(10, 4))
# image = color.gray2rgb(image)
# image_draw is the same array object as image (no copy is made).
image_draw = image
if image.shape[2] == 3:
image[fv_cy, fv_cx] = (220, 20, 20)
if image.shape[2] == 4:
image[fv_cy, fv_cx] = (220, 20, 20, 0)
plt.imsave(OD, image)
print(fv_cx, fv_cy)
# else:
# fv_cx = "No fv_cx"
# fv_cy = "No fv_cy"
## Find image dimensions
source_img = Image.open(filepath)
width, height = source_img.size
x_max = int(width)
y_max = int(height)
print(x_max)
print(y_max)
#Load image
im = cv2.imread(filepath, cv2.IMREAD_COLOR)
# The size is read a second time here; width/height/x_max/y_max are reassigned.
background = Image.open(filepath).convert('RGB')
width, height = background.size
x_max = int(width)
y_max = int(height)
# Brightness adjustment - https://docs.opencv.org/3.4/d3/dc1/tutorial_basic_linear_transform.html
new_image = np.zeros(im.shape, im.dtype)
alpha = 1.0 # contrast control
beta = 0 # brightness control
# NOTE(review): the call passes beta=100 directly; the `beta` variable above is not used.
new_image = cv2.convertScaleAbs(im, alpha=alpha, beta=100)
# cv2.imshow('New Image', new_image)
# cv2.waitKey(0)
# The brightened image is written to disk and immediately read back.
cv2.imwrite(bright, new_image)
new_image = cv2.imread(bright, cv2.IMREAD_COLOR)
## OD
#Convert to HLS, so we can remove the saturated fovea
HLS = cv2.cvtColor(new_image,cv2.COLOR_BGR2HLS)
Schannel = HLS[:,:,2]
# mask is 255 where the saturation channel is exactly 0.
mask = cv2.inRange(Schannel, 0, 0)
# res = cv2.bitwise_and(new_image,new_image, mask= mask)
new_image = cv2.cvtColor(new_image,cv2.COLOR_BGR2GRAY)
# The width-scaled block size is overwritten by the fixed value 21 on the next line.
thresh_x = round_up_to_odd((21/1033) * width)
thresh_x = 21
#### Thresholding Example Options
# img = cv2.bitwise_and(new_image,new_image, mask= mask)
img = cv2.medianBlur(new_image,5)
pil_im = Image.fromarray(mask)
# mask_width, mask_height = pil_im.size
# Size of the central region to exclude, scaled from 165 px at a 290 px image width.
mask_width, mask_height = (165 * (width/290)), (165 * (width/290))
print(width, height)
print(mask_width, mask_height)
margin = 10
# Crop to the strip on one side of the central region, chosen from markers in
# the file path. NOTE(review): crop_img and crop_x_start stay undefined when
# neither condition matches.
if "_L" in filepath or "OS" in filepath:
x_center = width/2
crop_x_start = 0
crop_x_stop = int(x_center-(mask_width/2)) + margin
crop_img = img[0:height, crop_x_start:crop_x_stop]
# cv2.imshow("cropped", crop_img)
# NOTE(review): waitKey() is called although the imshow above is commented out.
cv2.waitKey()
if "_R" in filepath or "OD" in filepath:
x_center = width/2
crop_x_start = int((x_center+(mask_width/2))) - margin
crop_x_stop = width
crop_img = img[0:height, crop_x_start:crop_x_stop]
# cv2.imshow("cropped", crop_img)
cv2.waitKey()
# Adaptive mean threshold on the crop, then a 21x21 Gaussian blur.
th2 = cv2.adaptiveThreshold(crop_img,255,cv2.ADAPTIVE_THRESH_MEAN_C,\
cv2.THRESH_BINARY,thresh_x,2)
th2 = cv2.GaussianBlur(th2,(21,21),0)
# cv2.imshow("cropped", th2)
cv2.waitKey()
cv2.imwrite(thresholded_od, th2)
## Hough Circle
# Load picture and detect edges
image = img_as_ubyte(th2)
edges = canny(image, sigma=3, low_threshold=10, high_threshold=50)
# Candidate radii: 50 to 500 px in steps of 2 px at a reference width of
# 1033 px, scaled to the actual image width.
x=50
y=500
z=2
start = math.ceil((x/1033) * width)
stop = math.ceil((y/1033) * width)
step = math.ceil((z/1033) * width)
hough_radii = np.arange(start, stop, step)
hough_res = hough_circle(edges, hough_radii)
# Branch taken when at least one grid-intersection pixel was found.
if fv_cy != []:
# Select the 10 most prominent circles
accums, cx, cy, radii = hough_circle_peaks(hough_res, hough_radii,
total_num_peaks=10)
df = pd.DataFrame(columns = ["index", "distance", "area", "cX", "cY"])
idx = (0)
# Draw them
fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(10, 4))
# image = color.gray2rgb(image)
image = io.imread(filepath)
# Shift x back from crop coordinates to full-image coordinates.
cx = (cx + crop_x_start)
for center_y, center_x, radius in zip(cy, cx, radii):
# d = math.sqrt(((center_x-fv_cx)**2) + ((center_y-fv_cy)**2))
# Score = vertical distance to the intersection minus horizontal distance, so
# smaller means vertically closer and horizontally farther away.
# NOTE(review): fv_cx / fv_cy are lists; int(d) below presumably only works
# when exactly one intersection pixel was found - confirm.
p = abs(center_y - fv_cy)
q = -1 * abs(center_x - fv_cx)
d = p + q
print(d)
area = math.pi * (radius**2)
df.loc[idx, 'index'] = idx
df.loc[idx, 'area'] = int(area)
df.loc[idx, 'distance'] = int(d)
df.loc[idx, 'cX'] = int(center_x)
df.loc[idx, 'cY'] = int(center_y)
df.loc[idx, 'radius'] = int(radius)
idx += 1
df['distance'] = pd.to_numeric(df['distance'])
df['radius'] = pd.to_numeric(df['radius'])
print("DF?")
print(df)
if len(df["distance"]) > 0:
print("pass")
# Among the three best-scoring circles, take the largest radius when their
# radii differ by less than 3; otherwise take the best-scoring circle overall.
df_radius = df.nsmallest(3, 'distance')
print(df_radius)
if (df_radius['radius'].max()-df_radius['radius'].min()) < 3:
idx = df_radius['radius'].idxmax()
else:
idx = df['distance'].idxmin()
center_y = int(df.loc[idx, 'cY'])
center_x = int(df.loc[idx, 'cX'])
radius = int(df.loc[idx, 'radius'])
print(center_y, center_x, radius)
# Draw them
# fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(10, 4))
# image = color.gray2rgb(image)
# The freshly read image is replaced by image_draw on the next line.
image = io.imread(filepath)
image = image_draw
circy, circx = circle_perimeter(center_y, center_x, radius,
shape=image.shape)
print(image.shape)
print(image.shape[2])
# Draw the circle outline, then a radius-0 "circle" to mark its centre.
if image.shape[2] == 3:
image_draw[circy, circx] = (220, 20, 20)
circy, circx = circle_perimeter(center_y, center_x, 0,
shape=image.shape)
image_draw[circy, circx] = (220, 20, 20)
if image.shape[2] == 4:
image_draw[circy, circx] = (220, 20, 20, 0)
circy, circx = circle_perimeter(center_y, center_x, 0,
shape=image.shape)
image_draw[circy, circx] = (220, 20, 20, 0)
# final = ax.imshow(image, cmap=plt.cm.gray)
# fig = plt.show()
## Need to fix saving
plt.imsave(OD, image_draw)
# No grid-intersection pixel found: fall back to the single strongest circle.
else:
hough_radii = np.arange(start, stop, step)
hough_res = hough_circle(edges, hough_radii)
# Select the single most prominent circle
accums, cx, cy, radii = hough_circle_peaks(hough_res, hough_radii,
total_num_peaks=1)
# NOTE(review): cx presumably is a NumPy array here, in which case `!= None`
# compares element-wise rather than testing for a missing result - confirm.
if cx != None:
print("Found OD")
# Pulls the value out of the printed form of the array (text between brackets).
od_cx = (re.search(r"\[([A-Za-z0-9_]+)\]",str(cx))).group(1)
od_cy = (re.search(r"\[([A-Za-z0-9_]+)\]",str(cy))).group(1)
else:
od_cx = "Not found"
od_cy = "Not found"
# Draw them
#fig, ax = plt.subplots(ncols=1, nrows=1, #figsize=(10, 4))
# image = color.gray2rgb(image)
image = io.imread(filepath)
image = image_draw
# NOTE(review): unlike the branch above, cx is not shifted by crop_x_start here.
for center_y, center_x, radius in zip(cy, cx, radii):
circy, circx = circle_perimeter(center_y, center_x, radius,
shape=image.shape)
print(image.shape)
print(image.shape[2])
if image.shape[2] == 3:
image_draw[circy, circx] = (220, 20, 20)
circy, circx = circle_perimeter(center_y, center_x, 0,
shape=image.shape)
image_draw[circy, circx] = (220, 20, 20)
if image.shape[2] == 4:
image_draw[circy, circx] = (220, 20, 20, 0)
circy, circx = circle_perimeter(center_y, center_x, 0,
shape=image.shape)
image_draw[circy, circx] = (220, 20, 20, 0)
# final = ax.imshow(image, cmap=plt.cm.gray)
# fig = plt.show()
## Need to fix saving
plt.imsave(OD, image_draw)
# NOTE(review): center_x / center_y are only assigned inside the loops and
# branches above, so they can be unbound (or stale from a previous file) here.
append_list_as_row(csv_file,[filepath,center_x,center_y, fv_cx, fv_cy])
plt.close('all')
# NOTE(review): DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
df2 = df2.append({"filepath":filepath,"od_cx":center_x, "od_cy":center_y, "fv_x":fv_cx, "fv_y":fv_cy}, ignore_index=True)
print(df2)
df2.to_csv(csv_file2)
Related
I have multiple folders whose images share the same file names (a.png, etc.). I want to modify the code below to read these same-named files from the different directories and show their OpenCV output using YOLO at the same time. To be more specific, I have 10 folders containing images of different categories: one folder contains RGB files, another contains grayscale files, and so on. To compare their outputs, I want to show the images that have the same name but come from different folders. I know it should not be that hard, but I am pretty confused. Thanks in advance!
import cv2
import numpy as np
import os
import matplotlib.pyplot as plt
import tkinter
from tkinter import filedialog
def cal_alpB(minMax):
    """Return ``[alpha, beta]`` for the linear map taking ``minMax`` onto 0..255.

    ``minMax`` is indexed as ``(min, max)``. ``alpha`` is the gain
    ``255 / (max - min)`` and ``beta`` is the offset ``-alpha * min``.
    """
    low, high = minMax[0], minMax[1]
    gain = 255/(high-low)
    offset = -gain*low
    return [gain, offset]
def getMinMax(path):
    """Read the value range from the first line of ``<path>/config``.

    The first line is split on commas and every field is evaluated as a
    Python expression; the resulting values are returned as a list.
    """
    with open(path+'/config') as cfg:
        first_line = cfg.read().splitlines()[0]
    # NOTE(review): eval() executes arbitrary expressions from the config
    # file; only use this with config files you trust.
    return [eval(field) for field in first_line.split(',')]
def normalizeData(minMax, img):
    """Clamp ``img`` in place and return the ``[alpha, beta]`` scaling pair.

    Values above ``minMax[1]`` are set to ``minMax[1]`` and negative values
    are set to 0; ``img`` itself is modified. The returned pair comes from
    ``cal_alpB(minMax)``.
    """
    scale_pair = cal_alpB(minMax)
    upper = minMax[1]
    too_high = img > upper
    img[too_high] = upper
    negative = img < 0
    img[negative] = 0
    return scale_pair
def boxDrawing(layerOutput, frameWidth, frameHeight, class_ids, confidences, boxes, img):
    """Collect confident detections, apply NMS and draw the kept boxes on ``img``.

    Detections whose best class score exceeds 0.5 are appended to the
    caller-supplied ``class_ids``, ``confidences`` and ``boxes`` lists (boxes
    as ``[left, top, width, height]``, scaled by the frame size). Boxes kept
    by ``cv2.dnn.NMSBoxes`` are drawn on ``img`` in place together with their
    label and confidence. Labels come from the module-level ``classes`` list.
    """
    for layer in layerOutput:
        for det in layer:
            # Entries from index 5 onwards are treated as per-class scores.
            class_scores = det[5:]
            best = np.argmax(class_scores)
            best_score = class_scores[best]
            if not best_score > 0.5:
                continue
            cx = int(det[0] * frameWidth)
            cy = int(det[1] * frameHeight)
            box_w = int(det[2] * frameWidth)
            box_h = int(det[3] * frameHeight)
            class_ids.append(best)
            confidences.append(float(best_score))
            # Convert centre/size to a top-left anchored box.
            boxes.append([int(cx - box_w / 2), int(cy - box_h / 2), box_w, box_h])

    kept = cv2.dnn.NMSBoxes(boxes, confidences, 0.8, 0.7)
    font = cv2.FONT_HERSHEY_PLAIN
    # One random colour per candidate box.
    colors = np.random.uniform(0, 255, size = (len(boxes),3))
    for i, box in enumerate(boxes):
        if i not in kept:
            continue
        x, y, w, h = box
        caption = str(classes[class_ids[i]]) + " " + str(round(confidences[i],2))
        cv2.rectangle(img, (x,y), (x+w,y+h), colors[i],1)
        cv2.putText(img, caption, (x,y+20), font, 1, (255,255,255),1)
def algorythmYolo():
    """Run YOLO on every image of a user-chosen folder and display the results.

    A directory picker is shown first. The chosen folder must contain a
    ``config`` file, which ``getMinMax`` reads for the value range. Every
    file that OpenCV can read as an image is clamped and rescaled to 8 bits,
    converted to BGR, passed through the module-level ``yolo`` network and
    shown in the window named ``"window"`` with its detections drawn on it.
    Files that cannot be read as images are skipped.
    """
    tkinter.Tk().withdraw()
    folder = filedialog.askdirectory()
    if not folder:
        # The dialog was cancelled, so there is nothing to process.
        cv2.destroyAllWindows()
        return
    minMax = getMinMax(folder)
    for filename in sorted(os.listdir(folder)):
        img = cv2.imread(os.path.join(folder, filename), -1)
        if img is None:
            # Not an image (for example the ``config`` file). Stopping here
            # would silently drop every image that sorts after it.
            continue
        alpB = normalizeData(minMax, img)
        img = cv2.convertScaleAbs(img, alpha=alpB[0], beta=alpB[1])
        # NOTE(review): GRAY2BGR assumes single-channel input - TODO confirm.
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
        frameHeight, frameWidth, channels = img.shape
        blob = cv2.dnn.blobFromImage(img, 1/255, (frameWidth, frameHeight), (0, 0, 0), swapRB=True, crop=False)
        yolo.setInput(blob)
        layerOutput = yolo.forward(outputLayers)
        boxes = []
        confidences = []
        class_ids = []
        boxDrawing(layerOutput, frameWidth, frameHeight, class_ids, confidences, boxes, img)
        cv2.imshow("window", img)
        cv2.setWindowTitle('window', folder)
        # A short wait lets the window repaint between images.
        cv2.waitKey(1)
    cv2.destroyAllWindows()
# Load the YOLOv3 network and the class labels used when drawing boxes.
yolo = cv2.dnn.readNet("./yolov3.weights","./yolov3.cfg")
with open("./coco.names","r") as f:
    classes = f.read().splitlines()
layers_names = yolo.getLayerNames()
# getUnconnectedOutLayers() yields 1-based layer indices, as a flat array in
# recent OpenCV releases and as an (N, 1) array in older ones; flattening
# makes the lookup work with both.
outputLayers = [layers_names[i-1] for i in np.array(yolo.getUnconnectedOutLayers()).flatten()]
cv2.namedWindow("window", cv2.WINDOW_NORMAL)
algorythmYolo()
Hi, I'm making a project about detecting bullet holes in shooting targets. My original idea was to use the Hough circle algorithm to detect both the target, which works quite well for photos taken straight on, and the bullet holes, which does not work as well. So I was wondering if anyone could suggest a better way of finding them or help me improve this code.
import cv2 as cv
import numpy as np
import math
import sys
from PIL import Image
import matplotlib.pyplot as plt
# Presumably the highest score a single hit can earn.
# NOTE(review): not referenced by any code visible in this file.
MAX_POINTS = 10
def main(argv):
    """Detect the target and the bullet holes in an image and print the score.

    Args:
        argv: Command-line arguments without the program name. The first
            element, when present, is the image path. Without it the default
            image ``tarczamala.jpg`` is resized to 600x600, saved with a
            ``600`` prefix and used instead.

    Returns:
        0 on success, -1 when the image cannot be loaded or no target circle
        is found.

    Intermediate images are shown in OpenCV windows and the function blocks
    until a key is pressed.
    """
    default_file = 'tarczamala.jpg'
    default_size = 600, 600
    if len(argv) > 0:
        filename = argv[0]
    else:
        # Only prepare the resized default image when it is actually used, so
        # a user-supplied path does not require the default file to exist.
        im = Image.open(default_file)
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        im = im.resize(default_size, Image.LANCZOS)
        im.save('600' + default_file)
        filename = '600' + default_file

    # Loads an image
    src = cv.imread(cv.samples.findFile(filename), cv.IMREAD_COLOR)
    # Check if image is loaded fine
    if src is None:
        print ('Error opening image!')
        print ('Usage: hough_circle.py [image_name -- default ' + default_file + '] \n')
        return -1

    # Grayscale
    gray = cv.cvtColor(src, cv.COLOR_BGR2GRAY)
    cv.imshow('gray', gray)

    # Bilateral filter
    bilateral = cv.bilateralFilter(gray, 7, 15, 10)
    cv.imshow('bilateral', bilateral)

    blank = np.zeros(bilateral.shape[:2], dtype='uint8')
    cv.imshow('blank', blank)

    # Edge detection on the smoothed image
    canny = cv.Canny(bilateral, 50, 175)
    cv.imshow('canny1', canny)

    # [-2] selects the contour list for both the 2-value and 3-value return
    # forms of findContours.
    contours = cv.findContours(canny, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_NONE)[-2]
    print(f'{len(contours)} contour(s) found')

    # Target
    circles = cv.HoughCircles(canny, cv.HOUGH_GRADIENT, 1, 0.01,
                              param1=100, param2=50,
                              minRadius=7, maxRadius=300)
    # HoughCircles returns None when nothing is found; check before use.
    biggestCircle = findBiggestCircle(circles) if circles is not None else None
    if biggestCircle is None:
        print('No target circle found!')
        return -1

    center = (math.floor(biggestCircle[0]), math.floor(biggestCircle[1]))
    radius = math.floor(biggestCircle[2])

    # Keep only the area inside the target for the bullet-hole search.
    mask = cv.circle(blank, center, radius, 255, -1)
    cv.imshow('rysowanie granicy', mask)
    masked = cv.bitwise_and(bilateral, bilateral, mask=mask)
    cv.imshow('granice', masked)

    # Edge detection on the masked image
    canny = cv.Canny(masked, 50, 175)
    cv.imshow('canny2', canny)

    # The target radius is split into ten equally wide scoring rings.
    delta_r = biggestCircle[2] / 10
    biggest_circle_center = [biggestCircle[0], biggestCircle[1]]
    # circle center
    cv.circle(src, center, 1, (255, 0, 0), 3)
    # circle outline
    cv.circle(src, center, radius, (0, 0, 255), 3)

    # bullet holes
    hits = cv.HoughCircles(canny, cv.HOUGH_GRADIENT, 1, 10,
                           param1=300, param2=10,
                           minRadius=7, maxRadius=10)
    # No detected hole means a score of zero rather than a crash.
    if hits is not None:
        score = countHitScore(hits.tolist(), delta_r, biggest_circle_center)
    else:
        score = 0
    print(f'The score is: {score}"')

    if hits is not None:
        for x, y, r in np.uint16(np.around(hits))[0, :]:
            # OpenCV drawing functions need plain Python ints.
            hit_center = (int(x), int(y))
            # circle center
            cv.circle(src, hit_center, 1, (0, 100, 100), 3)
            # circle outline
            cv.circle(src, hit_center, int(r), (255, 0, 255), 3)

    cv.imshow("detected circles", src)
    cv.waitKey(0)
    return 0
def findBiggestCircle(circles):
    """Return the circle with the largest radius as an ``[x, y, radius]`` list.

    Args:
        circles: Array in the layout returned by ``cv.HoughCircles``, read as
            ``circles[0][k] == (x, y, radius)``; may be ``None``.

    Returns:
        The largest circle converted with ``tolist()``, or ``None`` when
        ``circles`` is ``None`` or contains no circle. When several circles
        share the largest radius the first one is returned.
    """
    if circles is None or len(circles) == 0 or len(circles[0]) == 0:
        return None
    biggestCircle = max(circles[0], key=lambda circle: circle[2])
    print(biggestCircle)
    return biggestCircle.tolist()
def countHitScore(hits, delta_r, target_center, max_points=10):
    """Return the total score of the detected bullet holes.

    Args:
        hits: Nested list in the layout of ``cv.HoughCircles(...).tolist()``;
            the holes are read from ``hits[0]`` as ``(x, y, radius)``.
            ``None`` or an empty list counts as no hits.
        delta_r: Distance covered by one scoring ring.
        target_center: ``(x, y)`` of the target centre.
        max_points: Points awarded for a hit in the innermost ring.

    Returns:
        The sum of the points of all hits. A hit earns ``max_points`` in the
        innermost ring and one point less per ring further out, never less
        than zero.
    """
    if not hits:
        return 0
    score = 0
    print(f'{hits} hits')
    for hit in hits[0]:
        # Distance from the target centre to the nearest edge of the hole.
        center_dist = math.hypot(hit[0] - target_center[0], hit[1] - target_center[1])
        total_dist = center_dist - hit[2]
        # Ring number, counted from 1 at the centre.
        ring = max(1, math.ceil(total_dist / delta_r))
        # Hits beyond the outermost ring score nothing instead of going negative.
        points = max(0, max_points + 1 - ring)
        score += points
        print(f'{points} zdobyte punkty')
    return score
if __name__ == "__main__":
    # Hand main()'s return value to the shell as the exit status instead of
    # discarding it.
    sys.exit(main(sys.argv[1:]))
I have a collection of images as below -
Example 1
Example 2
Example 3
These represent dates in DDMMYYYY format. For each of these images, I want to save each digit as a separate image. For Example 1, I wish to save 7, 9, 0, 8, 5, 8, 7, 1 as separate images sliced from the original image. So far, I have tried various methods described in different Stack Overflow answers and blog posts, but none of them seems to work.
Code to extract boxes surrounding dates -
from glob import glob
import cv2 as cv
import numpy as np
from tqdm import tqdm
class ExtractRectangle:
    """Crop the rectangular box found in an image and save the crop.

    The box is located from near-horizontal and near-vertical line segments
    detected with the probabilistic Hough transform on a Canny edge map.
    """

    def __init__(self):
        super().__init__()
        # Minimum segment length for horizontal and vertical Hough lines.
        self.minLinLength_h = 70
        self.minLinLength_v = 5
        # Maximum gap bridged when joining collinear segments.
        self.maxLineGap = 20

    def is_horizontal(self, line, thresh=5):
        """Return True when the y values of the end points differ by at most ``thresh``."""
        return abs(line[1] - line[3]) <= thresh

    def is_vertical(self, line, thresh=5):
        """Return True when the x values of the end points differ by at most ``thresh``."""
        return abs(line[0] - line[2]) <= thresh

    def _filter_lines(self, linesP, horizontal):
        """Keep only the segments of ``linesP`` with the requested orientation."""
        if linesP is None:
            return []
        keep = self.is_horizontal if horizontal else self.is_vertical
        # Each entry wraps one (x1, y1, x2, y2) segment.
        return [entry[0] for entry in linesP if keep(entry[0], 3)]

    def get_lines(self, canny, horizontal=True):
        """Return the horizontal (default) or vertical segments in ``canny``.

        Args:
            canny: Edge image passed to ``cv.HoughLinesP``.
            horizontal: Select horizontal segments when True, vertical
                segments when False.

        Returns:
            A list of ``(x1, y1, x2, y2)`` segments of the requested
            orientation only; empty when no segment is found.
        """
        min_length = self.minLinLength_h if horizontal else self.minLinLength_v
        linesP = cv.HoughLinesP(
            canny,
            rho=1,
            theta=np.pi / 180,
            threshold=10,
            lines=None,
            minLineLength=min_length,
            maxLineGap=self.maxLineGap,
        )
        return self._filter_lines(linesP, horizontal)

    def remove_whitespace(self, img):
        """Crop ``img`` to the bounding box of its largest dark region.

        Returns ``img`` unchanged when no region is found.
        """
        # https://stackoverflow.com/questions/48395434/how-to-crop-or-remove-white-background-from-an-image
        gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
        th, threshed = cv.threshold(gray, 127, 255, cv.THRESH_BINARY_INV)
        kernel = cv.getStructuringElement(cv.MORPH_ELLIPSE, (11, 11))
        morphed = cv.morphologyEx(threshed, cv.MORPH_CLOSE, kernel)
        # [-2] selects the contour list for both return forms of findContours.
        cnts = cv.findContours(morphed, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)[-2]
        if len(cnts) == 0:
            return img
        cnt = sorted(cnts, key=cv.contourArea)[-1]
        x, y, w, h = cv.boundingRect(cnt)
        return img[y : y + h, x : x + w]

    def process_image(self, filename, path):
        """Crop the box out of ``filename`` and write it into ``path``.

        Nothing is written when the image cannot be read, when no horizontal
        or vertical lines are found, or when the detected box covers less
        than 60% of the height or 95% of the width of the whitespace-trimmed
        image. The output keeps the input's base name.
        """
        img = cv.imread(cv.samples.findFile(filename))
        if img is None:
            return
        img = self.remove_whitespace(img)
        gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
        canny = cv.Canny(gray, 100, 200)

        horizontal_lines = self.get_lines(canny)
        vertical_lines = self.get_lines(canny, horizontal=False)
        if len(horizontal_lines) == 0 or len(vertical_lines) == 0:
            return

        # The box spans from the first to the last line in each direction.
        ys = [line[1] for line in horizontal_lines]
        xs = [line[0] for line in vertical_lines]
        y_min, y_max = min(ys), max(ys)
        x_min, x_max = min(xs), max(xs)

        # post whitespace removal, dates should only be the major component
        if (y_max - y_min) / img.shape[0] < 0.6:
            return
        # as dates occupy majority of the horizontal space
        if (x_max - x_min) / img.shape[1] < 0.95:
            return

        cropped = img[y_min:y_max, x_min:x_max]
        cv.imwrite(f"{path}/{filename.split('/')[-1]}", cropped)
if __name__ == "__main__":
    extract = ExtractRectangle()
    # (glob pattern of the raw images, folder receiving the crops); the test
    # split is processed before the train split.
    jobs = (
        ("data/raw/test/*.png", "data/processed/test/"),
        ("data/raw/train/*.png", "data/processed/train/"),
    )
    for pattern, out_dir in jobs:
        for src in tqdm(glob(pattern)):
            extract.process_image(src, out_dir)
Resultant detection for above images -
Example 1
Example 2
Example 3
Some other samples
The code below is able to detect objects without issue, however, towards the end there is the line "cv2.imshow("demo", img)"
I would expect this window to show the image with the generated bounding boxes and labels, but all I get is a blank window. I got this code originally from some examples on the internet so I'm a bit lost as to how to position that line, or why it's not generating the image.
import cv2
import numpy as np
def take_pic(output_filename):
    """Grab one frame from the RTSP camera with ffmpeg and save it.

    Args:
        output_filename: Path of the image file ffmpeg writes (overwritten
            when it exists).

    Returns:
        True when ffmpeg exited successfully, False otherwise (including
        when the ffmpeg executable cannot be found).
    """
    import subprocess

    # Run ffmpeg for a single frame; passing the arguments as a list avoids
    # shell quoting problems with the file name.
    # NOTE(review): the URL has '#' between the credentials and the host;
    # RTSP URLs normally use '@' there - confirm against the camera settings.
    capture_cmd = [
        "ffmpeg", "-y", "-rtsp_transport", "udp",
        "-i", "rtsp://mycamera:apassword#172.16.66.106/live",
        "-vframes", "1", output_filename,
    ]
    try:
        return subprocess.run(capture_cmd, check=False).returncode == 0
    except FileNotFoundError:
        return False
# Load the YOLOv3 network and the class labels.
net = cv2.dnn.readNet("yolov3.weights", "./darknet/cfg/yolov3.cfg")
with open("./darknet/data/coco.names", "r") as f:
    classes = [line.strip() for line in f.readlines()]
layer_names = net.getLayerNames()
# getUnconnectedOutLayers() yields 1-based indices, flat in recent OpenCV
# releases and as an (N, 1) array in older ones; flattening handles both.
output_layers = [layer_names[i - 1] for i in np.array(net.getUnconnectedOutLayers()).flatten()]
# One random colour per class.
colors = np.random.uniform(0, 255, size=(len(classes), 3))
output_filename = "/tmp/camera.jpeg"
cv2.namedWindow("demo", cv2.WINDOW_AUTOSIZE)
j = 0
while True:
    take_pic(output_filename)
    img = cv2.imread(output_filename)
    if img is None:
        # No frame could be read, so stop instead of looping on a bad file.
        print("camera open failed")
        break
    j = j + 1
    print("j= " + str(j))
    img = cv2.resize(img, None, fx=0.4, fy=0.4)
    height, width, channels = img.shape
    blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)
    class_ids = []
    confidences = []
    boxes = []
    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.5:
                # Object detected
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                print(str(center_x)+" "+str(center_y))
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                # Rectangle coordinates
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
    font = cv2.FONT_HERSHEY_PLAIN
    for i in range(len(boxes)):
        if i in indexes:
            x, y, w, h = boxes[i]
            label = str(classes[class_ids[i]])
            print("label :"+str(label)+"x: "+str(x)+" y: " + str(y))
            # colors holds one entry per class, so index it by class id.
            color = colors[class_ids[i]]
            cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
            cv2.putText(img, label, (x, y + 30), font, 3, color, 3)
    cv2.imshow("demo", img)
    # The window is only painted while waitKey runs; a 1 ms wait keeps the
    # loop going, and pressing 'q' ends it.
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cv2.destroyAllWindows()
With OpenCV, a call to imshow must be followed by a call to waitKey in order for the image to be displayed.
Paste something similar to this towards the end of your loop, after you call cv2.imshow:
# waitKey(0) blocks until a key is pressed; pass a positive delay in
# milliseconds (for example waitKey(1)) to keep a loop running between frames.
if cv2.waitKey(0) == ord('q'):
print('exitting loop')
break
If the image shows blank during imshow method, then you might need to multiply pixels with 255. For instance, in Matlab, the images are normalized between 0 - 1.
Try:
# Scaling by 255 is only meant for images whose values lie in the 0-1 range.
cv2.imshow("demo", img * 255)
cv2.waitKey(0)
Hello, I am trying to read the odometer value from the attached image using OpenCV and the EAST model along with pytesseract.
Following is my code :
import cv2
import numpy as np
import matplotlib.pyplot as plt
# Load the odometer photo as a single-channel grayscale image (flag 0).
median = cv2.imread("odo_4.jpg", 0)
image_gray = median
# NOTE(review): `binary` is not used again in the visible code.
binary = cv2.bitwise_not(image_gray)
blur = cv2.GaussianBlur(image_gray,(5,5),0)
# With THRESH_OTSU the threshold is chosen automatically; the 0 is a placeholder.
ret2,th2 = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
# NOTE(review): the fourth positional argument of cv2.Canny is the optional
# output array `edges`, not a maximum value - confirm that 255 is intended.
edged = cv2.Canny(th2, 50, 80, 255)
#threshold = cv2.adaptiveThreshold(edged,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY,11,2)
# Close small gaps in the edge map before extracting contours.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,5))
close = cv2.morphologyEx(edged, cv2.MORPH_CLOSE, kernel, iterations=1)
contours = cv2.findContours(close, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# findContours returns 2 or 3 values depending on the OpenCV version; pick the contour list.
contours = contours[0] if len(contours) == 2 else contours[1]
# Keep every contour except four-cornered ones with an aspect ratio close to 1.
rect_cnts = []
for cnt in contours:
peri = cv2.arcLength(cnt, True)
approx = cv2.approxPolyDP(cnt, 0.04 * peri, True)
(x, y, w, h) = cv2.boundingRect(cnt)
ar = w / float(h)
if (len(approx) == 4) & (ar >= 0.95 and ar <= 1.05) : # shape filtering condition
pass
else :
rect_cnts.append(cnt)
# Select the kept contour whose bounding box has the largest area.
max_area = 0
football_square = None
for cnt in rect_cnts:
(x, y, w, h) = cv2.boundingRect(cnt)
if max_area < w*h:
max_area = w*h
football_square = cnt
image = cv2.cvtColor(image_gray, cv2.COLOR_GRAY2RGB)
# NOTE(review): football_square is still None here when no contour was kept.
(x, y, w, h) = cv2.boundingRect(football_square)
# Crop the image to that bounding box; the crop is what the text detector sees.
new_image = image[y:y+h, x:x+w]
new = new_image
import cv2 as cv
orig = new.copy()
(origH, origW) = new.shape[:2]
# Ratios used later to map boxes from the 320x320 input back onto the crop.
rW = origW / 320.0
rH = origH / 320.0
# resize the original image to new dimensions
new = cv.resize(new, (320, 320))
(H, W) = new.shape[:2]
# construct a blob from the image to forward pass it to EAST model
blob = cv.dnn.blobFromImage(new, 1.0, (W, H),
(123.68, 116.78, 103.94), swapRB=True, crop=False)
net = cv.dnn.readNet('frozen_east_text_detection.pb')
# Output layers requested from the network: the score map and the geometry map.
layerNames = [
"feature_fusion/Conv_7/Sigmoid",
"feature_fusion/concat_3"]
net.setInput(blob)
(scores, geometry) = net.forward(layerNames)
def predictions(prob_score, geo, min_confidence=0.5):
    """Decode EAST score and geometry maps into boxes and confidences.

    Args:
        prob_score: Score volume, read as ``prob_score[0, 0, row, col]``.
        geo: Geometry volume, read as ``geo[0, k, row, col]`` with ``k`` in
            0..3 for the four box distances and ``k == 4`` for the angle.
        min_confidence: Cells scoring below this value are ignored.

    Returns:
        A tuple ``(boxes, confidence_val)``: ``boxes`` is a list of
        ``(startX, startY, endX, endY)`` integer tuples and
        ``confidence_val`` holds the matching scores.
    """
    (numR, numC) = prob_score.shape[2:4]
    boxes = []
    confidence_val = []
    for row in range(numR):
        scoresData = prob_score[0, 0, row]
        d0, d1, d2, d3 = (geo[0, k, row] for k in range(4))
        anglesData = geo[0, 4, row]
        for col in range(numC):
            if scoresData[col] < min_confidence:
                continue
            # Each map cell is mapped to an input offset four times its index.
            (offX, offY) = (col * 4.0, row * 4.0)
            # Rotation of the predicted box.
            cos = np.cos(anglesData[col])
            sin = np.sin(anglesData[col])
            # Box height and width from the opposite distance pairs.
            h = d0[col] + d2[col]
            w = d1[col] + d3[col]
            # End corner first, then the start corner from the box size.
            endX = int(offX + (cos * d1[col]) + (sin * d2[col]))
            endY = int(offY - (sin * d1[col]) + (cos * d2[col]))
            startX = int(endX - w)
            startY = int(endY - h)
            boxes.append((startX, startY, endX, endY))
            confidence_val.append(scoresData[col])
    return (boxes, confidence_val)
(boxes, confidence_val) = predictions(scores, geometry)
# NOTE(review): non_max_suppression is not imported or defined in the visible
# code - presumably it comes from a helper package; confirm the import.
boxes = non_max_suppression(np.array(boxes), probs=confidence_val)
# initialize the list of results
results = []
# loop over the bounding boxes to find the coordinate of bounding boxes
for (startX, startY, endX, endY) in boxes:
# scale the coordinates based on the respective ratios in order to reflect bounding box on the original image
startX = int(startX * rW)
startY = int(startY * rH)
endX = int(endX * rW)
endY = int(endY * rH)
#extract the region of interest
r = orig[startY:endY, startX:endX]
plt.imshow(r)
#configuration setting to convert image to string.
configuration = ("-l eng --oem 1 --psm 7")
##This will recognize the text from the image of bounding box
# NOTE(review): pytesseract is not imported in the visible code.
text = pytesseract.image_to_string(r, config=configuration)
# append bbox coordinate and associated text to the list of results
results.append(((startX, startY, endX, endY), text))
The results are bad, even though the EAST model does identify the area where the digits are present. Can you please help me? I have tried different psm values in the config for image_to_string.
Use InRange() for selection. See example:
import cv2 as cv
# Lower and upper (H, S, V) bounds of the colour range to select.
lower_hsv = (80, 160, 200)
upper_hsv = (100, 255, 255)

frame_HSV = cv.cvtColor(cv.imread('OAPgE.jpg'), cv.COLOR_BGR2HSV)
# inRange marks pixels inside the range with 255; inverting the mask turns
# them to 0 and everything else to 255.
in_range = cv.inRange(frame_HSV, lower_hsv, upper_hsv)
frame_threshold = cv.bitwise_not(in_range)
cv.imwrite('out_36.png', frame_threshold)