How can I improve my detection efficiency when using FLANN? - python

I'm trying to improve the speed of my object detection. I'm using OpenCV and ORB brute force keypoint matching.
I've got 30 seconds for keypoint_detection to run and at the moment it's taking about 23 seconds. This is fine for now with 74 images, but that number is likely to increase.
So far I've:
Reduced my search area down to the absolute minimum
Refined my 74 images down to the absolute minimum size possible
I tried pre-processing to remove some of the clutter from the matching but it impacted my ability to find a successful match dramatically.
I've also tried:
SIFT, SURF, MatchTemplate, Canny and a few other methods. ORB brute force is my best match by a significant margin.
This is my exact working code, not pseudo-code and not an MVP as I didn't think it would make sense presenting a cut down version of the code I'm looking to speed up.
Is there any way to improve the efficiency of my code?
import cv2 as cv
import os
import glob
import pyautogui as py
from time import sleep
from windowcapture import WindowCapture
from vision import Vision
# Change the working directory to a fixed, hard-coded folder (C:\test).
# NOTE(review): the path is not derived from this script's own location --
# confirm that C:\test really is the folder the script lives in.
os.chdir(r'C:\test')
# Collect the paths of all PNG needle images once, at start-up.
avoid = glob.glob(r"C:\Users\test\*.png")
def loadImages(directory):
    """Read every image path in *directory* and pair each image with its path.

    Args:
        directory: iterable of image file paths (despite the name this is a
            collection of files, not a folder).

    Returns:
        A list of ``(image, path)`` tuples in input order. The result of
        ``cv.imread(path, cv.IMREAD_UNCHANGED)`` is stored as-is, so an
        unreadable file yields whatever ``cv.imread`` returned for it.
    """
    return [(cv.imread(path, cv.IMREAD_UNCHANGED), path) for path in directory]
# initialize the WindowCapture class
# No window name is passed, so WindowCapture falls back to the desktop
# window handle (the whole screen is the capture target).
wincap = WindowCapture()
def keypoint_detection(image_list):
    """Search the cropped screen region for each needle image in turn.

    For every ``(image, path)`` entry a fresh screenshot is taken, cropped to
    the fixed search area and matched against the needle via
    ``Vision.match_keypoints``. The loop stops at the first needle that
    yields match points, after sleeping for three seconds.

    Args:
        image_list: list of ``(needle_image, path)`` tuples as produced by
            ``loadImages``.
    """
    # Search area inside the screenshot: left edge, width, top edge, height.
    x, w, y, h = 600, 700, 20, 50

    for entry in image_list:
        needle_img, needle_path = entry[0], entry[1]

        # load image to find
        objectToFind = Vision(needle_img)

        # get an updated image of the screen and crop it to the search area.
        # NOTE(review): the screen is re-captured for every needle; if the
        # scene cannot change during one pass, capturing once before the loop
        # would save one full-screen grab per needle.
        keypoint_haystack = wincap.get_haystack()[y:y + h, x:x + w]

        # The needle's file path is passed as the display name (the original
        # referenced an undefined variable here, which raised NameError).
        kp1, kp2, matches, match_points = objectToFind.match_keypoints(
            keypoint_haystack, needle_path, min_match_count=30)

        if not match_points:
            # No detection: skip the drawing work entirely. This also avoids
            # passing the None keypoints returned on a matcher error into
            # cv.drawMatches.
            continue

        match_image = cv.drawMatches(
            objectToFind.needle_img, kp1, keypoint_haystack, kp2, matches, None)

        # find the center point of all the matched features
        # NOTE(review): centeroid() and draw_crosshairs() are not part of the
        # Vision class shown alongside this script -- confirm they exist.
        center_point = objectToFind.centeroid(match_points)
        # account for the width of the needle image that appears on the left
        center_point[0] += objectToFind.needle_w
        # draw the found center point on the output image
        match_image = objectToFind.draw_crosshairs(match_image, [center_point])
        sleep(3)
        break
# The list of needle files is fixed when the script starts, so read the
# images from disk once instead of re-reading all of them on every pass of
# the detection loop.
ships_to_avoid = loadImages(avoid)
while True:
    keypoint_detection(ships_to_avoid)
WindowCapture Class
import numpy as np
import win32gui, win32ui, win32con
class WindowCapture:
    """Grab screenshots of a window (or the whole desktop) as numpy images."""

    # properties (class-level defaults, overwritten per instance in __init__)
    w = 0          # capture width in pixels
    h = 0          # capture height in pixels
    hwnd = None    # handle of the window being captured
    cropped_x = 0  # left offset of the captured area inside the window
    cropped_y = 0  # top offset of the captured area inside the window
    offset_x = 0   # screen x of the captured area's top-left corner
    offset_y = 0   # screen y of the captured area's top-left corner

    # constructor
    def __init__(self, window_name=None):
        """Locate the target window and compute the capture geometry.

        Args:
            window_name: exact title of the window to capture. When ``None``
                the desktop window is used.

        Raises:
            Exception: if no window with the given title is found.
        """
        # find the handle for the window we want to capture.
        # if no window name is given, capture the entire screen
        if window_name is None:
            self.hwnd = win32gui.GetDesktopWindow()
        else:
            self.hwnd = win32gui.FindWindow(None, window_name)
            if not self.hwnd:
                raise Exception('Window not found: {}'.format(window_name))

        # get the window size
        window_rect = win32gui.GetWindowRect(self.hwnd)
        self.w = window_rect[2] - window_rect[0]
        self.h = window_rect[3] - window_rect[1]

        # account for the window border and titlebar and cut them off
        border_pixels = 0
        titlebar_pixels = 5
        self.w = self.w - border_pixels
        self.h = self.h - titlebar_pixels - border_pixels
        self.cropped_x = border_pixels
        self.cropped_y = titlebar_pixels

        # set the cropped coordinates offset so we can translate screenshot
        # images into actual screen positions
        self.offset_x = window_rect[0] + self.cropped_x
        self.offset_y = window_rect[1] + self.cropped_y

    def get_haystack(self):
        """Capture the window and return it as an ``(h, w, 3)`` uint8 array.

        The alpha channel of the 4-byte-per-pixel bitmap is dropped and the
        result is returned as a C-contiguous array.
        """
        # get the window image data
        wDC = win32gui.GetWindowDC(self.hwnd)
        dcObj = win32ui.CreateDCFromHandle(wDC)
        cDC = dcObj.CreateCompatibleDC()
        dataBitMap = win32ui.CreateBitmap()
        dataBitMap.CreateCompatibleBitmap(dcObj, self.w, self.h)
        try:
            cDC.SelectObject(dataBitMap)
            cDC.BitBlt((0, 0), (self.w, self.h), dcObj,
                       (self.cropped_x, self.cropped_y), win32con.SRCCOPY)
            # dataBitMap.SaveBitmapFile(cDC, 'debug.bmp')
            raw_bits = dataBitMap.GetBitmapBits(True)
        finally:
            # free resources even when the capture itself fails, so repeated
            # calls cannot leak GDI handles
            dcObj.DeleteDC()
            cDC.DeleteDC()
            win32gui.ReleaseDC(self.hwnd, wDC)
            win32gui.DeleteObject(dataBitMap.GetHandle())

        # convert the raw data into a format opencv can read.
        # np.frombuffer replaces the deprecated binary mode of np.fromstring.
        img = np.frombuffer(raw_bits, dtype='uint8').reshape(self.h, self.w, 4)
        # drop the 4th (alpha) channel and make the result contiguous
        return np.ascontiguousarray(img[..., :3])

    @staticmethod
    def list_window_names():
        """Print the handle (hex) and title of every visible top-level window."""
        def winEnumHandler(hwnd, ctx):
            if win32gui.IsWindowVisible(hwnd):
                print(hex(hwnd), win32gui.GetWindowText(hwnd))
        win32gui.EnumWindows(winEnumHandler, None)

    # translate a pixel position on a screenshot image to a pixel position on the screen.
    # pos = (x, y)
    def get_screen_position(self, pos):
        """Return *pos* shifted by the capture offset, as an ``(x, y)`` tuple."""
        return (pos[0] + self.offset_x, pos[1] + self.offset_y)
Vision Class
import cv2 as cv
import numpy as np
class Vision:
    """ORB + FLANN keypoint matcher for a single needle image."""

    # properties
    needle_img = None
    needle_w = 0
    needle_h = 0

    # constructor
    def __init__(self, needle_img_path):
        """Store the needle image and its dimensions.

        Args:
            needle_img_path: the needle image itself (an array exposing
                ``.shape``); despite the parameter name it is not a path.
        """
        self.needle_img = needle_img_path
        # Save the dimensions of the needle image
        self.needle_w = self.needle_img.shape[1]
        self.needle_h = self.needle_img.shape[0]
        # Needle features per patch size, so repeated matching against new
        # screenshots does not re-run ORB on the unchanged needle.
        self._needle_features = {}

    def _get_needle_features(self, patch_size):
        """Return (keypoints, descriptors) of the needle, computed once per patch size."""
        features = self._needle_features.get(patch_size)
        if features is None:
            orb = cv.ORB_create(edgeThreshold=0, patchSize=patch_size)
            features = orb.detectAndCompute(self.needle_img, None)
            self._needle_features[patch_size] = features
        return features

    def match_keypoints(self, haystack_screenshot, name, min_match_count, patch_size=32):
        """Match the needle's ORB features against a screenshot.

        Args:
            haystack_screenshot: image to search in.
            name: label printed when enough matches are found.
            min_match_count: the number of good matches must exceed this
                for match points to be reported.
            patch_size: ORB ``patchSize`` used for both images.

        Returns:
            ``(keypoints_needle, keypoints_haystack, good, points)`` where
            ``good`` holds the matches that pass the ratio test and
            ``points`` holds the haystack coordinates of those matches
            (empty unless ``len(good) > min_match_count``). If matching is
            not possible, ``(None, None, [], [])`` is returned.
        """
        keypoints_needle, descriptors_needle = self._get_needle_features(patch_size)
        orb2 = cv.ORB_create(edgeThreshold=0, patchSize=patch_size, nfeatures=2000)
        keypoints_haystack, descriptors_haystack = orb2.detectAndCompute(haystack_screenshot, None)

        # Without descriptors on either side there is nothing to match.
        if descriptors_needle is None or descriptors_haystack is None:
            return None, None, [], []

        FLANN_INDEX_LSH = 6
        index_params = dict(algorithm=FLANN_INDEX_LSH, table_number=6, key_size=12, multi_probe_level=1)
        search_params = dict(checks=50)
        try:
            flann = cv.FlannBasedMatcher(index_params, search_params)
            matches = flann.knnMatch(descriptors_needle, descriptors_haystack, k=2)
        except cv.error:
            return None, None, [], []

        # store all the good matches as per Lowe's ratio test.
        # knnMatch may return fewer than two neighbours for a descriptor;
        # such entries cannot be ratio-tested and are skipped.
        good = [
            pair[0] for pair in matches
            if len(pair) == 2 and pair[0].distance < 0.7 * pair[1].distance
        ]

        points = []
        if len(good) > min_match_count:
            print(str(name) + ' - ' + '%03d keypoints matched - %03d' % (len(good), len(keypoints_needle)))
            points = [keypoints_haystack[match.trainIdx].pt for match in good]
        return keypoints_needle, keypoints_haystack, good, points

Related

How can I display an image slideshow in tkinter that I made with OpenCv so that I still have 1 line or 3 grids for text?

I'm very new to programming in Python; this is my 3rd month. I'm making a desktop program that is supposed to show appointments and sayings. I already have a function, process, that opens a window in full screen and displays the slide show.
But I want to place the slide show in a tkinter window so that I can add other labels next to the show.
this is my code i want to call the function in a tkinter window so that i can assign it to a button.
import cv2
import numpy as np
import glob
import os
import random
class Image:
    """One slideshow picture, scaled and panned along its longer side.

    The picture is resized so that its shorter side equals ``size``; frames
    are ``size``-long windows that slide along the longer side.
    """

    def __init__(self, filename, time=200, size=800):
        """Load and scale the picture.

        Args:
            filename: path of the image file.
            time: number of frames over which the full pan is spread.
            size: edge length of the frames returned by ``get_frame``.

        Raises:
            ValueError: if the file cannot be read as an image.
        """
        self.size = size
        self.time = time
        self.shifted = 1.0
        self.img = cv2.imread(filename)
        if self.img is None:
            # cv2.imread signals failure by returning None; fail with a
            # message naming the file instead of an AttributeError on .shape.
            raise ValueError(f"Could not read image file: {filename}")
        self.height, self.width, _ = self.img.shape
        if self.width < self.height:
            # portrait: width becomes `size`, pan vertically
            self.height = int(self.height * size / self.width)
            self.width = size
            self.img = cv2.resize(self.img, (self.width, self.height))
            self.shift = self.height - size
            self.shift_height = True
        else:
            # landscape or square: height becomes `size`, pan horizontally
            self.width = int(self.width * size / self.height)
            self.height = size
            self.shift = self.width - size
            self.img = cv2.resize(self.img, (self.width, self.height))
            self.shift_height = False
        self.delta_shift = self.shift / self.time

    def reset(self):
        """Restart the pan from a randomly chosen end of the picture."""
        if random.randint(0, 1) == 0:
            self.shifted = 0.0
            self.delta_shift = abs(self.delta_shift)
        else:
            self.shifted = self.shift
            self.delta_shift = -abs(self.delta_shift)

    def get_frame(self):
        """Return the current window of the picture and advance the pan."""
        start = int(self.shifted)
        if self.shift_height:
            roi = self.img[start:start + self.size, :, :]
        else:
            roi = self.img[:, start:start + self.size, :]
        self.shifted += self.delta_shift
        # keep the pan position inside [0, shift]
        if self.shifted > self.shift:
            self.shifted = self.shift
        if self.shifted < 0:
            self.shifted = 0
        return roi
def process(path='pictures'):
    """Run an endless cross-fading slideshow in an OpenCV window.

    Every file in *path* is loaded as an ``Image``. The show alternates
    between a 100-step cross-fade into a randomly chosen next picture and
    100 frames of that picture alone. Pressing ``q`` closes the window and
    returns.

    Args:
        path: folder containing the pictures. The original body read an
            undefined ``path``; the default matches the folder name used by
            ``start()``.

    Raises:
        ValueError: if *path* contains no files.
    """
    text = 'xXxxxxxXXXXXxxxxxXx'
    # NOTE(review): with Image's default size of 800 the frames are 800x800,
    # so a text origin of (650, 1100) lies below the frame and the text is
    # not visible -- confirm the intended position.
    coordinates = (650, 1100)
    font = cv2.FONT_HERSHEY_SIMPLEX
    fontScale = 2
    color = (255, 0, 255)
    thickness = 3

    images = [Image(filename) for filename in glob.glob(os.path.join(path, "*"))]
    if not images:
        raise ValueError(f"No images found in {path!r}")

    def pick_next(current):
        # Choose a different picture when one exists; with a single picture
        # fall back to it instead of searching forever.
        others = [candidate for candidate in images if candidate is not current]
        return random.choice(others) if others else current

    def show(frame):
        # Display one frame; report False once the user pressed 'q'.
        cv2.imshow('Slideshow', frame)
        if cv2.waitKey(10) == ord('q'):
            cv2.destroyWindow('Slideshow')
            return False
        return True

    prev_image = random.choice(images)
    prev_image.reset()
    while True:
        img = pick_next(prev_image)
        img.reset()

        # cross-fade from the previous picture into the new one
        for i in range(100):
            alpha = i / 100
            beta = 1.0 - alpha
            dst = cv2.addWeighted(img.get_frame(), alpha, prev_image.get_frame(), beta, 0.0)
            dst = cv2.putText(dst, text, coordinates, font, fontScale, color, thickness, cv2.LINE_AA)
            if not show(dst):
                return
        prev_image = img

        # show the new picture on its own while it keeps panning
        for _ in range(100):
            if not show(img.get_frame()):
                return
def start():
    """Open an empty full-screen Tk window and run its event loop.

    The call blocks inside ``mainloop()`` until the window is closed.
    """
    # Local import: the import block of this snippet does not bring in
    # tkinter, so the `tk` name used here was undefined.
    import tkinter as tk

    showWindow = tk.Tk()
    showWindow.attributes('-fullscreen', True)
    showWindow.mainloop()
I tried to display the text in opencv in the pictures but I had problems keeping the text in the exercises. That's why I want to display the whole thing in a Tkinter window so I can do the classification with grid. because I don't just want to display a single image but a slide show
Does the method not work here (**), or do I just not understand it? Could someone help me?
(**)=
#Import the tkinter library
from tkinter import *
import numpy as np
import cv2
from PIL import Image, ImageTk
#Create an instance of tkinter frame
# (the original created `show_Winow`, sized an undefined `win`, and then
# used `show_Window`; one name is used throughout now)
show_Window = Tk()
show_Window.geometry("700x550")
#Load the image
img = cv2.imread('tutorialspoint.png')
#Rearrange colors: the channels are split as blue, green, red and merged
#back in red, green, blue order before handing the array to PIL
blue,green,red = cv2.split(img)
img = cv2.merge((red,green,blue))
im = Image.fromarray(img)
imgtk = ImageTk.PhotoImage(image=im)
#Create a Label to display the image
Label(show_Window, image= imgtk).pack()
show_Window.mainloop()

Reading images from files simultaneously

I have multiple folders whose images share the same file names (a.png, etc.). I want to modify this code so that it reads the same-named files from the different directories and shows their OpenCV output using YOLO at the same time. To be more specific, I have 10 folders containing images of different categories: one folder contains RGB files, another contains grayscale files, and so on. To compare their output, I want to show the images that have the same name but are in different folders. I know it should not be that hard, but I am pretty confused. Thanks in advance!
import cv2
import numpy as np
import os
import matplotlib.pyplot as plt
import tkinter
from tkinter import filedialog
def cal_alpB(minMax):
    """Return ``[alpha, beta]`` for the linear map ``alpha * v + beta``.

    The map sends ``minMax[0]`` to 0 and ``minMax[1]`` to 255.

    Args:
        minMax: sequence whose first two items are the lower and upper
            bound of the input range.
    """
    low, high = minMax[0], minMax[1]
    scale = 255 / (high - low)
    return [scale, -scale * low]
def getMinMax(path):
    """Read the value range stored in ``<path>/config``.

    Only the first line of the file is used; it is split on commas and each
    field is evaluated as a Python expression.

    Args:
        path: folder that contains the ``config`` file.

    Returns:
        List with one evaluated value per comma-separated field.
    """
    with open(path+'/config') as config_file:
        first_line = config_file.read().splitlines()[0]
    # SECURITY NOTE(review): eval() executes whatever expression the config
    # file contains. If the file can come from an untrusted source, switch
    # to a restricted parser such as float() or ast.literal_eval().
    return [eval(x) for x in first_line.split(',')]
def normalizeData(minMax, img):
    """Clamp *img* in place to the configured range and return the scaling.

    Args:
        minMax: sequence whose first two items are the lower and upper
            bound of the valid value range.
        img: numpy array, modified in place.

    Returns:
        ``[alpha, beta]`` as computed by ``cal_alpB(minMax)``.
    """
    low, high = minMax[0], minMax[1]
    alpB = cal_alpB(minMax)
    img[img > high] = high
    # Clamp to the lower bound of the range rather than to 0: the scaling
    # maps `low` to 0, so a value left below `low` becomes negative after
    # scaling and cv2.convertScaleAbs would turn it into a positive
    # intensity instead of black.
    img[img < low] = low
    return alpB
def boxDrawing(layerOutput, frameWidth, frameHeight, class_ids, confidences, boxes, img):
    """Collect detections from the network output and draw the kept boxes.

    Detections whose best class score exceeds 0.5 are appended to the
    caller-supplied ``class_ids``, ``confidences`` and ``boxes`` lists.
    Non-maximum suppression then selects the boxes that are drawn, with a
    label and confidence, onto *img* (modified in place).

    Args:
        layerOutput: iterable of output arrays, one per output layer.
        frameWidth: image width used to scale the relative box coordinates.
        frameHeight: image height used to scale the relative box coordinates.
        class_ids: list extended with the class index of each detection.
        confidences: list extended with the score of each detection.
        boxes: list extended with ``[left, top, width, height]`` entries.
        img: image the rectangles and labels are drawn on.
    """
    for layer in layerOutput:
        for detection in layer:
            # entries from index 5 onwards are the per-class scores
            scores = detection[5:]
            best_class = np.argmax(scores)
            best_score = scores[best_class]
            if not best_score > 0.5:
                continue
            # the first four entries are the box centre and size, relative
            # to the frame dimensions
            center_x = int(detection[0] * frameWidth)
            center_y = int(detection[1] * frameHeight)
            box_w = int(detection[2] * frameWidth)
            box_h = int(detection[3] * frameHeight)
            class_ids.append(best_class)
            confidences.append(float(best_score))
            boxes.append([int(center_x - box_w / 2), int(center_y - box_h / 2), box_w, box_h])

    # NOTE(review): the score threshold passed here (0.8) is stricter than
    # the 0.5 filter above -- confirm that this is intended.
    kept = cv2.dnn.NMSBoxes(boxes, confidences, 0.8, 0.7)
    label_font = cv2.FONT_HERSHEY_PLAIN
    palette = np.random.uniform(0, 255, size = (len(boxes),3))
    for idx in range(len(boxes)):
        if idx not in kept:
            continue
        x, y, w, h = boxes[idx]
        caption = str(classes[class_ids[idx]]) + " " + str(round(confidences[idx], 2))
        cv2.rectangle(img, (x, y), (x + w, y + h), palette[idx], 1)
        cv2.putText(img, caption, (x, y + 20), label_font, 1, (255, 255, 255), 1)
def algorythmYolo():
    """Run YOLO on every image of a user-chosen folder and display the result.

    A folder dialog is shown, the value range is read from the folder's
    ``config`` file, and each readable image (in sorted file-name order) is
    normalised, passed through the network and shown with its detections in
    the window named ``"window"``. Returns immediately if the dialog is
    cancelled.
    """
    tkinter.Tk().withdraw()
    folder = filedialog.askdirectory()
    if not folder:
        # Dialog cancelled: there is no folder to read a config from.
        return
    minMax = getMinMax(folder)
    for filename in sorted(os.listdir(folder)):
        img = cv2.imread(os.path.join(folder, filename), -1)
        if img is None:
            # Not a readable image -- for instance the 'config' file that
            # lives in the same folder. Skip it instead of ending the whole
            # run, otherwise every file sorted after it is never processed.
            continue
        alpB = normalizeData(minMax, img)
        img = cv2.convertScaleAbs(img, alpha=alpB[0], beta=alpB[1])
        # NOTE(review): GRAY2BGR assumes the files are single-channel images.
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
        frameHeight, frameWidth, channels = img.shape
        blob = cv2.dnn.blobFromImage(img, 1/255, (frameWidth, frameHeight), (0, 0, 0), swapRB=True, crop=False)
        yolo.setInput(blob)
        layerOutput = yolo.forward(outputLayers)
        # fresh result lists per image; boxDrawing fills them
        boxes = []
        confidences = []
        class_ids = []
        boxDrawing(layerOutput, frameWidth, frameHeight, class_ids, confidences, boxes, img)
        cv2.imshow("window", img)
        cv2.setWindowTitle('window', folder)
        cv2.waitKey(1)
    cv2.destroyAllWindows()
# Load the network and the class names it was trained on.
yolo = cv2.dnn.readNet("./yolov3.weights","./yolov3.cfg")
with open("./coco.names","r") as f:
    classes = f.read().splitlines()
layers_names = yolo.getLayerNames()
# The indices from getUnconnectedOutLayers() are 1-based (hence `i - 1`).
# Depending on the OpenCV version they arrive as a flat array or as an
# N x 1 array; flattening first makes the lookup work with both layouts.
outputLayers = [layers_names[i - 1] for i in np.asarray(yolo.getUnconnectedOutLayers()).flatten()]
cv2.namedWindow("window", cv2.WINDOW_NORMAL)
algorythmYolo()

Keypoint detection not working when keypoints are a certain colour

I'm using keypoint detection to find text within a game.
The background in the below images is dynamic, it's always a vaguely moving star-lit sky that you can barely see.
The detection works well when the text is white:
However, when the text is purple (unpredictable when this happens) the detection fails entirely:
Both the object I'm looking to detect and the image I'm running detection on are identical: the screenshots are taken directly from within the game, of the text shown above, and detection is then run on the exact same location the original screenshots were taken from.
The below code I've written using the official documentation I found here and here as a guide but it's very light on explaining itself.
Question: Is this an inherent limitation or is there something I can do to adjust to detect keypoints within the purple image?
import cv2 as cv
import win32gui, win32con, win32ui
import numpy as np
import glob
def get_haystack_image():
    """Capture a 1920x1080 area and return it as an ``(h, w, 3)`` uint8 array.

    The device context is requested with ``hwnd = None``; the 4th (alpha)
    byte of every pixel is dropped from the result.
    """
    w, h = 1920, 1080
    hwnd = None
    wDC = win32gui.GetWindowDC(hwnd)
    dcObj = win32ui.CreateDCFromHandle(wDC)
    cDC = dcObj.CreateCompatibleDC()
    dataBitMap = win32ui.CreateBitmap()
    dataBitMap.CreateCompatibleBitmap(dcObj, w, h)
    try:
        cDC.SelectObject(dataBitMap)
        cDC.BitBlt((0, 0), (w, h), dcObj, (0, 0), win32con.SRCCOPY)
        raw_bits = dataBitMap.GetBitmapBits(True)
    finally:
        # Release the GDI objects even if the capture fails; this function
        # is called once per needle image, so a leak would add up quickly.
        dcObj.DeleteDC()
        cDC.DeleteDC()
        win32gui.ReleaseDC(hwnd, wDC)
        win32gui.DeleteObject(dataBitMap.GetHandle())
    img = np.frombuffer(raw_bits, dtype='uint8').reshape(h, w, 4)
    # drop the 4th channel and return a contiguous array
    return np.ascontiguousarray(img[..., :3])
def loadImages(directory):
    """Read every image path in *directory* and pair each image with its path.

    Args:
        directory: iterable of image file paths (despite the name this is a
            collection of files, not a folder).

    Returns:
        A list of ``(image, path)`` tuples in input order. The result of
        ``cv.imread(path, cv.IMREAD_UNCHANGED)`` is stored as-is, so an
        unreadable file yields whatever ``cv.imread`` returned for it.
    """
    return [(cv.imread(path, cv.IMREAD_UNCHANGED), path) for path in directory]
def preProcessNeedle(image_list):
    """Pre-compute ORB keypoints and descriptors for every needle image.

    Args:
        image_list: sequence of entries whose first item is the image (as
            produced by ``loadImages``).

    Returns:
        List of ``(keypoints, descriptors, image)`` tuples in input order.
    """
    # The detector settings are the same for every needle, so build the
    # detector once instead of once per image.
    orb = cv.ORB_create(edgeThreshold=0, patchSize=32)
    needle_kp1_desc = []
    for entry in image_list:
        img = entry[0]
        keypoint_needle, descriptors_needle = orb.detectAndCompute(img, None)
        needle_kp1_desc.append((keypoint_needle, descriptors_needle, img))
    return needle_kp1_desc
def match_keypoints(descriptors_needle, keypoint_haystack, min_match_count):
    """Match pre-computed needle descriptors against a haystack image.

    Args:
        descriptors_needle: ORB descriptors of the needle image.
        keypoint_haystack: image to search in (ORB features are computed
            here on every call).
        min_match_count: the number of good matches must exceed this for
            match points to be reported.

    Returns:
        ``(keypoints_haystack, good, points)``. ``good`` holds the matches
        that pass the ratio test and ``points`` the haystack coordinates of
        those matches (empty unless ``len(good) > min_match_count``). When
        the matcher raises ``cv.error`` the result is ``(None, [], [])`` --
        the same three-item shape as the success path (the original
        returned four items in that case).
    """
    orbHaystack = cv.ORB_create(edgeThreshold=0, patchSize=32, nfeatures=3000)
    keypoints_haystack, descriptors_haystack = orbHaystack.detectAndCompute(keypoint_haystack, None)
    FLANN_INDEX_LSH = 6
    index_params = dict(algorithm=FLANN_INDEX_LSH, table_number=6, key_size=12, multi_probe_level=1)
    search_params = dict(checks=50)
    try:
        flann = cv.FlannBasedMatcher(index_params, search_params)
        matches = flann.knnMatch(descriptors_needle, descriptors_haystack, k=2)
    except cv.error:
        return None, [], []

    # Ratio test; entries with fewer than two neighbours are skipped.
    good = [
        pair[0] for pair in matches
        if len(pair) == 2 and pair[0].distance < 0.7 * pair[1].distance
    ]
    points = []
    if len(good) > min_match_count:
        points = [keypoints_haystack[match.trainIdx].pt for match in good]
    return keypoints_haystack, good, points


def shipDetection(needle_kp1_desc):
    """Check the cropped screen region against every pre-processed needle.

    Args:
        needle_kp1_desc: list of ``(keypoints, descriptors, image)`` tuples
            as produced by ``preProcessNeedle``.

    Returns:
        ``True`` as soon as one needle yields match points, else ``False``.
    """
    # Object Detection
    for kp1, descriptors_needle, needle_img in needle_kp1_desc:
        # get an updated image of the screen & crop it
        keypoint_haystack = get_haystack_image()[40:110, 850:1000]

        # match_keypoints takes the needle *descriptors* (not the keypoints)
        # and returns three values; the original call passed an extra
        # argument and unpacked four.
        kp2, matches, match_points = match_keypoints(
            descriptors_needle, keypoint_haystack, min_match_count=40)
        if kp2 is None:
            # the matcher failed for this needle; there is nothing to draw
            continue

        # display the matches
        match_image = cv.drawMatches(needle_img, kp1, keypoint_haystack, kp2, matches, None)
        cv.imshow('Keypoint Search', match_image)
        cv.moveWindow("Keypoint Search",1940,30)
        cv.waitKey(1)

        if match_points:
            # removed code as irrelevant to detection but left comments in
            # find the center point of all the matched features
            # account for the width of the needle image that appears on the left
            # drawn the found center point on the output image
            # display the processed image
            cv.imshow('Keypoint Search', match_image)
            cv.waitKey(1)
            return True
    return False
# Load the needle images and pre-compute their ORB features once.
ships_to_avoid = loadImages(glob.glob(r"C:\Users\*.png"))
needle_kp1_desc = preProcessNeedle(ships_to_avoid)
if shipDetection(needle_kp1_desc):
    # do something with the output
    # (placeholder statement: an `if` whose body is only a comment is a
    # syntax error)
    pass
Isolating the red channel, converting to grayscale and applying binary thresholding has normalised the results, they're all now a consistent "white" which my detection is successfully identifying.
apply_thresholding will perform this pre-processing on a folder: it writes the processed images from image_dir to output_dir and then deletes the unprocessed images from image_dir.
def apply_thresholding(image_dir=r"C:\Users\pre", output_dir=r"C:\Users\post"):
    """Convert every PNG in *image_dir* to a red-channel binary mask.

    For each ``.png`` file directly inside *image_dir* the blue and green
    channels are zeroed, the result is converted to grayscale and
    thresholded at 40, and the mask is written to *output_dir* under the
    same file name. A source file is deleted only after its mask has been
    written successfully; unreadable files and non-PNG files are left in
    place (the original deleted everything in the folder afterwards, even
    files it had not converted).

    Args:
        image_dir: folder holding the unprocessed images.
        output_dir: folder the processed images are written to.
    """
    # Local import: the import block of this snippet does not include os.
    import os

    # iterate through all the files in the image directory
    for image_name in sorted(os.listdir(image_dir)):
        filepath = os.path.join(image_dir, image_name)
        # check for extension .png (a real suffix test, so names such as
        # "x.png.bak" are not picked up) and ignore sub-directories
        if not (image_name.endswith('.png') and os.path.isfile(filepath)):
            continue
        # get image write path
        dstpath = os.path.join(output_dir, image_name)
        print(filepath, dstpath)

        # read the image
        image = cv.imread(filepath)
        if image is None:
            print('skipping unreadable image:', filepath)
            continue

        r = image.copy()
        # set blue and green channels to 0
        r[:, :, 0] = 0
        r[:, :, 1] = 0
        # convert to grayscale now we've dropped b and g channels
        gray = cv.cvtColor(r, cv.COLOR_BGR2GRAY)
        # Apply binary thresholding
        (T, thresh) = cv.threshold(gray, 40, 255, cv.THRESH_BINARY)

        # write the image in a different path with the same name, and only
        # remove the source once the write is known to have succeeded
        if cv.imwrite(dstpath, thresh):
            os.remove(filepath)
I then applied the same channel isolation, grayscale conversion and binary thresholding to my detection area.
def get_haystack_image():
    """Capture a 1920x1080 area and return its red-channel binary mask.

    The capture is reduced to its first three channels, the blue and green
    channels are zeroed, the image is converted to grayscale and then
    thresholded at 40.

    Returns:
        Single-channel uint8 image as produced by ``cv.threshold`` with
        ``cv.THRESH_BINARY`` and a maximum value of 255.
    """
    w, h = 1920, 1080
    hwnd = None
    wDC = win32gui.GetWindowDC(hwnd)
    dcObj = win32ui.CreateDCFromHandle(wDC)
    cDC = dcObj.CreateCompatibleDC()
    dataBitMap = win32ui.CreateBitmap()
    dataBitMap.CreateCompatibleBitmap(dcObj, w, h)
    try:
        cDC.SelectObject(dataBitMap)
        cDC.BitBlt((0, 0), (w, h), dcObj, (0, 0), win32con.SRCCOPY)
        raw_bits = dataBitMap.GetBitmapBits(True)
    finally:
        # Release the GDI objects even if the capture fails.
        dcObj.DeleteDC()
        cDC.DeleteDC()
        win32gui.ReleaseDC(hwnd, wDC)
        win32gui.DeleteObject(dataBitMap.GetHandle())

    frame = np.frombuffer(raw_bits, dtype='uint8').reshape(h, w, 4)
    # One writable copy of the three colour channels is enough; the second
    # full-frame copy made by the original is not needed.
    r = frame[..., :3].copy()
    # set blue and green channels to 0
    r[:, :, 0] = 0
    r[:, :, 1] = 0
    # convert to grayscale now we've dropped b and g channels
    gray = cv.cvtColor(r, cv.COLOR_BGR2GRAY)
    # Apply binary thresholding
    (T, img) = cv.threshold(gray, 40, 255, cv.THRESH_BINARY)
    return img

How do I get rid of black bar in python window?

I'm working on an object detection project for the game Cuphead using OpenCV and Python. Now I'm trying to capture objects in real time, but when the detection window displays I get this rare black bar at the top and I don't know how to get rid of it. Here's what I see: on the left my object detection window, and on the right the Cuphead game window.
Here's the code for the class used for this:
import numpy as np
import win32gui, win32ui, win32con
class WindowCapture:
    """Grab screenshots of a window (or the whole desktop) as numpy images."""

    # define monitor's width and height (class-level defaults, overwritten
    # per instance in __init__)
    w = 0
    h = 0
    hwnd = None

    # constructor
    def __init__(self, window_name):
        """Locate the target window and store its size.

        Args:
            window_name: exact title of the window to capture; ``None``
                selects the desktop window.

        Raises:
            Exception: if no window with the given title is found.
        """
        if window_name is None:  # if we don't pass any window names capture desktop
            self.hwnd = win32gui.GetDesktopWindow()
        else:
            # Find the game window
            self.hwnd = win32gui.FindWindow(None, window_name)
            if not self.hwnd:
                raise Exception("Window not founnd: {}".format(window_name))

        # define window's width and height. the resolution we'll work with
        # NOTE(review): this is the full window rectangle; nothing is
        # trimmed for title bar or borders here -- confirm whether that is
        # related to the black bar described in the question.
        window_rect = win32gui.GetWindowRect(self.hwnd)
        self.w = window_rect[2] - window_rect[0]
        self.h = window_rect[3] - window_rect[1]

    def get_screenshot(self):
        """Capture the window and return it as an ``(h, w, 3)`` uint8 array."""
        # get the window image data
        wDC = win32gui.GetWindowDC(self.hwnd)
        dcObj = win32ui.CreateDCFromHandle(wDC)
        cDC = dcObj.CreateCompatibleDC()
        dataBitMap = win32ui.CreateBitmap()
        dataBitMap.CreateCompatibleBitmap(dcObj, self.w, self.h)
        try:
            cDC.SelectObject(dataBitMap)
            cDC.BitBlt((0, 0), (self.w, self.h), dcObj, (0, 0), win32con.SRCCOPY)
            raw_bits = dataBitMap.GetBitmapBits(True)
        finally:
            # Free resources even when the capture fails, so a real-time
            # capture loop cannot leak GDI handles.
            dcObj.DeleteDC()
            cDC.DeleteDC()
            win32gui.ReleaseDC(self.hwnd, wDC)
            win32gui.DeleteObject(dataBitMap.GetHandle())

        # create the screenshot image that we want to return to be processed.
        # np.frombuffer replaces the deprecated binary mode of np.fromstring.
        img = np.frombuffer(raw_bits, dtype='uint8').reshape(self.h, self.w, 4)
        # get rid of the alpha channel in the img
        return np.ascontiguousarray(img[..., :3])
It seems img.shape = (self.h, self.w, 4) causes the problem. As the GetWindowRect documentation and @IInspectable said,
In Windows Vista and later, the Window Rect now includes the area
occupied by the drop shadow.

Is there an easy way to detect four near identical images with OpenCV when thresholding is insufficient?

I'm trying to detect these chevrons from four instances of bluestacks running the same game:
To make it a little easier to see the differences at a glance, these are the same chevrons zoomed in. For some reason, BlueStacks renders them marginally differently every time.
I've tried to use thresholding and different methods with OpenCV but I need to drop it so low I get too many false positives and the different methods don't seem to make a difference.
I'm using win32gui for my template:
class WindowCapture:
    """Grab screenshots of a window (or the whole desktop) as numpy images."""

    # properties (class-level defaults, overwritten per instance in __init__)
    w = 0          # capture width in pixels
    h = 0          # capture height in pixels
    hwnd = None    # handle of the window being captured
    cropped_x = 0  # left offset of the captured area inside the window
    cropped_y = 0  # top offset of the captured area inside the window
    offset_x = 0   # screen x of the captured area's top-left corner
    offset_y = 0   # screen y of the captured area's top-left corner

    # constructor
    def __init__(self, window_name=None):
        """Locate the target window and compute the capture geometry.

        Args:
            window_name: exact title of the window to capture. When ``None``
                the desktop window is used.

        Raises:
            Exception: if no window with the given title is found.
        """
        # find the handle for the window we want to capture.
        # if no window name is given, capture the entire screen
        if window_name is None:
            self.hwnd = win32gui.GetDesktopWindow()
        else:
            self.hwnd = win32gui.FindWindow(None, window_name)
            if not self.hwnd:
                raise Exception('Window not found: {}'.format(window_name))

        # get the window size
        window_rect = win32gui.GetWindowRect(self.hwnd)
        self.w = window_rect[2] - window_rect[0]
        self.h = window_rect[3] - window_rect[1]

        # account for the window border and titlebar and cut them off
        border_pixels = 8
        titlebar_pixels = 40
        self.w = self.w - (border_pixels * 2)
        self.h = self.h - titlebar_pixels - border_pixels
        self.cropped_x = border_pixels
        self.cropped_y = titlebar_pixels

        # set the cropped coordinates offset so we can translate screenshot
        # images into actual screen positions
        self.offset_x = window_rect[0] + self.cropped_x
        self.offset_y = window_rect[1] + self.cropped_y

    def get_screenshot(self):
        """Capture the cropped window area as an ``(h, w, 3)`` uint8 array."""
        # get the window image data
        wDC = win32gui.GetWindowDC(self.hwnd)
        dcObj = win32ui.CreateDCFromHandle(wDC)
        cDC = dcObj.CreateCompatibleDC()
        dataBitMap = win32ui.CreateBitmap()
        dataBitMap.CreateCompatibleBitmap(dcObj, self.w, self.h)
        try:
            cDC.SelectObject(dataBitMap)
            cDC.BitBlt((0, 0), (self.w, self.h), dcObj,
                       (self.cropped_x, self.cropped_y), win32con.SRCCOPY)
            # dataBitMap.SaveBitmapFile(cDC, 'debug.bmp')
            raw_bits = dataBitMap.GetBitmapBits(True)
        finally:
            # free resources even when the capture itself fails, so repeated
            # calls cannot leak GDI handles
            dcObj.DeleteDC()
            cDC.DeleteDC()
            win32gui.ReleaseDC(self.hwnd, wDC)
            win32gui.DeleteObject(dataBitMap.GetHandle())

        # convert the raw data into a format opencv can read.
        # np.frombuffer replaces the deprecated binary mode of np.fromstring.
        img = np.frombuffer(raw_bits, dtype='uint8').reshape(self.h, self.w, 4)
        # drop the 4th (alpha) channel and make the result contiguous
        return np.ascontiguousarray(img[..., :3])

    @staticmethod
    def list_window_names():
        """Print the handle (hex) and title of every visible top-level window."""
        def winEnumHandler(hwnd, ctx):
            if win32gui.IsWindowVisible(hwnd):
                print(hex(hwnd), win32gui.GetWindowText(hwnd))
        win32gui.EnumWindows(winEnumHandler, None)

    # translate a pixel position on a screenshot image to a pixel position on the screen.
    # pos = (x, y)
    def get_screen_position(self, pos):
        """Return *pos* shifted by the capture offset, as an ``(x, y)`` tuple."""
        return (pos[0] + self.offset_x, pos[1] + self.offset_y)
And OpenCV and Numpy for my object detection:
class Vision:
    """Template matcher that locates one needle image inside screenshots."""

    # properties
    needle_img = None
    needle_w = 0
    needle_h = 0
    method = None

    # constructor
    def __init__(self, needle_img_path, method=cv.TM_CCOEFF_NORMED):
        """Load the needle image and remember the matching method.

        Args:
            needle_img_path: path of the needle image file.
            method: one of the six OpenCV template matching methods.

        Raises:
            ValueError: if the needle image cannot be read.
        """
        self.needle_img = cv.imread(needle_img_path, cv.IMREAD_UNCHANGED)
        if self.needle_img is None:
            # cv.imread returns None on failure; name the file in the error
            # rather than failing later with an AttributeError on .shape.
            raise ValueError('Could not read needle image: {}'.format(needle_img_path))

        # Save the dimensions of the needle image
        self.needle_w = self.needle_img.shape[1]
        self.needle_h = self.needle_img.shape[0]

        # There are 6 methods to choose from:
        # TM_CCOEFF, TM_CCOEFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_SQDIFF, TM_SQDIFF_NORMED
        self.method = method

    def find(self, haystack_img, threshold=0.5, debug_mode=None):
        """Return the centre points of all needle matches in *haystack_img*.

        Args:
            haystack_img: image to search in. Debug markers are drawn
                directly onto this image.
            threshold: score a location must reach to count as a match. For
                the two TM_SQDIFF methods, where lower scores mean better
                matches, a location counts when its score is at or below
                the threshold.
            debug_mode: ``'rectangles'`` draws a box around each match,
                ``'points'`` draws a cross at each centre, anything else
                draws nothing.

        Returns:
            List of ``(center_x, center_y)`` tuples, one per grouped match.
        """
        # run the OpenCV algorithm
        result = cv.matchTemplate(haystack_img, self.needle_img, self.method)

        # Get all the positions from the match result that pass our
        # threshold. The squared-difference methods score the best match
        # lowest, so the comparison is inverted for them.
        if self.method in (cv.TM_SQDIFF, cv.TM_SQDIFF_NORMED):
            hits = result <= threshold
        else:
            hits = result >= threshold
        # np.where yields (rows, cols); reverse to get (x, y) pairs
        locations = list(zip(*np.where(hits)[::-1]))

        rectangles = []
        for loc in locations:
            rect = [int(loc[0]), int(loc[1]), self.needle_w, self.needle_h]
            # Add every box to the list twice in order to retain single (non-overlapping) boxes
            rectangles.append(rect)
            rectangles.append(rect)

        # Apply group rectangles
        rectangles, weights = cv.groupRectangles(rectangles, groupThreshold=1, eps=0.5)

        points = []
        if len(rectangles):
            line_color = (0, 255, 0)
            line_type = cv.LINE_4
            marker_color = (255, 0, 255)
            marker_type = cv.MARKER_CROSS

            # Loop over all the rectangles
            for (x, y, w, h) in rectangles:
                # Determine the center position
                center_x = x + int(w/2)
                center_y = y + int(h/2)
                # Save the points
                points.append((center_x, center_y))

                if debug_mode == 'rectangles':
                    # Determine the box position
                    top_left = (x, y)
                    bottom_right = (x + w, y + h)
                    # Draw the box
                    cv.rectangle(haystack_img, top_left, bottom_right, color=line_color,
                                 lineType=line_type, thickness=2)
                elif debug_mode == 'points':
                    # Draw the center point
                    cv.drawMarker(haystack_img, (center_x, center_y),
                                  color=marker_color, markerType=marker_type,
                                  markerSize=40, thickness=2)
        return points
And then I'm calling the match like this:
chevronsicon = Vision('chevrons.jpg')
# checks for chevrons
# (the result was stored as `chevron` but read back as `chevrons`, which
# raised NameError; one name is used throughout now)
chevrons = chevronsicon.find(screenshotCropped, 0.86, 'points')
# mouse controls
# NOTE(review): the points are relative to screenshotCropped -- confirm
# whether they need translating to screen coordinates before moving the
# mouse.
if chevrons:
    py.moveTo(chevrons[0])
Thresholding and different OpenCV methods have worked every single time I've come across this issue previously. I was considering using a gaussian function to blur the edges and then canny to see if I could get a consistent match but I'm not sure if that's overworking what might be a very simple answer I'm simply not aware of.
Is there an easier way to detect these?

Categories

Resources