I'm working on an object detection project for the game Cuphead using OpenCV and Python. I'm trying to capture objects in real time, but when the detection window displays I get this strange black bar at the top and I don't know how to get rid of it. Here's what I see: on the left my object detection window, and on the right the Cuphead game window.
Here's the code for the class used for this:
import numpy as np
import win32gui, win32ui, win32con


class WindowCapture:
    # define the capture's width and height
    w = 0
    h = 0
    hwnd = None

    # constructor
    def __init__(self, window_name):
        if window_name is None:
            # if we don't pass any window name, capture the desktop
            self.hwnd = win32gui.GetDesktopWindow()
        else:
            # find the game window
            self.hwnd = win32gui.FindWindow(None, window_name)
            if not self.hwnd:
                raise Exception('Window not found: {}'.format(window_name))

        # define the window's width and height: the resolution we'll work with
        window_rect = win32gui.GetWindowRect(self.hwnd)
        self.w = window_rect[2] - window_rect[0]
        self.h = window_rect[3] - window_rect[1]

    def get_screenshot(self):
        # get the window image data
        wDC = win32gui.GetWindowDC(self.hwnd)
        dcObj = win32ui.CreateDCFromHandle(wDC)
        cDC = dcObj.CreateCompatibleDC()
        dataBitMap = win32ui.CreateBitmap()
        dataBitMap.CreateCompatibleBitmap(dcObj, self.w, self.h)
        cDC.SelectObject(dataBitMap)
        cDC.BitBlt((0, 0), (self.w, self.h), dcObj, (0, 0), win32con.SRCCOPY)

        # create the screenshot image that we want to return to be processed
        signedIntsArray = dataBitMap.GetBitmapBits(True)
        img = np.frombuffer(signedIntsArray, dtype='uint8')
        img.shape = (self.h, self.w, 4)

        # free resources
        dcObj.DeleteDC()
        cDC.DeleteDC()
        win32gui.ReleaseDC(self.hwnd, wDC)
        win32gui.DeleteObject(dataBitMap.GetHandle())

        # get rid of the alpha channel in the img
        img = img[..., :3]
        img = np.ascontiguousarray(img)
        return img
It seems img.shape = (self.h, self.w, 4) causes the problem. As the GetWindowRect documentation and @IInspectable note:
In Windows Vista and later, the Window Rect now includes the area
occupied by the drop shadow.
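Since GetWindowRect includes the drop shadow (and the title bar), the bitmap rows end up wider than the visible content, which shows up as that black bar. A minimal sketch of one fix, sizing the capture from the client area instead; the window title string here is an assumption, adapt it to your setup:

import win32gui

# sketch: measure the client area instead of the full window rect so the
# shadow, border and titlebar never enter the bitmap
hwnd = win32gui.FindWindow(None, 'Cuphead')  # window title is an assumption

# GetClientRect returns (0, 0, width, height) in client coordinates
left, top, right, bottom = win32gui.GetClientRect(hwnd)
w, h = right - left, bottom - top

# translate the client origin into screen coordinates to find where the
# visible content starts inside the full window rect
client_origin = win32gui.ClientToScreen(hwnd, (0, 0))
window_rect = win32gui.GetWindowRect(hwnd)
cropped_x = client_origin[0] - window_rect[0]
cropped_y = client_origin[1] - window_rect[1]

# then size the bitmap with (w, h) and blit with the cropped offsets:
# dataBitMap.CreateCompatibleBitmap(dcObj, w, h)
# cDC.BitBlt((0, 0), (w, h), dcObj, (cropped_x, cropped_y), win32con.SRCCOPY)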
I'm trying to improve the speed of my object detection. I'm using OpenCV and ORB brute force keypoint matching.
I've got 30 seconds for keypoint_detection to run and at the moment it's taking about 23 seconds. This is fine for now with 74 images, but that number is likely to increase.
So far I've:
Reduced my search area down to the absolute minimum
Refined my 74 images down to the smallest size possible
I also tried pre-processing to remove some of the clutter before matching, but it dramatically impacted my ability to find a successful match.
I've also tried:
SIFT, SURF, matchTemplate, Canny and a few other methods. ORB brute force is my best match by a significant margin.
This is my exact working code, not pseudo-code and not a cut-down minimal example, as I didn't think it would make sense to present a reduced version of the code I'm looking to speed up.
Is there any way to improve the efficiency of my code?
import cv2 as cv
import os
import glob
import pyautogui as py
from time import sleep

from windowcapture import WindowCapture
from vision import Vision

# change the working directory to the folder this script is in
os.chdir(r'C:\test')
avoid = glob.glob(r"C:\Users\test\*.png")


def loadImages(directory):
    # initialise an empty array
    image_list = []
    # add images to the array along with their file names
    for i in directory:
        img = cv.imread(i, cv.IMREAD_UNCHANGED)
        image_list.append((img, i))
    return image_list


# initialize the WindowCapture class
wincap = WindowCapture()


def keypoint_detection(image_list):
    counter = 0
    for i in image_list:
        counter += 1
        needle_img = i[0]
        needle_name = i[1]  # the image's file name, used for logging

        # load the image to find
        objectToFind = Vision(needle_img)
        # get an updated image of the screen
        keypoint_haystack = wincap.get_haystack()
        # crop the image
        x, w, y, h = [600, 700, 20, 50]
        keypoint_haystack = keypoint_haystack[y:y+h, x:x+w]

        kp1, kp2, matches, match_points = objectToFind.match_keypoints(keypoint_haystack, needle_name, min_match_count=30)
        match_image = cv.drawMatches(objectToFind.needle_img, kp1, keypoint_haystack, kp2, matches, None)

        if match_points:
            # find the center point of all the matched features
            center_point = objectToFind.centeroid(match_points)
            # account for the width of the needle image that appears on the left
            center_point[0] += objectToFind.needle_w
            # draw the found center point on the output image
            match_image = objectToFind.draw_crosshairs(match_image, [center_point])
            sleep(3)
            break


while True:
    ships_to_avoid = loadImages(avoid)
    keypoint_detection(ships_to_avoid)
WindowCapture Class
import numpy as np
import win32gui, win32ui, win32con


class WindowCapture:
    # properties
    w = 0
    h = 0
    hwnd = None
    cropped_x = 0
    cropped_y = 0
    offset_x = 0
    offset_y = 0

    # constructor
    def __init__(self, window_name=None):
        # find the handle for the window we want to capture.
        # if no window name is given, capture the entire screen
        if window_name is None:
            self.hwnd = win32gui.GetDesktopWindow()
        else:
            self.hwnd = win32gui.FindWindow(None, window_name)
            if not self.hwnd:
                raise Exception('Window not found: {}'.format(window_name))

        # get the window size
        window_rect = win32gui.GetWindowRect(self.hwnd)
        self.w = window_rect[2] - window_rect[0]
        self.h = window_rect[3] - window_rect[1]

        # account for the window border and titlebar and cut them off
        border_pixels = 0
        titlebar_pixels = 5
        self.w = self.w - border_pixels
        self.h = self.h - titlebar_pixels - border_pixels
        self.cropped_x = border_pixels
        self.cropped_y = titlebar_pixels

        # set the cropped coordinates offset so we can translate screenshot
        # images into actual screen positions
        self.offset_x = window_rect[0] + self.cropped_x
        self.offset_y = window_rect[1] + self.cropped_y

    def get_haystack(self):
        # get the window image data
        wDC = win32gui.GetWindowDC(self.hwnd)
        dcObj = win32ui.CreateDCFromHandle(wDC)
        cDC = dcObj.CreateCompatibleDC()
        dataBitMap = win32ui.CreateBitmap()
        dataBitMap.CreateCompatibleBitmap(dcObj, self.w, self.h)
        cDC.SelectObject(dataBitMap)
        cDC.BitBlt((0, 0), (self.w, self.h), dcObj, (self.cropped_x, self.cropped_y), win32con.SRCCOPY)

        # convert the raw data into a format opencv can read
        # dataBitMap.SaveBitmapFile(cDC, 'debug.bmp')
        signedIntsArray = dataBitMap.GetBitmapBits(True)
        img = np.frombuffer(signedIntsArray, dtype='uint8')
        img.shape = (self.h, self.w, 4)

        # free resources
        dcObj.DeleteDC()
        cDC.DeleteDC()
        win32gui.ReleaseDC(self.hwnd, wDC)
        win32gui.DeleteObject(dataBitMap.GetHandle())

        # drop the alpha channel and make the array C-contiguous for OpenCV
        img = img[..., :3]
        img = np.ascontiguousarray(img)
        return img

    @staticmethod
    def list_window_names():
        def winEnumHandler(hwnd, ctx):
            if win32gui.IsWindowVisible(hwnd):
                print(hex(hwnd), win32gui.GetWindowText(hwnd))
        win32gui.EnumWindows(winEnumHandler, None)

    # translate a pixel position on a screenshot image to a pixel position
    # on the screen. pos = (x, y)
    def get_screen_position(self, pos):
        return (pos[0] + self.offset_x, pos[1] + self.offset_y)
Vision Class
import cv2 as cv
import numpy as np


class Vision:
    # properties
    needle_img = None
    needle_w = 0
    needle_h = 0

    # constructor
    def __init__(self, needle_img):
        # the needle image is passed in already loaded (see keypoint_detection)
        self.needle_img = needle_img
        # save the dimensions of the needle image
        self.needle_w = self.needle_img.shape[1]
        self.needle_h = self.needle_img.shape[0]

    def match_keypoints(self, haystack_screenshot, name, min_match_count, patch_size=32):
        orb = cv.ORB_create(edgeThreshold=0, patchSize=patch_size)
        keypoints_needle, descriptors_needle = orb.detectAndCompute(self.needle_img, None)
        orb2 = cv.ORB_create(edgeThreshold=0, patchSize=patch_size, nfeatures=2000)
        keypoints_haystack, descriptors_haystack = orb2.detectAndCompute(haystack_screenshot, None)

        FLANN_INDEX_LSH = 6
        index_params = dict(algorithm=FLANN_INDEX_LSH, table_number=6, key_size=12, multi_probe_level=1)
        search_params = dict(checks=50)

        try:
            flann = cv.FlannBasedMatcher(index_params, search_params)
            matches = flann.knnMatch(descriptors_needle, descriptors_haystack, k=2)
        except cv.error:
            return None, None, [], []

        # store all the good matches as per Lowe's ratio test
        good = []
        points = []
        for pair in matches:
            if len(pair) == 2:
                if pair[0].distance < 0.7 * pair[1].distance:
                    good.append(pair[0])

        if len(good) > min_match_count:
            print(str(name) + ' - ' + '%03d keypoints matched - %03d' % (len(good), len(keypoints_needle)))
            for match in good:
                points.append(keypoints_haystack[match.trainIdx].pt)

        return keypoints_needle, keypoints_haystack, good, points
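One observation on the code above, relevant to the speed question: keypoint_detection constructs a new Vision object and re-runs ORB on every needle image on every pass, even though the needle images never change. A rough sketch of caching the needle keypoints once, up front (precompute_needles is a hypothetical helper, assuming the classes above):

import cv2 as cv

# sketch: precompute ORB keypoints/descriptors for each needle image a
# single time; only the haystack needs re-detection each frame
orb = cv.ORB_create(edgeThreshold=0, patchSize=32)

def precompute_needles(image_list):
    cached = []
    for img, name in image_list:
        kp, des = orb.detectAndCompute(img, None)
        cached.append((img, name, kp, des))
    return cached

# inside the frame loop, detect the haystack once per frame and reuse the
# cached needle descriptors for every match:
# kp_hay, des_hay = orb.detectAndCompute(keypoint_haystack, None)
# matches = flann.knnMatch(des, des_hay, k=2)  # des comes from the cache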
I'm trying to detect these chevrons from four instances of BlueStacks running the same game:
To make the differences a little easier to see at a glance, these are the same chevrons zoomed in. For some reason, BlueStacks renders them marginally differently every time.
I've tried thresholding and different methods with OpenCV, but I have to drop the threshold so low that I get too many false positives, and the different methods don't seem to make a difference.
I'm using win32gui for my window capture:
import numpy as np
import win32gui, win32ui, win32con


class WindowCapture:
    # properties
    w = 0
    h = 0
    hwnd = None
    cropped_x = 0
    cropped_y = 0
    offset_x = 0
    offset_y = 0

    # constructor
    def __init__(self, window_name=None):
        # find the handle for the window we want to capture.
        # if no window name is given, capture the entire screen
        if window_name is None:
            self.hwnd = win32gui.GetDesktopWindow()
        else:
            self.hwnd = win32gui.FindWindow(None, window_name)
            if not self.hwnd:
                raise Exception('Window not found: {}'.format(window_name))

        # get the window size
        window_rect = win32gui.GetWindowRect(self.hwnd)
        self.w = window_rect[2] - window_rect[0]
        self.h = window_rect[3] - window_rect[1]

        # account for the window border and titlebar and cut them off
        border_pixels = 8
        titlebar_pixels = 40
        self.w = self.w - (border_pixels * 2)
        self.h = self.h - titlebar_pixels - border_pixels
        self.cropped_x = border_pixels
        self.cropped_y = titlebar_pixels

        # set the cropped coordinates offset so we can translate screenshot
        # images into actual screen positions
        self.offset_x = window_rect[0] + self.cropped_x
        self.offset_y = window_rect[1] + self.cropped_y

    def get_screenshot(self):
        # get the window image data
        wDC = win32gui.GetWindowDC(self.hwnd)
        dcObj = win32ui.CreateDCFromHandle(wDC)
        cDC = dcObj.CreateCompatibleDC()
        dataBitMap = win32ui.CreateBitmap()
        dataBitMap.CreateCompatibleBitmap(dcObj, self.w, self.h)
        cDC.SelectObject(dataBitMap)
        cDC.BitBlt((0, 0), (self.w, self.h), dcObj, (self.cropped_x, self.cropped_y), win32con.SRCCOPY)

        # convert the raw data into a format opencv can read
        # dataBitMap.SaveBitmapFile(cDC, 'debug.bmp')
        signedIntsArray = dataBitMap.GetBitmapBits(True)
        img = np.frombuffer(signedIntsArray, dtype='uint8')
        img.shape = (self.h, self.w, 4)

        # free resources
        dcObj.DeleteDC()
        cDC.DeleteDC()
        win32gui.ReleaseDC(self.hwnd, wDC)
        win32gui.DeleteObject(dataBitMap.GetHandle())

        # drop the alpha channel and make the array C-contiguous for OpenCV
        img = img[..., :3]
        img = np.ascontiguousarray(img)
        return img

    @staticmethod
    def list_window_names():
        def winEnumHandler(hwnd, ctx):
            if win32gui.IsWindowVisible(hwnd):
                print(hex(hwnd), win32gui.GetWindowText(hwnd))
        win32gui.EnumWindows(winEnumHandler, None)

    # translate a pixel position on a screenshot image to a pixel position
    # on the screen. pos = (x, y)
    def get_screen_position(self, pos):
        return (pos[0] + self.offset_x, pos[1] + self.offset_y)
And OpenCV and NumPy for my object detection:
import cv2 as cv
import numpy as np


class Vision:
    # properties
    needle_img = None
    needle_w = 0
    needle_h = 0
    method = None

    # constructor
    def __init__(self, needle_img_path, method=cv.TM_CCOEFF_NORMED):
        self.needle_img = cv.imread(needle_img_path, cv.IMREAD_UNCHANGED)
        # save the dimensions of the needle image
        self.needle_w = self.needle_img.shape[1]
        self.needle_h = self.needle_img.shape[0]
        # there are 6 methods to choose from:
        # TM_CCOEFF, TM_CCOEFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_SQDIFF, TM_SQDIFF_NORMED
        self.method = method

    def find(self, haystack_img, threshold=0.5, debug_mode=None):
        # run the OpenCV template matching algorithm
        result = cv.matchTemplate(haystack_img, self.needle_img, self.method)

        # get all the positions from the match result that exceed our threshold
        locations = np.where(result >= threshold)
        locations = list(zip(*locations[::-1]))

        rectangles = []
        for loc in locations:
            rect = [int(loc[0]), int(loc[1]), self.needle_w, self.needle_h]
            # add every box to the list twice so groupRectangles retains
            # single (non-overlapping) boxes instead of discarding them
            rectangles.append(rect)
            rectangles.append(rect)

        # apply group rectangles
        rectangles, weights = cv.groupRectangles(rectangles, groupThreshold=1, eps=0.5)

        points = []
        if len(rectangles):
            line_color = (0, 255, 0)
            line_type = cv.LINE_4
            marker_color = (255, 0, 255)
            marker_type = cv.MARKER_CROSS

            # loop over all the rectangles
            for (x, y, w, h) in rectangles:
                # determine the center position
                center_x = x + int(w / 2)
                center_y = y + int(h / 2)
                # save the points
                points.append((center_x, center_y))

                if debug_mode == 'rectangles':
                    # determine the box position
                    top_left = (x, y)
                    bottom_right = (x + w, y + h)
                    # draw the box
                    cv.rectangle(haystack_img, top_left, bottom_right, color=line_color,
                                 lineType=line_type, thickness=2)
                elif debug_mode == 'points':
                    # draw the center point
                    cv.drawMarker(haystack_img, (center_x, center_y),
                                  color=marker_color, markerType=marker_type,
                                  markerSize=40, thickness=2)

        return points
And then I'm calling the match like this:
chevronsicon = Vision('chevrons.jpg')

# check for chevrons
chevrons = chevronsicon.find(screenshotCropped, 0.86, 'points')

# mouse controls
if chevrons:
    py.moveTo(chevrons[0])
Thresholding and different OpenCV methods have worked every time I've come across this issue before. I was considering using a Gaussian blur to soften the edges and then Canny to see if I could get a consistent match, but I'm not sure if that's overworking what might be a very simple answer I'm just not aware of.
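For what it's worth, a rough sketch of that blur-then-Canny idea; the kernel size and Canny thresholds are guesses, not tuned for these chevrons, and find_on_edges is a hypothetical helper:

import cv2 as cv

def edge_map(img):
    # blur slightly to suppress the per-instance rendering noise, then keep
    # only the edges, which should be more stable across instances
    gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    return cv.Canny(cv.GaussianBlur(gray, (3, 3), 0), 50, 150)

needle_edges = edge_map(cv.imread('chevrons.jpg'))

def find_on_edges(haystack_img, threshold=0.7):
    # template-match on the edge maps instead of the raw pixels
    result = cv.matchTemplate(edge_map(haystack_img), needle_edges, cv.TM_CCOEFF_NORMED)
    _, max_val, _, max_loc = cv.minMaxLoc(result)
    return max_loc if max_val >= threshold else None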
Is there an easier way to detect these?
I need to make a screen capture of a specific application. If I grab the Windows desktop (hwnd = None), everything works, but I get a black screen when I try to grab the screen of a specific application, e.g. hwnd = win32gui.FindWindow(None, 'Albion Online Client').
import cv2 as cv
import numpy as np
import os
from time import time
import win32gui
import win32ui
import win32con

os.chdir(os.path.dirname(os.path.abspath(__file__)))


def window_capture():
    w = 1920  # set this
    h = 1080  # set this

    # hwnd = None  # everything works
    hwnd = win32gui.FindWindow(None, 'Albion Online Client')  # black screen

    wDC = win32gui.GetWindowDC(hwnd)
    dcObj = win32ui.CreateDCFromHandle(wDC)
    cDC = dcObj.CreateCompatibleDC()
    dataBitMap = win32ui.CreateBitmap()
    dataBitMap.CreateCompatibleBitmap(dcObj, w, h)
    cDC.SelectObject(dataBitMap)
    cDC.BitBlt((0, 0), (w, h), dcObj, (0, 0), win32con.SRCCOPY)

    # save a screenshot for debugging
    # dataBitMap.SaveBitmapFile(cDC, 'debug.bmp')
    signedIntsArray = dataBitMap.GetBitmapBits(True)
    img = np.frombuffer(signedIntsArray, dtype='uint8')
    img.shape = (h, w, 4)

    # free resources
    dcObj.DeleteDC()
    cDC.DeleteDC()
    win32gui.ReleaseDC(hwnd, wDC)
    win32gui.DeleteObject(dataBitMap.GetHandle())

    img = img[..., :3]
    img = np.ascontiguousarray(img)
    return img


loop_time = time()
while True:
    screenshot = window_capture()
    cv.imshow('Computer Vision', screenshot)

    # debug the loop rate
    print('FPS {}'.format(1 / (time() - loop_time)))
    loop_time = time()

    # press 'q' with the output window focused to exit.
    # waits 1 ms every loop to process key presses
    if cv.waitKey(1) == ord('q'):
        cv.destroyAllWindows()
        break

print('Done.')
That's an OpenGL or Direct3D application. It doesn't draw to its own window. It creates textures and surfaces, and has the graphics card render its display list directly into the frame buffer.
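Given that, one workaround sketch: blit from the desktop DC at the game window's on-screen position, so you read the already-composited frame instead of asking the (black) window surface for its pixels. The window has to be visible and unobscured for this to work:

import win32gui, win32ui, win32con

game_hwnd = win32gui.FindWindow(None, 'Albion Online Client')
left, top, right, bottom = win32gui.GetWindowRect(game_hwnd)
w, h = right - left, bottom - top

# capture from the desktop window instead of the game window itself
desktop = win32gui.GetDesktopWindow()
wDC = win32gui.GetWindowDC(desktop)
dcObj = win32ui.CreateDCFromHandle(wDC)
cDC = dcObj.CreateCompatibleDC()
dataBitMap = win32ui.CreateBitmap()
dataBitMap.CreateCompatibleBitmap(dcObj, w, h)
cDC.SelectObject(dataBitMap)
# the source coordinates are the game window's position on the desktop
cDC.BitBlt((0, 0), (w, h), dcObj, (left, top), win32con.SRCCOPY)
# ...then convert and free resources exactly as in window_capture() above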
I wrote this code to capture screen footage by taking a screenshot every second; however, the OpenCV window cascades the screen when it's clicked on.
When you hover over the OpenCV window, it shows the screen without a hundred duplicates. What exactly is the bug? Picture:
The code for using the class I made:
from window_capture import Window_Capture
from time import time
import cv2 as cv
import numpy

loop_time = time()
wincap = Window_Capture()

while True:
    screenshot = wincap.screenshare()
    cv.imshow('Screen', screenshot)
    print("FPS {}".format(round(1 / (time() - loop_time))))
    loop_time = time()
    if cv.waitKey(1) == ord('q'):
        print("Capture Ended")
        # destroyWindow expects the window's name, not the image array
        cv.destroyWindow('Screen')
        break
The class for screen capturing:
import numpy
import win32gui, win32ui, win32con
from win32api import GetSystemMetrics
import time


class Window_Capture:
    w = 0
    h = 0
    hwnd = None

    def __init__(self, window_name=None):
        # check if a window name was passed in
        if window_name is None:
            print("No window set to capture: setting to the desktop window")
            time.sleep(3)
            self.hwnd = win32gui.GetDesktopWindow()
            print(win32gui.GetWindowText(win32gui.GetDesktopWindow()))
        else:
            self.hwnd = win32gui.FindWindow(None, window_name)
            print("Window found: setting to the preferred window")

        # define the monitor's width and height
        self.w = GetSystemMetrics(0)
        self.h = GetSystemMetrics(1)

    def screenshare(self):
        wDC = win32gui.GetWindowDC(self.hwnd)
        dcObj = win32ui.CreateDCFromHandle(wDC)
        cDC = dcObj.CreateCompatibleDC()
        dataBitMap = win32ui.CreateBitmap()
        dataBitMap.CreateCompatibleBitmap(dcObj, self.w, self.h)
        cDC.SelectObject(dataBitMap)
        cDC.BitBlt((0, 0), (self.w, self.h), dcObj, (0, 0), win32con.SRCCOPY)

        signedIntsArray = dataBitMap.GetBitmapBits(True)
        img = numpy.frombuffer(signedIntsArray, dtype='uint8')
        img.shape = (self.h, self.w, 4)

        dcObj.DeleteDC()
        cDC.DeleteDC()
        win32gui.ReleaseDC(self.hwnd, wDC)
        win32gui.DeleteObject(dataBitMap.GetHandle())

        img = img[..., :3]
        img = numpy.ascontiguousarray(img)
        return img
This bug is preventing me from moving on to the next part of my image detection project.
A better solution is:
import time
import cv2
import numpy as np
from mss import mss

# import pyautogui
# width, height = pyautogui.size()
width = 900
height = 900

mon = {'top': 0, 'left': 0, 'width': width, 'height': height}

with mss() as sct:
    # mon = sct.monitors[0]
    while True:
        last_time = time.time()
        img = sct.grab(mon)
        print('fps: {0}'.format(1 / (time.time() - last_time)))
        cv2.imshow('test', np.array(img))
        if cv2.waitKey(25) & 0xFF == ord('q'):
            cv2.destroyAllWindows()
            break
Note:
When you hover over the OpenCV window you don't see the duplicates, because the capture then shows the complete screen without the OpenCV window's own contents; otherwise you see many of them.
You see a cascade because you repeatedly take a screenshot that includes the OpenCV window itself.
It's like standing between two mirrors.
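Along the same lines, if the goal is to capture one specific window rather than the whole screen, grabbing only that window's rectangle keeps the OpenCV preview out of the capture entirely. A sketch combining mss with win32gui, where the window title is a placeholder:

import numpy as np
import win32gui
from mss import mss

hwnd = win32gui.FindWindow(None, 'Some Game')  # placeholder title
left, top, right, bottom = win32gui.GetWindowRect(hwnd)
region = {'left': left, 'top': top,
          'width': right - left, 'height': bottom - top}

with mss() as sct:
    # only the target window's rectangle is grabbed, so the OpenCV preview
    # window never appears inside its own screenshot
    frame = np.array(sct.grab(region))  # BGRA ndarray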
Summary: how can I get the image data (a screenshot) of an application by its window name in Linux, for further processing, using Python 3.X?
I need to do some image detection on GUIs running in both Linux and Windows. I have working code for Windows 10 (below) that gets an application's image data using the win32 API and puts it into a NumPy array for use with OpenCV. I now need a similar approach for Linux.
Importantly, it would be greatly beneficial if the window did not have to be visible for the image data to be obtained (the windows will never be minimised, but they might be obscured by other applications, and I don't want to force them to be the active windows for the sake of what is essentially a screenshot).
I have come across this thread using the Gdk API, which works, but only for the application that invokes it. I have also read this thread; using PIL might be the way to go, but I'd appreciate some input on this matter.
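For reference, a rough, untested sketch of the X11 route with python-xlib: look the window up by title, then read its pixels with XGetImage. One caveat: plain XGetImage generally cannot read obscured regions unless a compositor keeps the window's backing store alive, so the "works while obscured" requirement may still need a compositor or a toolkit-level API. The window title and find_window helper are placeholders:

import numpy as np
from Xlib import display, X

def find_window(node, name):
    # walk the X window tree looking for a title containing `name`
    for child in node.query_tree().children:
        wm_name = child.get_wm_name()
        if wm_name and name in wm_name:
            return child
        found = find_window(child, name)
        if found:
            return found
    return None

disp = display.Display()
win = find_window(disp.screen().root, 'Task Manager')  # placeholder title
geo = win.get_geometry()
raw = win.get_image(0, 0, geo.width, geo.height, X.ZPixmap, 0xffffffff)

# the raw data is BGRX on most setups; drop the padding byte for OpenCV
img = np.frombuffer(raw.data, dtype='uint8').reshape(geo.height, geo.width, 4)
img = np.ascontiguousarray(img[..., :3])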
Example of what I need, working on Windows 10:
import numpy as np
import win32gui, win32ui, win32con
import cv2 as cv


def get_img_data(window_name):
    window = win32gui.FindWindow(None, window_name)
    if not window:
        raise Exception('Window not found: {}'.format(window_name))

    # get the window size
    window_rect = win32gui.GetWindowRect(window)
    w = window_rect[2] - window_rect[0]
    h = window_rect[3] - window_rect[1]

    # account for the window border and titlebar and cut them off
    border_pixels = 8
    titlebar_pixels = 30
    w = w - (border_pixels * 2)
    h = h - titlebar_pixels - border_pixels
    cropped_x = border_pixels
    cropped_y = titlebar_pixels

    # set the cropped coordinates offset so we can translate screenshot
    # images into actual screen positions
    offset_x = window_rect[0] + cropped_x
    offset_y = window_rect[1] + cropped_y

    wDC = win32gui.GetWindowDC(window)
    dcObj = win32ui.CreateDCFromHandle(wDC)
    cDC = dcObj.CreateCompatibleDC()
    dataBitMap = win32ui.CreateBitmap()
    dataBitMap.CreateCompatibleBitmap(dcObj, w, h)
    cDC.SelectObject(dataBitMap)
    cDC.BitBlt((0, 0), (w, h), dcObj, (cropped_x, cropped_y), win32con.SRCCOPY)

    signedIntsArray = dataBitMap.GetBitmapBits(True)
    img = np.frombuffer(signedIntsArray, dtype='uint8')
    img.shape = (h, w, 4)

    # free resources
    dcObj.DeleteDC()
    cDC.DeleteDC()
    win32gui.ReleaseDC(window, wDC)
    win32gui.DeleteObject(dataBitMap.GetHandle())

    img = img[..., :3]
    img = np.ascontiguousarray(img)

    # cv.imwrite('./test.png', img)  # uncomment to save the screenshot
    return img


img_data = get_img_data('Task Manager')