Obtain image data of separate application using Python3 on Linux - python

Summary: How can I get the image data (a screenshot) of an application by its window name in Linux for further processing using Python 3.X
I need to do some image detection on GUIs running in both Linux and Windows. I have working code for Windows 10 (below) that gets an applications image data using the win32 API and puts it into a numpy array for using with OpenCV. I now need a similar approach for Linux.
Importantly, it would be greatly beneficial if the window did not have to be visible for the image data to be obtained (the windows will never be minimised, but they might be obscured by other applications, and I don't want to have to force the windows to be the active windows for the sake of what is essentially a screenshot)
I have come across this thread using the Gdk API, which works, but only for the application that invokes it. I have also read this thread, using PIL might be the way to go, but i'd appreciate some input on this matter
Example of what I need, working on Windows 10
import numpy as np
import win32gui, win32ui, win32con
import cv2 as cv
def get_img_data(window_name):
window = win32gui.FindWindow(None, window_name)
if not window:
raise Exception('Window not found: {}'.format(window_name))
# get the window size
window_rect = win32gui.GetWindowRect(window)
w = window_rect[2] - window_rect[0]
h = window_rect[3] - window_rect[1]
# account for the window border and titlebar and cut them off
border_pixels = 8
titlebar_pixels = 30
w = w - (border_pixels * 2)
h = h - titlebar_pixels - border_pixels
cropped_x = border_pixels
cropped_y = titlebar_pixels
# set the cropped coordinates offset so we can translate screenshot
# images into actual screen positions
offset_x = window_rect[0] + cropped_x
offset_y = window_rect[1] + cropped_y
wDC = win32gui.GetWindowDC(window)
dcObj = win32ui.CreateDCFromHandle(wDC)
cDC = dcObj.CreateCompatibleDC()
dataBitMap = win32ui.CreateBitmap()
dataBitMap.CreateCompatibleBitmap(dcObj, w, h)
cDC.SelectObject(dataBitMap)
cDC.BitBlt((0, 0), (w, h), dcObj, (cropped_x, cropped_y), win32con.SRCCOPY)
signedIntsArray = dataBitMap.GetBitmapBits(True)
img = np.fromstring(signedIntsArray, dtype='uint8')
img.shape = (h, w, 4)
# free resources
dcObj.DeleteDC()
cDC.DeleteDC()
win32gui.ReleaseDC(window, wDC)
win32gui.DeleteObject(dataBitMap.GetHandle())
img = img[...,:3]
img = np.ascontiguousarray(img)
# cv.imwrite('./test.png', img) # uncomment to save the screenshot
return img
img_data = get_img_data('Task Manager')

Related

How do I get rid of black bar in python window?

I'm working on a Object Detection project for the game Cuphead using OpenCV and Python. Now I'm trying to capture objects in real time but when the detection window displays I get this rare black bar on the top and I don't know how to get rid of it, here's what I see, on the left my object detection window and in the right the Cuphead game window.
Here's the code for the class used for this:
import numpy as np
import win32gui, win32ui, win32con
class WindowCapture:
# define monitor's width and height
w = 0
h = 0
hwnd = None
# constructor
def __init__(self, window_name):
if window_name is None: # if we don't pass any window names capture desktop
self.hwnd = win32gui.GetDesktopWindow()
else:
# Find the game window
self.hwnd = win32gui.FindWindow(None, window_name)
if not self.hwnd:
raise Exception("Window not founnd: {}".format(window_name))
# define window's widht and height. the resolution we'll work with
window_rect = win32gui.GetWindowRect(self.hwnd)
self.w = window_rect[2] - window_rect[0]
self.h = window_rect[3] - window_rect[1]
def get_screenshot(self):
# get the window image data
wDC = win32gui.GetWindowDC(self.hwnd)
dcObj = win32ui.CreateDCFromHandle(wDC)
cDC = dcObj.CreateCompatibleDC()
dataBitMap = win32ui.CreateBitmap()
dataBitMap.CreateCompatibleBitmap(dcObj, self.w, self.h)
cDC.SelectObject(dataBitMap)
cDC.BitBlt((0,0), (self.w, self.h), dcObj, (0,0), win32con.SRCCOPY)
# create the screenshot image that we want to return to be processed
signedIntsArray = dataBitMap.GetBitmapBits(True)
img = np.fromstring(signedIntsArray, dtype='uint8')
img.shape = (self.h, self.w, 4)
# Free Resources
dcObj.DeleteDC()
cDC.DeleteDC()
win32gui.ReleaseDC(self.hwnd, wDC)
win32gui.DeleteObject(dataBitMap.GetHandle())
# get rid of the alpha channel in the img
img = img[..., :3]
img = np.ascontiguousarray(img)
return img
It seems img.shape = (self.h, self.w, 4) causes the problem. As GetWindowRect and #IInspectable said,
In Windows Vista and later, the Window Rect now includes the area
occupied by the drop shadow.

Pywin32, win32gui window capture black screen

I need to make a screen capture of a specific application. If I grab the Windows desktop (hwnd = None), everything works, but I get a black screen when I try to grab the screen of a specific application, e.g. hwnd = win32gui.FindWindow(None, 'Albion Online Client').
import cv2 as cv
import numpy as np
import os
from time import time
import win32gui
import win32ui
import win32con
os.chdir(os.path.dirname(os.path.abspath(__file__)))
def window_capture():
w = 1920 # set this
h = 1080 # set this
#hwnd = None ### Everything works
hwnd = win32gui.FindWindow(None, 'Albion Online Client') ### Black Screen
wDC = win32gui.GetWindowDC(hwnd)
dcObj = win32ui.CreateDCFromHandle(wDC)
cDC = dcObj.CreateCompatibleDC()
dataBitMap = win32ui.CreateBitmap()
dataBitMap.CreateCompatibleBitmap(dcObj, w, h)
cDC.SelectObject(dataBitMap)
cDC.BitBlt((0, 0), (w, h), dcObj, (0, 0), win32con.SRCCOPY)
#Save screenshoot
#dataBitMap.SaveBitmapFile(cDC, 'debug.bmp' )
signedIntsArray = dataBitMap.GetBitmapBits(True)
img = np.fromstring(signedIntsArray, dtype='uint8')
img.shape = (h, w, 4)
# Free Resources
dcObj.DeleteDC()
cDC.DeleteDC()
win32gui.ReleaseDC(hwnd, wDC)
win32gui.DeleteObject(dataBitMap.GetHandle())
img = img[..., :3]
img = np.ascontiguousarray(img)
return img
# initialize the WindowCapture clas
loop_time = time()
while(True):
screenshot = window_capture()
cv.imshow('Computer Vision', screenshot)
# debug the loop rate
print('FPS {}'.format(1 / (time() - loop_time)))
loop_time = time()
# press 'q' with the output window focused to exit.
# waits 1 ms every loop to process key presses
if cv.waitKey(1) == ord('q'):
cv.destroyAllWindows()
break
print('Done.')
That's an OpenGL or Direct3D application. It doesn't draw to its own window. It creates textures and surfaces, and has the graphics card render its display list directly into the frame buffer.

OpenCv window cascading the screen when it's clicked on

I wrote this code to capture screen footage by taking screenshots everything second, however the OpenCv window cascades the screen when it's clicked on.
When you hover over the Opencv window, it shows me the screen without a hundred duplicates. What is exactly the bug? Picture:
The code for using the class I made:
from window_capture import Window_Capture
from time import time
import cv2 as cv
import numpy
loop_time = time()
wincap = Window_Capture()
while True:
screenshot = wincap.screenshare()
cv.imshow('Screen', screenshot)
print("FPS {}".format(round(1 / (time() - loop_time))))
loop_time = time()
if cv.waitKey(1) == ord('q'):
print("Capture Ended")
cv.destroyWindow(screenshot)
The class for screen capturing:
import pyautogui
import numpy
import cv2 as cv
from time import time
from PIL import ImageGrab
import win32gui, win32ui, win32con
from win32api import GetSystemMetrics
import time
class Window_Capture:
w = 0
h = 0
hwnd = None
def __init__(self, window_name=None):
# checking is windowname exists
if window_name is None:
print("No window set to capture: Setting to active Window")
time.sleep(3)
self.hwnd = win32gui.GetDesktopWindow()
print(win32gui.GetWindowText(win32gui.GetDesktopWindow()))
else:
# self.hwnd = win32gui.FindWindow(None, window_name)
self.hwnd = win32gui.FindWindow(None, window_name)
print("Window Found: Setting to preferred Window")
# define monitor height and width and hwnd
self.w = GetSystemMetrics(0)
self.h = GetSystemMetrics(1)
def screenshare(self):
wDC = win32gui.GetWindowDC(self.hwnd)
dcObj = win32ui.CreateDCFromHandle(wDC)
cDC = dcObj.CreateCompatibleDC()
dataBitMap = win32ui.CreateBitmap()
dataBitMap.CreateCompatibleBitmap(dcObj, self.w, self.h)
cDC.SelectObject(dataBitMap)
cDC.BitBlt((0, 0), (self.w, self.h), dcObj, (0, 0), win32con.SRCCOPY)
signedIntsArray = dataBitMap.GetBitmapBits(True)
img = numpy.frombuffer(signedIntsArray, dtype='uint8')
img.shape = (self.h, self.w, 4)
dcObj.DeleteDC()
cDC.DeleteDC()
win32gui.ReleaseDC(self.hwnd, wDC)
win32gui.DeleteObject(dataBitMap.GetHandle())
img = img[..., :3]
img = numpy.ascontiguousarray(img)
return img
This bug is preventing me from moving onto the next part my image detection project.
Better solution is :
import time
import cv2
import numpy as np
from mss import mss
#import pyautogui
#width, height= pyautogui.size()
width=900
height=900
mon = {'top': 0, 'left': 0, 'width': width, 'height': height}
with mss() as sct:
# mon = sct.monitors[0]
while True:
last_time = time.time()
img = sct.grab(mon)
print('fps: {0}'.format(1 / (time.time()-last_time)))
cv2.imshow('test', np.array(img))
if cv2.waitKey(25) & 0xFF == ord('q'):
cv2.destroyAllWindows()
break
Note:
When you hover then you won't be seeing many windows beacuse you see the complete window without the opencv window opended and else you see many of them
you see a cascade because you take a screenshot of the window itself, repeatedly.
it's like standing between two mirrors.

Taking fast screenshot Winapi and Opencv

I need to take very fast screenshots of a game for an OpenCV project I am working on. I can use PIL easily for example:
def take_screenshot1(hwnd):
rect = win32gui.GetWindowRect(hwnd)
img = ImageGrab.grab(bbox=rect)
img_np = np.array(img)
return cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR)
But it takes on average of 0.05 seconds which isn't fast enough for real time capture.
I can use the answer posted here, but that only saves the bitmap to a file. That is over 10 times faster than by using PIL, but I am unsure of any methods within OpenCV to convert it to a bgr/hsv image.
def take_screenshot(hwnd):
wDC = win32gui.GetWindowDC(hwnd)
dcObj=win32ui.CreateDCFromHandle(wDC)
cDC=dcObj.CreateCompatibleDC()
dataBitMap = win32ui.CreateBitmap()
dataBitMap.CreateCompatibleBitmap(dcObj, 500, 500)
cDC.SelectObject(dataBitMap)
cDC.BitBlt((0, 0), (500, 500), dcObj, (0, 0), win32con.SRCCOPY)
dataBitMap.SaveBitmapFile(cDC, "foo.png")
dcObj.DeleteDC()
cDC.DeleteDC()
win32gui.ReleaseDC(hwnd, wDC)
win32gui.DeleteObject(dataBitMap.GetHandle())
im = cv2.imread("foo.png")
return cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
EDIT:The size of the window is 500x500, so it is saving the same area in both examples.
Even if I save the image, and then reopen it with OpenCV it is still faster than PIL, but surely there is an easier way?
EDIT: Ok so using the comments and doing some research on winapi I now can access the bitmap data directly as follows:
def take_screenshot1(hwnd):
wDC = win32gui.GetWindowDC(hwnd)
dcObj=win32ui.CreateDCFromHandle(wDC)
cDC=dcObj.CreateCompatibleDC()
dataBitMap = win32ui.CreateBitmap()
dataBitMap.CreateCompatibleBitmap(dcObj, 500, 500)
cDC.SelectObject(dataBitMap)
cDC.BitBlt((0, 0), (500, 500), dcObj, (0, 0), win32con.SRCCOPY)
im = dataBitMap.GetBitmapBits(True) # Tried False also
img = np.array(im)
cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
print(img)
dcObj.DeleteDC()
cDC.DeleteDC()
win32gui.ReleaseDC(hwnd, wDC)
win32gui.DeleteObject(dataBitMap.GetHandle())
But i'm not sure how to convert the returned bitmap to a form that OpenCV understands, as there are no methods to convert bitmap to rgb/bgr in OpenCV
I'll just show the code that works for me.
import time
import win32gui
import win32ui
import win32con
import win32api
import numpy as np
import cv2
def window_capture():
hwnd = 0
hwndDC = win32gui.GetWindowDC(hwnd)
mfcDC = win32ui.CreateDCFromHandle(hwndDC)
saveDC = mfcDC.CreateCompatibleDC()
saveBitMap = win32ui.CreateBitmap()
MoniterDev = win32api.EnumDisplayMonitors(None, None)
w = MoniterDev[0][2][2]
h = MoniterDev[0][2][3]
saveBitMap.CreateCompatibleBitmap(mfcDC, w, h)
saveDC.SelectObject(saveBitMap)
saveDC.BitBlt((0, 0), (w, h), mfcDC, (0, 0), win32con.SRCCOPY)
im = saveBitMap.GetBitmapBits(True) # Tried False also
img = np.frombuffer(im, dtype=np.uint8).reshape((h, w, 4))
cv2.imshow("demo", img)
cv2.waitKey(100)
beg = time.time()
for i in range(100):
window_capture()
end = time.time()
print(end - beg)
cv2.destroyAllWindows()

OpenCV & Python - Image too big to display

I have an image that is 6400 × 3200, while my screen is 1280 x 800. Therefore, the image needs to be resized for display only. I am using Python and OpenCV 2.4.9.
According to OpenCV Documentation,
If you need to show an image that is bigger than the screen resolution, you will need to call namedWindow("", WINDOW_NORMAL) before the imshow.
That is what I am doing, but the image is not fitted to the screen, only a portion is shown because it's too big. I've also tried with cv2.resizeWindow, but it doesn't make any difference.
import cv2
cv2.namedWindow("output", cv2.WINDOW_NORMAL) # Create window with freedom of dimensions
# cv2.resizeWindow("output", 400, 300) # Resize window to specified dimensions
im = cv2.imread("earth.jpg") # Read image
cv2.imshow("output", im) # Show image
cv2.waitKey(0) # Display the image infinitely until any keypress
Although I was expecting an automatic solution (fitting to the screen automatically), resizing solves the problem as well.
import cv2
cv2.namedWindow("output", cv2.WINDOW_NORMAL) # Create window with freedom of dimensions
im = cv2.imread("earth.jpg") # Read image
imS = cv2.resize(im, (960, 540)) # Resize image
cv2.imshow("output", imS) # Show image
cv2.waitKey(0) # Display the image infinitely until any keypress
The other answers perform a fixed (width, height) resize. If you wanted to resize to a specific size while maintaining aspect ratio, use this
def ResizeWithAspectRatio(image, width=None, height=None, inter=cv2.INTER_AREA):
dim = None
(h, w) = image.shape[:2]
if width is None and height is None:
return image
if width is None:
r = height / float(h)
dim = (int(w * r), height)
else:
r = width / float(w)
dim = (width, int(h * r))
return cv2.resize(image, dim, interpolation=inter)
Example
image = cv2.imread('img.png')
resize = ResizeWithAspectRatio(image, width=1280) # Resize by width OR
# resize = ResizeWithAspectRatio(image, height=1280) # Resize by height
cv2.imshow('resize', resize)
cv2.waitKey()
Use this for example:
cv2.namedWindow('finalImg', cv2.WINDOW_NORMAL)
cv2.imshow("finalImg",finalImg)
The only way resizeWindow worked for me was to have it after imshow. This is the order I'm using:
# Create a Named Window
cv2.namedWindow(win_name, cv2.WINDOW_NORMAL)
# Move it to (X,Y)
cv2.moveWindow(win_name, X, Y)
# Show the Image in the Window
cv2.imshow(win_name, image)
# Resize the Window
cv2.resizeWindow(win_name, width, height)
# Wait for <> miliseconds
cv2.waitKey(wait_time)
In OpenCV, cv2.namedWindow() just creates a window object, but doesn't resize the original image. You can use cv2.resize(img, resolution) to solve the problem.
Here's what it displays, a 740 * 411 resolution image.
image = cv2.imread("740*411.jpg")
cv2.imshow("image", image)
cv2.waitKey(0)
cv2.destroyAllWindows()
Here, it displays a 100 * 200 resolution image after resizing. Remember the resolution parameter use column first then is row.
image = cv2.imread("740*411.jpg")
image = cv2.resize(image, (200, 100))
cv2.imshow("image", image)
cv2.waitKey(0)
cv2.destroyAllWindows()
This code will resize the image so that it can retain it's aspect ratio and only ever take up a specified fraction of the screen area.
It will automatically adjust depending on your screen size and the size of the image.
Use the area variable to change the max screen area you want the image to be able to take up. The example shows it displayed at quarter the screen size.
import cv2
import tkinter as tk
from math import *
img = cv2.imread("test.jpg")
area = 0.25
h, w = img.shape[:2]
root = tk.Tk()
screen_h = root.winfo_screenheight()
screen_w = root.winfo_screenwidth()
vector = sqrt(area)
window_h = screen_h * vector
window_w = screen_w * vector
if h > window_h or w > window_w:
if h / window_h >= w / window_w:
multiplier = window_h / h
else:
multiplier = window_w / w
img = cv2.resize(img, (0, 0), fx=multiplier, fy=multiplier)
cv2.imshow("output", img)
cv2.waitKey(0)
I've also made a similar function where area is still a parameter but so is window height and window width.
If no area is input then it will use a defined height and width (window_h, window_w) of the window size you would like the image to fit inside.
If an input is given for all parameters then 'area' is prioritised.
import cv2
import tkinter as tk
from math import *
def resize_image(img, area=0.0, window_h=0, window_w=0):
h, w = img.shape[:2]
root = tk.Tk()
screen_h = root.winfo_screenheight()
screen_w = root.winfo_screenwidth()
if area != 0.0:
vector = math.sqrt(area)
window_h = screen_h * vector
window_w = screen_w * vector
if h > window_h or w > window_w:
if h / window_h >= w / window_w:
multiplier = window_h / h
else:
multiplier = window_w / w
img = cv2.resize(img, (0, 0), fx=multiplier, fy=multiplier)
return img
# using area
initial_image = cv2.imread("test.jpg")
resized_image = resize_image(initial_image, area=0.25))
cv2.imshow("output", resized_image)
cv2.waitKey(0)
# using window height and width
initial_image = cv2.imread("test.jpg")
resized_image = resize_image(initial_image, window_h = 480, window_w = 270))
cv2.imshow("output", resized_image)
cv2.waitKey(0)
Looks like opencv lib is pretty sensitive to parameters passed to the methods. The following code worked for me using opencv 4.3.0:
win_name = "visualization" # 1. use var to specify window name everywhere
cv2.namedWindow(win_name, cv2.WINDOW_NORMAL) # 2. use 'normal' flag
img = cv2.imread(filename)
h,w = img.shape[:2] # suits for image containing any amount of channels
h = int(h / resize_factor) # one must compute beforehand
w = int(w / resize_factor) # and convert to INT
cv2.resizeWindow(win_name, w, h) # use variables defined/computed BEFOREHAND
cv2.imshow(win_name, img)
Try this:
image = cv2.imread("img/Demo.jpg")
image = cv2.resize(image,(240,240))
The image is now resized. Displaying it will render in 240x240.
The cv2.WINDOW_NORMAL option works correctly but the first time it displays the window in an standard size.
If you resize the window like any other windows in your computer, by position the mouse over the edge of the window you want to resize and then drag the mouse to the position you want. If you do this to both width and height of the window to the size you want to obtain.
The following times you refresh the window, by executing the code, OpenCV will generate the window with the size of the last time it was shown or modified.
Try this code:
img = cv2.imread("Fab2_0.1 X 1.03MM GRID.jpg", cv2.IMREAD_GRAYSCALE)
image_scale_down = 3
x = (int)(img.shape[0]/image_scale_down)
y = (int)(img.shape[1]/image_scale_down)
image = cv2.resize(img, (x,y))
cv2.imshow("image_title", image)
cv2.waitKey(5000)
cv2.destroyAllWindows()
The most upvote answer is perfect !
I just add my code for those who want some "dynamic" resize handling depending of the ratio.
import cv2
from win32api import GetSystemMetrics
def get_resized_for_display_img(img):
screen_w, screen_h = GetSystemMetrics(0), GetSystemMetrics(1)
print("screen size",screen_w, screen_h)
h,w,channel_nbr = img.shape
# img get w of screen and adapt h
h = h * (screen_w / w)
w = screen_w
if h > screen_h: #if img h still too big
# img get h of screen and adapt w
w = w * (screen_h / h)
h = screen_h
w, h = w*0.9, h*0.9 # because you don't want it to be that big, right ?
w, h = int(w), int(h) # you need int for the cv2.resize
return cv2.resize(img, (w, h))
Try this code
img = cv2.resize(img,(1280,800))
Try with this code:
from PIL import Image
Image.fromarray(image).show()

Categories

Resources