I wish to collect the pixel location (row-i, col-i) by right-clicking the mouse when the image is displayed.
This is a simple example about a picture downloaded from the internet:
import urllib
import cv2
from win32api import GetSystemMetrics
# Download the sample picture and load it as a single-channel (grayscale) image.
path_image = urllib.urlretrieve("http://www.bellazon.com/main/uploads/monthly_06_2013/post-37737-0-06086500-1371727837.jpg", "local-filename.jpg")[0]
img = cv2.imread(path_image, 0)
if img is None:
    # cv2.imread reports failure by returning None rather than raising.
    raise IOError("could not read image: " + path_image)

# Screen width and height as reported by the system metrics.
width = GetSystemMetrics(0)
height = GetSystemMetrics(1)

# Force true division: with Python 2 integers, `width / img.shape[1]` is
# truncated and becomes 0 whenever the image is larger than the screen.
scale_width = float(width) / img.shape[1]
scale_height = float(height) / img.shape[0]
# The smaller factor makes the whole image fit while keeping its aspect ratio.
scale = min(scale_width, scale_height)
window_width = int(img.shape[1] * scale)
window_height = int(img.shape[0] * scale)

# WINDOW_NORMAL makes the window resizable, so resizeWindow can take effect.
cv2.namedWindow('image', cv2.WINDOW_NORMAL)
cv2.resizeWindow('image', window_width, window_height)
cv2.imshow('image', img)
cv2.waitKey(0)
cv2.destroyAllWindows()
At this point, I wish to understand the best way to collect and store the pixel locations in a list.
import urllib
import cv2
from win32api import GetSystemMetrics
# The [x, y] position of every right-click event is accumulated here.
right_clicks = list()


def mouse_callback(event, x, y, flags, params):
    """Record the position of each right-click in ``right_clicks``.

    Intended to be registered with ``cv2.setMouseCallback``.

    Args:
        event: Numeric mouse-event code passed in by OpenCV.
        x: Column of the mouse position.
        y: Row of the mouse position.
        flags: Event flags (unused).
        params: User data given at registration (unused).
    """
    # The right-click event value is 2 (cv2.EVENT_RBUTTONDOWN).
    if event == 2:
        # Appending mutates the module-level list in place, so no `global`
        # declaration is needed.
        right_clicks.append([x, y])
        # Verification output only; remove once collection is confirmed.
        # print(...) with one argument works on Python 2 and Python 3.
        print(right_clicks)
# Download the sample picture and load it as a single-channel (grayscale) image.
path_image = urllib.urlretrieve("http://www.bellazon.com/main/uploads/monthly_06_2013/post-37737-0-06086500-1371727837.jpg", "local-filename.jpg")[0]
img = cv2.imread(path_image, 0)
if img is None:
    # cv2.imread reports failure by returning None rather than raising.
    raise IOError("could not read image: " + path_image)

# Fit the image inside a 640x480 box. The float literals force true division;
# with Python 2 integers the quotient is truncated to 0 for any image larger
# than the box, which collapses the window to 0x0.
scale_width = 640.0 / img.shape[1]
scale_height = 480.0 / img.shape[0]
scale = min(scale_width, scale_height)
window_width = int(img.shape[1] * scale)
window_height = int(img.shape[0] * scale)

cv2.namedWindow('image', cv2.WINDOW_NORMAL)
cv2.resizeWindow('image', window_width, window_height)
# Set the mouse callback function for the window.
cv2.setMouseCallback('image', mouse_callback)
cv2.imshow('image', img)
cv2.waitKey(0)
cv2.destroyAllWindows()
You can use an array or a list to store the pixel locations, and you can store the pixel values as well.
Here, I'm using python 3.x
You can follow the below code. In that code I'm performing the two mouse click events.
One gets the pixel location with a left mouse click, and the second gets the pixel value at the clicked location of a colour image with a right mouse click (OpenCV stores the channels in B, G, R order).
I'm also storing the pixel location value in refPt variable.
See below is the code.
import cv2
import numpy as np
#Collect the name of every cv2 attribute that contains 'EVENT' (this includes
#the mouse click event constants) and print the list for reference
events = [i for i in dir(cv2) if 'EVENT' in i]
print(events)
#Pixel locations picked with the left mouse button are collected here
refPt = []


#click event function
def click_event(event, x, y, flags, param):
    """Annotate the displayed image at the clicked position.

    Left click: print and store the location, then draw "x, y" on the image.
    Right click: read the three channel values at the location and draw them.
    Both branches draw on the module-level ``img`` and refresh the "image"
    window.
    """

    def _stamp(text, colour):
        # Draw the label at the click position and redisplay the image.
        cv2.putText(img, text, (x,y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, colour, 2)
        cv2.imshow("image", img)

    if event == cv2.EVENT_LBUTTONDOWN:
        print(x,",",y)
        refPt.append([x,y])
        _stamp(str(x)+", "+str(y), (255,255,0))

    if event == cv2.EVENT_RBUTTONDOWN:
        # img is indexed [row, column, channel]; channels 0, 1, 2 are read
        # as blue, green and red respectively.
        channels = [img[y, x, c] for c in (0, 1, 2)]
        _stamp(str(channels[0])+", "+str(channels[1])+","+str(channels[2]), (0,255,255))
#Here, you need to change the image name and its path according to your directory
img = cv2.imread("D:/pictures/abc.jpg")
if img is None:
    #cv2.imread returns None for a missing or unreadable file; fail with a
    #clear message instead of an obscure error inside cv2.imshow
    raise FileNotFoundError("could not read image: D:/pictures/abc.jpg")
cv2.imshow("image", img)
#Register the click handler on the window that was just created
cv2.setMouseCallback("image", click_event)
cv2.waitKey(0)
cv2.destroyAllWindows()
Note: The window name must be identical everywhere it is used. In my code, the same name "image" is passed to every call that refers to the window.
You can do the same thing for multiple images as well; you just need to pass a list of images instead of a single image.
If you want to store the pixel location in some text file, then you can also do it as follow:
Give the name of the variable where you are storing the pixel location value. I used refPt for storing the value. so, I used it here as follow:
import csv
# Write every entry of refPt to the text file as one CSV row.
# newline='' lets the csv module control line endings itself.
with open("D:/pixelLocation.txt", 'w', newline='') as out_file:
    csv.writer(out_file).writerows(refPt)
Related
I am trying to write some code, where I want to save some images from the webcam on python using opencv and then I want to open each image, and on each image using a bounding box select a region of interest(ROI) that will be cropped out and save this cropped image in a new folder. For this, I am using the cv2.setMouseCallback and drawing a rectangle on each image. My code works, but the cropped images that are stored are very strange.
import cv2
# Opens the inbuilt camera of laptop to capture video.
cap = cv2.VideoCapture(0)
i = 0
training_images = []
def draw_bounding_box(click, x, y, flag_param, parameters):
    """Mouse callback: drag a rectangle and save the region it encloses.

    The untouched frame lives in the module-level ``copy``. While the left
    button is held, ``image`` is rebuilt from ``copy`` on every move, so only
    the current rectangle is visible. On release the region is cropped from
    ``copy``; the saved crop therefore never contains rectangle outlines or
    inverted pixels.

    Args:
        click: OpenCV mouse-event code.
        x: Column of the mouse position.
        y: Row of the mouse position.
        flag_param: Event flags (unused).
        parameters: User data given at registration (unused).
    """
    global x_pt, y_pt, drawing, top_left_point, bottom_right_point, image

    if click == cv2.EVENT_LBUTTONDOWN:
        drawing = True
        print("drawing="+str(drawing))
        x_pt, y_pt = x, y
        print("x_pt="+str(x_pt))

    elif click == cv2.EVENT_MOUSEMOVE:
        if drawing:
            top_left_point, bottom_right_point = (x_pt, y_pt), (x, y)
            # Start from the pristine frame each time; drawing repeatedly on
            # the same array is what piled up rectangles in the saved crops.
            image = copy.copy()
            cv2.rectangle(image, top_left_point, bottom_right_point, (0, 255, 0), 2)
            cv2.imshow('Frame', image)

    elif click == cv2.EVENT_LBUTTONUP:
        drawing = False
        top_left_point, bottom_right_point = (x_pt, y_pt), (x, y)
        # Order the corners so dragging up or left also selects a region, and
        # clamp at 0 so a negative coordinate cannot wrap the slice around.
        left, right = max(0, min(x_pt, x)), max(0, max(x_pt, x))
        top, bottom = max(0, min(y_pt, y)), max(0, max(y_pt, y))
        # Crop from the untouched frame; .copy() detaches the crop from it.
        cropped_im = copy[top:bottom, left:right].copy()
        # .size is 0 when either dimension is empty; len() only checks rows.
        if cropped_im.size > 0:
            training_images.append(cropped_im)
            cv2.imshow("cropped", cropped_im)
            cv2.imwrite('C:/Downloads/testingPictures/Frame'+str(i)+'.jpg', cropped_im)
            cv2.waitKey(3000)
        else:
            print("could not save")
if __name__ == '__main__':
    # No rectangle is being dragged until the first left-button press.
    drawing = False

    # Phase 1: grab up to ten webcam frames and write each one to disk.
    while(cap.isOpened() and i<10):
        ret, frame = cap.read()
        # Stop as soon as the camera fails to deliver a frame, which also
        # prevents an infinite loop.
        if ret == False:
            break
        # Save frame by frame to disk using imwrite.
        cv2.imwrite('C:/Downloads/testingPictures/Frame'+str(i)+'.jpg', frame)
        i += 1
    # The camera is not used again, so release the device.
    cap.release()
    print(i)

    # Phase 2: show each saved frame, newest first, and let the user drag a
    # box on it. The window and its callback only need to be set up once.
    cv2.namedWindow('Frame')
    cv2.setMouseCallback('Frame', draw_bounding_box)
    while(i>=1):
        # NOTE(review): frames are written under C:/Downloads/... above but
        # read from C:/Users/shrut/Downloads/... here; confirm which is meant.
        image = cv2.imread('C:/Users/shrut/Downloads/testingPictures/Frame'+str(i-1)+'.jpg')
        i = i-1
        if image is None:
            # cv2.imread returns None for a missing or unreadable file.
            print("could not read frame "+str(i))
            continue
        # Pristine copy of the frame; the callback crops from this.
        copy = image.copy()
        cv2.imshow('Frame', copy)
        # Mouse events are delivered while waitKey runs; Esc (27) aborts.
        k = cv2.waitKey(5000) & 0xFF
        if k == 27:
            break
    cv2.destroyAllWindows()
    print(i)
    print(len(training_images))
This is what the saved "cropped images" look like
(https://i.stack.imgur.com/KUrEs.jpg)
It looks like multiple bounding boxes superposed on the image. How can I avoid this and get the area from the image instead?
I am trying to write a Python script that searches for an image on the screen using the cv2.matchTemplate function, then extracts the x and y coordinates of that image and moves the mouse to that location. But for some reason, when I run the code the mouse moves to the end of the screen, and I do not understand why. Can someone help me figure it out?
this is my code:
import numpy as np
import mss
import cv2
import time
import pyautogui
# the templates to search for by order
TEMPLATES_BY_ORDER = ['EmailField.png',
'PasswordField.png', 'CheckBox.png', 'SignInButton.png']
def main():
    """Find the first template on the screen and move the mouse to it.

    After a five-second delay, grabs the screen with mss, searches it for the
    template with ``cv2.matchTemplate`` and moves the pointer to the top-left
    corner of the best match.

    Raises:
        FileNotFoundError: If the template image cannot be read.
    """
    time.sleep(5)
    currentImage = 0
    with mss.mss() as sct:
        # Region to grab: the full size reported by pyautogui.
        screen_w, screen_h = pyautogui.size()
        monitor = {"top": 0, "left": 0, "width": screen_w, "height": screen_h}
        while currentImage < 1:
            # the path for the current template image
            template_img_path = f"images/{TEMPLATES_BY_ORDER[currentImage]}"
            # setting the next image in line
            currentImage += 1

            # Keep the first three channels of the grab, dropping the fourth.
            screen_frame = np.array(sct.grab(monitor))[:, :, :3]
            template_img = cv2.imread(template_img_path, cv2.IMREAD_COLOR)
            if template_img is None:
                # cv2.imread returns None for a missing or unreadable file.
                raise FileNotFoundError(template_img_path)

            # Match the template at its native size. Resizing it to the size
            # of the screen leaves exactly one possible placement, so the
            # result map is 1x1 and the "best match" is always (0, 0).
            result = cv2.matchTemplate(
                screen_frame, template_img, cv2.TM_CCOEFF_NORMED)
            # For TM_CCOEFF_NORMED the best match is the maximum of the map.
            __, __, __, max_loc = cv2.minMaxLoc(result)
            x, y = max_loc
            pyautogui.moveTo(x, y)

            if cv2.waitKey(25) & 0xFF == ord("q"):
                cv2.destroyAllWindows()
                break
if __name__ == "__main__":
main()
This is the screen that I am letting the code record and search in.
And this is the test.png image that I am trying to search for
I have written some code that locks my computer when I get up and move away from it,
but with a simple algorithm it locks immediately, because my face is not detected during some movements. Instead, when my face is not detected, I want it to wait 3 seconds and check again, and lock only if my face is still not detected. But when I use time.sleep, the webcam video freezes and the program behaves as if no face is present even when my face is in front of the camera. What kind of algorithm do you suggest for this?
from multiprocessing.connection import wait
import cv2
import time
import pyautogui
import ctypes
from math import sin, cos, radians
camera = cv2.VideoCapture(0)
face = cv2.CascadeClassifier("haarcascade_frontalface_alt2.xml")
settings = {
'scaleFactor': 1.3,
'minNeighbors': 3,
'minSize': (50, 50),
'flags': cv2.CASCADE_FIND_BIGGEST_OBJECT|cv2.CASCADE_DO_ROUGH_SEARCH
}
def rotate_image(image, angle):
    """Return ``image`` rotated by ``angle`` degrees about its centre.

    The rotation matrix is built with a scale factor of 0.9 and the output
    keeps the input's width and height. An angle of 0 returns the input
    object itself, untouched.
    """
    if angle != 0:
        rows, cols = image.shape[:2]
        matrix = cv2.getRotationMatrix2D((cols/2, rows/2), angle, 0.9)
        return cv2.warpAffine(image, matrix, (cols, rows), flags=cv2.INTER_LINEAR)
    return image
def rotate_point(pos, img, angle):
    """Rotate the corner of a detection box by ``angle`` degrees.

    ``pos`` is an (x, y, w, h) sequence. The (x, y) corner is rotated about
    the point at 40% of the image width and height; w and h pass through
    unchanged. An angle of 0 returns ``pos`` itself.

    NOTE(review): rotate_image rotates about the image centre with scale 0.9,
    while this uses a 0.4 pivot and no scale; confirm that is intended.
    """
    if angle == 0:
        return pos
    pivot_x = img.shape[1]*0.4
    pivot_y = img.shape[0]*0.4
    theta = radians(angle)
    dx = pos[0] - pivot_x
    dy = pos[1] - pivot_y
    rotated_x = dx*cos(theta) + dy*sin(theta) + pivot_x
    rotated_y = -dx*sin(theta) + dy*cos(theta) + pivot_y
    return int(rotated_x), int(rotated_y), pos[2], pos[3]
# Seconds the face may stay undetected before the workstation is locked.
GRACE_SECONDS = 3
# Time at which the current run of face-less frames began, or None while a
# face is visible.
unseen_since = None

while True:
    ret, img = camera.read()
    if not ret:
        # The camera delivered no frame; there is nothing to analyse.
        break

    # Try the upright frame first, then two tilted versions.
    for angle in [0, -25, 25]:
        rimg = rotate_image(img, angle)
        detected = face.detectMultiScale(rimg, **settings)
        if len(detected):
            # Map the last hit back into the unrotated frame.
            detected = [rotate_point(detected[-1], img, -angle)]
            break

    for x, y, w, h in detected[-1:]:
        cv2.rectangle(img, (x, y), (x+w, y+h), (255,0,0), 2)

    cv2.imshow('facedetect', img)

    if cv2.waitKey(5) != -1:
        break

    # Track how long the face has been missing with timestamps instead of
    # time.sleep(): sleeping stops frame capture and display, and the old
    # code then locked without ever checking again.
    if len(detected):
        unseen_since = None
    elif unseen_since is None:
        unseen_since = time.monotonic()
    elif time.monotonic() - unseen_since >= GRACE_SECONDS:
        ctypes.windll.user32.LockWorkStation()
        # Restart the countdown so the lock call is not repeated every frame.
        unseen_since = None

camera.release()
cv2.destroyWindow("facedetect")
Keep a variable holding the timestamp at which your face first went undetected. On every loop iteration, if the face is detected, reset this variable to None; if the face is not detected and the variable is None, set it to the current timestamp; and if the variable is not None and variable + 3 seconds <= the current timestamp, lock your station.
import time
# Timestamp at which the face was first missed, or None while it is visible.
unseen_from = None
while True:
    # etc etc
    # len() works for lists and tuples, and for an array of detection rows,
    # where bool() would be ambiguous.
    detected = len(detected) > 0
    if detected:
        # Face visible again: cancel any pending lock.
        unseen_from = None
    elif unseen_from is None:
        # First frame without a face: start the countdown.
        unseen_from = time.time()
    elif unseen_from + 3 < time.time():
        # Still no face after three seconds.
        ctypes.windll.user32.LockWorkStation()
live coding, I don't have a windows to test this on, but the idea is there
I am trying to select the image corner's and crop it and then perform perspective transform on it.
But when I run this code, the window with the image opens and nothing happens when I double-click, although each double-click is supposed to select one corner of the image.
here's my code
from transform import four_point_transform
import imutils
from skimage.filters import threshold_adaptive
import numpy as np
import cv2
# Load the photo to rectify (cv2.imread returns None if the file is unreadable).
image = cv2.imread("test1.jpg")
# Image height relative to 500 px; not used by the code shown here.
ratio = image.shape[0] / 500.0
# Untouched copy: corner markers are drawn on `image`, the warp reads `orig`.
orig = image.copy()
window_name = "Select corner points of object"
cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
cv2.imshow(window_name, image)
# Corner points chosen by the user, in click order.
pts_1 = []
def callback(event, x, y, flags, param):
    """Collect corner points on double-click; warp once four are chosen.

    Every left double-click stores (x, y) in ``pts_1`` and marks the spot on
    ``image``. When the fourth point arrives, ``orig`` is perspective-
    transformed, converted to grayscale, adaptively thresholded and shown
    next to the original until a key is pressed.
    """
    if event != cv2.EVENT_LBUTTONDBLCLK:
        return

    pts_1.append((x, y))
    cv2.circle(image,(x,y),10,(0,255,0),5)
    cv2.imshow(window_name, image)

    if len(pts_1) != 4:
        return

    corners = np.array(pts_1,dtype="float32")
    print(pts_1)
    scan = four_point_transform(orig, corners)
    scan = cv2.cvtColor(scan, cv2.COLOR_BGR2GRAY)
    scan = threshold_adaptive(scan, 251, offset = 10)
    # Scale the thresholded result to 0/255 for display.
    scan = scan.astype("uint8") * 255
    cv2.imshow("Original", imutils.resize(orig, height = 650))
    cv2.imshow("Scanned", imutils.resize(scan, height = 650))
    cv2.waitKey(0)
# Route mouse events on the selection window to callback().
cv2.setMouseCallback(window_name, callback)
# Block until a key is pressed.
key = cv2.waitKey(0)
Any help would be appreciated?
I have an image that is 6400 × 3200, while my screen is 1280 x 800. Therefore, the image needs to be resized for display only. I am using Python and OpenCV 2.4.9.
According to OpenCV Documentation,
If you need to show an image that is bigger than the screen resolution, you will need to call namedWindow("", WINDOW_NORMAL) before the imshow.
That is what I am doing, but the image is not fitted to the screen, only a portion is shown because it's too big. I've also tried with cv2.resizeWindow, but it doesn't make any difference.
import cv2
cv2.namedWindow("output", cv2.WINDOW_NORMAL) # Create window with freedom of dimensions
# cv2.resizeWindow("output", 400, 300) # Resize window to specified dimensions
im = cv2.imread("earth.jpg") # Read image
if im is None: # imread returns None instead of raising on failure
    raise FileNotFoundError("could not read image: earth.jpg")
cv2.imshow("output", im) # Show image
cv2.waitKey(0) # Display the image infinitely until any keypress
Although I was expecting an automatic solution (fitting to the screen automatically), resizing solves the problem as well.
import cv2
cv2.namedWindow("output", cv2.WINDOW_NORMAL) # Create window with freedom of dimensions
im = cv2.imread("earth.jpg") # Read image
if im is None: # imread returns None instead of raising on failure
    raise FileNotFoundError("could not read image: earth.jpg")
# The size is given as (width, height); 960x540 is fixed and does not take
# the image's own aspect ratio into account.
imS = cv2.resize(im, (960, 540)) # Resize image
cv2.imshow("output", imS) # Show image
cv2.waitKey(0) # Display the image infinitely until any keypress
The other answers perform a fixed (width, height) resize. If you wanted to resize to a specific size while maintaining aspect ratio, use this
def ResizeWithAspectRatio(image, width=None, height=None, inter=cv2.INTER_AREA):
    """Resize ``image`` to a target width or height, keeping its aspect ratio.

    Args:
        image: Array whose first two shape entries are (height, width).
        width: Target width; when given it takes precedence over ``height``.
        height: Target height, used only when ``width`` is None.
        inter: Interpolation flag forwarded to ``cv2.resize``.

    Returns:
        The resized image, or ``image`` itself when neither size is given.
    """
    src_h, src_w = image.shape[:2]

    if width is None:
        if height is None:
            return image
        factor = height / float(src_h)
        target = (int(src_w * factor), height)
    else:
        factor = width / float(src_w)
        target = (width, int(src_h * factor))

    # cv2.resize expects the size as (width, height).
    return cv2.resize(image, target, interpolation=inter)
Example
# Load an image, scale it to a width of 1280 px and show it until a key is pressed.
image = cv2.imread('img.png')
resize = ResizeWithAspectRatio(image, width=1280) # Resize by width OR
# resize = ResizeWithAspectRatio(image, height=1280) # Resize by height
cv2.imshow('resize', resize)
cv2.waitKey()
Use this for example:
# Create the window as WINDOW_NORMAL first, then show the image in the window
# of the same name. finalImg must already hold the image to display.
cv2.namedWindow('finalImg', cv2.WINDOW_NORMAL)
cv2.imshow("finalImg",finalImg)
The only way resizeWindow worked for me was to have it after imshow. This is the order I'm using:
# NOTE: win_name, X, Y, image, width, height and wait_time are placeholders
# that must be defined before this snippet runs.
# Create a Named Window
cv2.namedWindow(win_name, cv2.WINDOW_NORMAL)
# Move it to (X,Y)
cv2.moveWindow(win_name, X, Y)
# Show the Image in the Window
cv2.imshow(win_name, image)
# Resize the Window (done after imshow, the order that worked for the author)
cv2.resizeWindow(win_name, width, height)
# Wait for <> milliseconds
cv2.waitKey(wait_time)
In OpenCV, cv2.namedWindow() just creates a window object, but doesn't resize the original image. You can use cv2.resize(img, resolution) to solve the problem.
Here's what it displays, a 740 * 411 resolution image.
# Show the image at its original size until a key is pressed.
image = cv2.imread("740*411.jpg")
cv2.imshow("image", image)
cv2.waitKey(0)
cv2.destroyAllWindows()
Here, it displays a 100 * 200 (rows * columns) image after resizing. Remember that the size argument of cv2.resize lists the number of columns (width) first and the number of rows (height) second.
# Resize before display; the size tuple is (width, height) = (200, 100).
image = cv2.imread("740*411.jpg")
image = cv2.resize(image, (200, 100))
cv2.imshow("image", image)
cv2.waitKey(0)
cv2.destroyAllWindows()
This code will resize the image so that it can retain it's aspect ratio and only ever take up a specified fraction of the screen area.
It will automatically adjust depending on your screen size and the size of the image.
Use the area variable to change the max screen area you want the image to be able to take up. The example shows it displayed at quarter the screen size.
import cv2
import tkinter as tk
from math import *
img = cv2.imread("test.jpg")
if img is None:
    # cv2.imread returns None for a missing or unreadable file.
    raise FileNotFoundError("could not read image: test.jpg")

# Largest fraction of the screen area the image may occupy.
area = 0.25

h, w = img.shape[:2]

# The Tk root is only needed to query the screen size: hide it so no empty
# window appears, and destroy it once the values have been read.
root = tk.Tk()
root.withdraw()
screen_h = root.winfo_screenheight()
screen_w = root.winfo_screenwidth()
root.destroy()

# Scaling both sides by sqrt(area) scales the area by `area`.
vector = sqrt(area)
window_h = screen_h * vector
window_w = screen_w * vector

# Shrink only when the image exceeds the box, using the tighter constraint
# so that both dimensions fit.
if h > window_h or w > window_w:
    if h / window_h >= w / window_w:
        multiplier = window_h / h
    else:
        multiplier = window_w / w
    img = cv2.resize(img, (0, 0), fx=multiplier, fy=multiplier)

cv2.imshow("output", img)
cv2.waitKey(0)
I've also made a similar function where area is still a parameter but so is window height and window width.
If no area is input then it will use a defined height and width (window_h, window_w) of the window size you would like the image to fit inside.
If an input is given for all parameters then 'area' is prioritised.
import cv2
import tkinter as tk
from math import *
def resize_image(img, area=0.0, window_h=0, window_w=0):
    """Shrink ``img`` to fit a box while keeping its aspect ratio.

    The box is either a fraction of the screen area (``area``) or an explicit
    ``window_h`` x ``window_w``. When ``area`` is non-zero it takes priority.
    Images that already fit are returned unchanged; images are never
    enlarged.

    Args:
        img: Array whose first two shape entries are (height, width).
        area: Fraction of the screen area the image may occupy; 0.0 disables
            the screen-based box.
        window_h: Maximum height in pixels; 0 means unconstrained.
        window_w: Maximum width in pixels; 0 means unconstrained.

    Returns:
        The resized image, or ``img`` itself when no shrinking is needed.
    """
    h, w = img.shape[:2]

    if area != 0.0:
        # Only `from math import *` is in scope at file level, which does not
        # bind the name `math`; import it here so math.sqrt resolves.
        import math

        # The screen size is needed only in this branch. Hide the Tk root so
        # no empty window appears, and always destroy it afterwards.
        root = tk.Tk()
        root.withdraw()
        try:
            screen_h = root.winfo_screenheight()
            screen_w = root.winfo_screenwidth()
        finally:
            root.destroy()
        # Scaling both sides by sqrt(area) scales the area by `area`.
        vector = math.sqrt(area)
        window_h = screen_h * vector
        window_w = screen_w * vector

    # One ratio per active constraint. A size of 0 is skipped, which avoids
    # the division by zero that occurred when no box was supplied.
    ratios = []
    if window_h > 0:
        ratios.append(window_h / h)
    if window_w > 0:
        ratios.append(window_w / w)
    if not ratios:
        return img

    # The tighter constraint wins; below 1 means the image exceeds the box.
    multiplier = min(ratios)
    if multiplier < 1:
        img = cv2.resize(img, (0, 0), fx=multiplier, fy=multiplier)
    return img
# using area
initial_image = cv2.imread("test.jpg")
resized_image = resize_image(initial_image, area=0.25)
cv2.imshow("output", resized_image)
cv2.waitKey(0)
# using window height and width
initial_image = cv2.imread("test.jpg")
resized_image = resize_image(initial_image, window_h = 480, window_w = 270)
cv2.imshow("output", resized_image)
cv2.waitKey(0)
Looks like opencv lib is pretty sensitive to parameters passed to the methods. The following code worked for me using opencv 4.3.0:
# NOTE: filename and resize_factor are not defined in this snippet and must be
# supplied before it runs.
win_name = "visualization" # 1. use var to specify window name everywhere
cv2.namedWindow(win_name, cv2.WINDOW_NORMAL) # 2. use 'normal' flag
img = cv2.imread(filename)
h,w = img.shape[:2] # suits for image containing any amount of channels
h = int(h / resize_factor) # one must compute beforehand
w = int(w / resize_factor) # and convert to INT
cv2.resizeWindow(win_name, w, h) # use variables defined/computed BEFOREHAND
cv2.imshow(win_name, img)
Try this:
# Load the image and force it to 240x240 pixels (aspect ratio is not preserved).
image = cv2.imread("img/Demo.jpg")
image = cv2.resize(image,(240,240))
The image is now resized. Displaying it will render in 240x240.
The cv2.WINDOW_NORMAL option works correctly but the first time it displays the window in an standard size.
You can resize the window like any other window on your computer: position the mouse over the edge of the window and drag it to where you want it. Do this for both the width and the height until the window has the size you want.
The following times you refresh the window, by executing the code, OpenCV will generate the window with the size of the last time it was shown or modified.
Try this code:
img = cv2.imread("Fab2_0.1 X 1.03MM GRID.jpg", cv2.IMREAD_GRAYSCALE)
image_scale_down = 3
# img.shape is (rows, columns) = (height, width), while cv2.resize takes
# (width, height). x must therefore come from shape[1] and y from shape[0];
# the other way round transposes the aspect ratio of non-square images.
x = int(img.shape[1] / image_scale_down)
y = int(img.shape[0] / image_scale_down)
image = cv2.resize(img, (x, y))
cv2.imshow("image_title", image)
cv2.waitKey(5000)
cv2.destroyAllWindows()
The most upvoted answer is perfect!
I am just adding my code for those who want "dynamic" resize handling that depends on the aspect ratio.
import cv2
from win32api import GetSystemMetrics
def get_resized_for_display_img(img):
    """Return ``img`` scaled to 90% of the largest size that fits the screen.

    The image is first scaled to the screen width; if it is then taller than
    the screen it is scaled to the screen height instead. The aspect ratio is
    preserved in both cases.

    Args:
        img: Image array with two or three dimensions.

    Returns:
        The resized image produced by ``cv2.resize``.
    """
    screen_w, screen_h = GetSystemMetrics(0), GetSystemMetrics(1)
    print("screen size",screen_w, screen_h)
    # Take only the first two entries so 2-D (grayscale) images work too;
    # unpacking three values raised ValueError for them.
    h, w = img.shape[:2]
    # img gets the width of the screen and h is adapted
    h = h * (screen_w / w)
    w = screen_w
    if h > screen_h: # if the img height is still too big
        # img gets the height of the screen and w is adapted
        w = w * (screen_h / h)
        h = screen_h
    w, h = w*0.9, h*0.9 # leave a margin instead of filling the whole screen
    w, h = int(w), int(h) # cv2.resize needs integer sizes
    return cv2.resize(img, (w, h))
Try this code
# Resize to a fixed (width, height) of 1280x800; aspect ratio is not preserved.
img = cv2.resize(img,(1280,800))
Try with this code:
from PIL import Image
# Convert the array to a PIL image and show it with PIL.
# NOTE(review): if `image` came from cv2.imread its channels are presumably in
# BGR order, so colours may look swapped here; confirm and convert if needed.
Image.fromarray(image).show()