I have a function which grabs an image from a camera:
img = cam.read()
Now I want to read that image with tf.read_file, but when I do img_raw = tf.read_file(img), it fails, because tf.read_file expects a filename string, not an image array.
This is the code for the camera class:
"""camera.py
This code implements the Camera class, which encapsulates code to
handle IP CAM, USB webcam or the Jetson onboard camera. The Camera
class is further extended to take either a video or an image file as
input.
"""
import time
import logging
import threading
import numpy as np
import cv2
def open_cam_rtsp(uri, width, height, latency):
"""Open an RTSP URI (IP CAM)."""
gst_str = ('rtspsrc location={} latency={} ! '
'rtph264depay ! h264parse ! omxh264dec ! '
'nvvidconv ! '
'video/x-raw, width=(int){}, height=(int){}, '
'format=(string)BGRx ! videoconvert ! '
'appsink').format(uri, latency, width, height)
return cv2.VideoCapture(gst_str, cv2.CAP_GSTREAMER)
def open_cam_usb(dev, width, height):
"""Open a USB webcam.
We want to set width and height here, otherwise we could just do:
return cv2.VideoCapture(dev)
"""
gst_str = ('v4l2src device=/dev/video{} ! '
'video/x-raw, width=(int){}, height=(int){}, '
'format=(string)RGB ! videoconvert ! '
'appsink').format(dev, width, height)
return cv2.VideoCapture(gst_str, cv2.CAP_GSTREAMER)
def open_cam_onboard(width, height):
"""Open the Jetson onboard camera.
On versions of L4T prior to 28.1, you might need to add
'flip-method=2' into gst_str.
"""
gst_str = ('nvcamerasrc ! '
'video/x-raw(memory:NVMM), '
'width=(int)2592, height=(int)1458, '
'format=(string)I420, framerate=(fraction)30/1 ! '
'nvvidconv ! '
'video/x-raw, width=(int){}, height=(int){}, '
'format=(string)BGRx ! videoconvert ! '
'appsink').format(width, height)
return cv2.VideoCapture(gst_str, cv2.CAP_GSTREAMER)
def grab_img(cam):
"""This 'grab_img' function is designed to be run in the sub-thread.
Once started, this thread continues to grab a new image and put it
into the global 'img_handle', until 'thread_running' is set to False.
"""
while cam.thread_running:
if cam.args.use_image:
assert cam.img_handle is not None, 'img_handle is empty in use_image case!'
# keep using the same img, no need to update it
time.sleep(0.01) # yield CPU to other threads
else:
_, cam.img_handle = cam.cap.read()
if cam.img_handle is None:
logging.warning('grab_img(): cap.read() returns None...')
break
cam.thread_running = False
class Camera():
"""Camera class which supports reading images from theses video sources:
1. Video file
2. Image (jpg, png, etc.) file, repeating indefinitely
3. RTSP (IP CAM)
4. USB webcam
5. Jetson onboard camera
"""
def __init__(self, args):
self.args = args
self.is_opened = False
self.thread_running = False
self.img_handle = None
self.img_width = 0
self.img_height = 0
self.cap = None
self.thread = None
def open(self):
"""Open camera based on command line arguments."""
assert self.cap is None, 'Camera is already opened!'
args = self.args
if args.use_file:
self.cap = cv2.VideoCapture(args.filename)
# ignore image width/height settings here
elif args.use_image:
self.cap = 'OK'
self.img_handle = cv2.imread(args.filename)
# ignore image width/height settings here
if self.img_handle is not None:
self.is_opened = True
self.img_height, self.img_width, _ = self.img_handle.shape
elif args.use_rtsp:
self.cap = open_cam_rtsp(
args.rtsp_uri,
args.image_width,
args.image_height,
args.rtsp_latency
)
elif args.use_usb:
self.cap = open_cam_usb(
args.video_dev,
args.image_width,
args.image_height
)
else: # by default, use the jetson onboard camera
self.cap = open_cam_onboard(
args.image_width,
args.image_height
)
if self.cap != 'OK':
if self.cap.isOpened():
# Try to grab the 1st image and determine width and height
_, img = self.cap.read()
if img is not None:
self.img_height, self.img_width, _ = img.shape
self.is_opened = True
def start(self):
assert not self.thread_running
self.thread_running = True
self.thread = threading.Thread(target=grab_img, args=(self,))
self.thread.start()
def stop(self):
self.thread_running = False
self.thread.join()
def read(self):
if self.args.use_image:
return np.copy(self.img_handle)
else:
return self.img_handle
def release(self):
assert not self.thread_running
if self.cap != 'OK':
self.cap.release()
So in another Python script I want to use this camera class to grab an image and load it with tf.read_file:
from utils.camera import Camera
import numpy as np
import cv2
import tensorflow as tf
cam = Camera(args)
cam.open()
cam.start()
img = cam.read()
img_raw = tf.read_file(img)  # here tf.read_file expects a filename string, but I am passing img, which is an array
No need to call tf.read_file(img): tf.read_file loads a file from disk, while cam.read() already returns a decoded NumPy array (note that this Camera class's read() returns just the image, not a (ret_val, image) pair). Convert the array to a tensor directly:
img = cam.read()
img = tf.convert_to_tensor(img, dtype=tf.float32)
img = tf.image.resize(img, (HEIGHT, WIDTH))
images = tf.expand_dims(img, axis=0) / 255.0
results = model.predict(images)
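If something downstream really does need the encoded bytes that tf.read_file would have produced, one option (a sketch of my own, assuming TF 2.x's tf.io namespace) is to encode the in-memory frame with OpenCV first:
import cv2
import tensorflow as tf

img = cam.read()                                  # BGR uint8 array from the Camera class
ok, buf = cv2.imencode('.jpg', img)               # imencode expects BGR and writes a normal JPEG
assert ok, 'JPEG encoding failed'
img_raw = buf.tobytes()                           # the same kind of value tf.read_file returns
decoded = tf.io.decode_jpeg(img_raw, channels=3)  # back to a uint8 tensor, in RGB order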
My goal is to get a 1280x1280 frame from nvarguscamerasrc. The problem is that nvarguscamerasrc scales the 3840x2160 frame to 1280x720. The consequence is that the bottom of the frame is always black.
JetsonCamera.py
import threading
from queue import Queue

import cv2


def gstreamer_pipeline(
    # Issue: the sensor formats used by the Raspberry Pi 4B and the NVIDIA Jetson Nano B01 differ.
    # On the Raspberry Pi 4B, this command
    #   $ libcamera-still --width 1280 --height 1280 --mode 1280:1280
    # uses sensor format 2328x1748.
    # However, v4l2-ctl --list-formats-ext does not list such a format.
capture_width=1920,
capture_height=1080,
display_width=640,
display_height=360,
framerate=21,
flip_method=0,
):
return (
"nvarguscamerasrc ! "
"video/x-raw(memory:NVMM), "
"width=(int)%d, height=(int)%d, "
"format=(string)NV12, framerate=(fraction)%d/1 ! "
"nvvidconv flip-method=%d ! "
"video/x-raw, width=(int)%d, height=(int)%d, format=(string)BGRx ! "
"videoconvert ! "
"video/x-raw, format=(string)BGR ! appsink"
% (
capture_width,
capture_height,
framerate,
flip_method,
display_width,
display_height,
)
)
class Camera(object):
frame_reader = None
cap = None
previewer = None
def __init__(self, width=640, height=360):
self.open_camera(width, height)
def open_camera(self, width=640, height=360):
self.cap = cv2.VideoCapture(gstreamer_pipeline(flip_method=0, display_width=width, display_height=height), cv2.CAP_GSTREAMER)
if not self.cap.isOpened():
raise RuntimeError("Failed to open camera!")
        if self.frame_reader is None:
self.frame_reader = FrameReader(self.cap, "")
self.frame_reader.daemon = True
self.frame_reader.start()
def getFrame(self, timeout = None):
return self.frame_reader.getFrame(timeout)
class FrameReader(threading.Thread):
queues = []
_running = True
camera = None
def __init__(self, camera, name):
threading.Thread.__init__(self)
self.name = name
self.camera = camera
def run(self):
while self._running:
_, frame = self.camera.read()
while self.queues:
queue = self.queues.pop()
queue.put(frame)
def addQueue(self, queue):
self.queues.append(queue)
def getFrame(self, timeout = None):
queue = Queue(1)
self.addQueue(queue)
return queue.get(timeout = timeout)
def stop(self):
self._running = False
main.py
import cv2

from JetsonCamera import Camera  # assuming the class above is saved as JetsonCamera.py

exit_ = False
if __name__ == "__main__":
camera = Camera(width=1280, height=1280)
while not exit_:
frame = camera.getFrame(2000)
cv2.imshow("Test", frame)
key = cv2.waitKey(1)
if key == ord('q'):
exit_ = True
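A workaround sketch (my own illustration, not from the thread; the sizes are assumptions): keep the requested output 16:9 so nothing gets letterboxed, make it at least 1280 pixels on both axes, then center-crop to 1280x1280 in NumPy:
import numpy as np

camera = Camera(width=2560, height=1440)      # exact 16:9, so scaling adds no black bars
frame = camera.getFrame(2000)                 # 1440x2560x3 BGR array
h, w = frame.shape[:2]
y0, x0 = (h - 1280) // 2, (w - 1280) // 2
square = frame[y0:y0 + 1280, x0:x0 + 1280]    # 1280x1280 center crop
You may also want to raise capture_width/capture_height in gstreamer_pipeline so the crop is not an upscale of a smaller capture.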
I am trying to save images from the Raspi cameras connected to my Jetson Nano. My code is below. The code reports that it is saving the files, but no matter which method I try, I cannot find the images. Thanks for your help. I've included a smaller snippet of just the while loop itself so it is easier to refer to.
While loop:
while True:
_ , left_image=left_camera.read()
_ , right_image=right_camera.read()
camera_images = np.hstack((left_image, right_image))
cv2.imshow("CSI Cameras", camera_images)
t1 = datetime.now()
cntdwn_timer = countdown - int ((t1-t2).total_seconds())
    # If the countdown reaches zero, record the next image
if cntdwn_timer == -1:
counter += 1
filename = './scenes/scene_'+ str(counter) + 'x'+'_'+ '.png'
#img = cv2.imread(camera_images)
#cv2.imwrite(os.path.join(os.path.expanduser('~'),'CSI-Camera', filename), camera_images)
cv2.imwrite('/home/aryan/CSI-Camera/{}'.format(filename), camera_images)
print (' monkey'+filename)
t2 = datetime.now()
time.sleep(1)
cntdwn_timer = 0 # To avoid "-1" timer display
    # This also acts as a ~30 ms delay between frames
keyCode = cv2.waitKey(30) & 0xFF
# Stop the program on the ESC key
if keyCode == 27:
break
left_camera.stop()
left_camera.release()
right_camera.stop()
right_camera.release()
cv2.destroyAllWindows()
import cv2
import threading
import numpy as np
import time
from datetime import datetime
# gstreamer_pipeline returns a GStreamer pipeline for capturing from the CSI camera
# Flip the image by setting the flip_method (most common values: 0 and 2)
# display_width and display_height determine the size of each camera pane in the window on the screen
left_camera = None
right_camera = None
#PiCam
# Photo session settings
total_photos = 30 # Number of images to take
countdown = 5 # Interval for count-down timer, seconds
font = cv2.FONT_HERSHEY_SIMPLEX  # Countdown timer font
class CSI_Camera:
def __init__ (self) :
# Initialize instance variables
# OpenCV video capture element
self.video_capture = None
# The last captured image from the camera
self.frame = None
self.grabbed = False
# The thread where the video capture runs
self.read_thread = None
self.read_lock = threading.Lock()
self.running = False
def open(self, gstreamer_pipeline_string):
try:
self.video_capture = cv2.VideoCapture(
gstreamer_pipeline_string, cv2.CAP_GSTREAMER
)
except RuntimeError:
self.video_capture = None
print("Unable to open camera")
print("Pipeline: " + gstreamer_pipeline_string)
return
# Grab the first frame to start the video capturing
self.grabbed, self.frame = self.video_capture.read()
def start(self):
if self.running:
print('Video capturing is already running')
return None
# create a thread to read the camera image
        if self.video_capture is not None:
self.running=True
self.read_thread = threading.Thread(target=self.updateCamera)
self.read_thread.start()
return self
def stop(self):
self.running=False
self.read_thread.join()
def updateCamera(self):
# This is the thread to read images from the camera
while self.running:
try:
grabbed, frame = self.video_capture.read()
with self.read_lock:
self.grabbed=grabbed
self.frame=frame
except RuntimeError:
print("Could not read image from camera")
# FIX ME - stop and cleanup thread
# Something bad happened
def read(self):
with self.read_lock:
frame = self.frame.copy()
grabbed=self.grabbed
return grabbed, frame
    def release(self):
        if self.video_capture is not None:
            self.video_capture.release()
            self.video_capture = None
        # Now kill the thread
        if self.read_thread is not None:
            self.read_thread.join()
# Currently, the frame rate of the CSI camera on the Nano is set through the GStreamer sensor mode.
# Here we directly select sensor_mode 3 (1280x720, 59.9999 fps)
def gstreamer_pipeline(
sensor_id=0,
sensor_mode=3,
capture_width=1280,
capture_height=720,
display_width=1280,
display_height=720,
framerate=30,
flip_method=0,
):
return (
"nvarguscamerasrc sensor-id=%d sensor-mode=%d ! "
"video/x-raw(memory:NVMM), "
"width=(int)%d, height=(int)%d, "
"format=(string)NV12, framerate=(fraction)%d/1 ! "
"nvvidconv flip-method=%d ! "
"video/x-raw, width=(int)%d, height=(int)%d, format=(string)BGRx ! "
"videoconvert ! "
"video/x-raw, format=(string)BGR ! appsink"
% (
sensor_id,
sensor_mode,
capture_width,
capture_height,
framerate,
flip_method,
display_width,
display_height,
)
)
def start_cameras():
left_camera = CSI_Camera()
left_camera.open(
gstreamer_pipeline(
sensor_id=0,
sensor_mode=3,
flip_method=0,
display_height=540,
display_width=960,
)
)
left_camera.start()
right_camera = CSI_Camera()
right_camera.open(
gstreamer_pipeline(
sensor_id=1,
sensor_mode=3,
flip_method=0,
display_height=540,
display_width=960,
)
)
right_camera.start()
cv2.namedWindow("CSI-AV Cameras", cv2.WINDOW_AUTOSIZE)
if (
not left_camera.video_capture.isOpened()
or not right_camera.video_capture.isOpened()
):
# Cameras did not open, or no camera attached
print("Unable to open any cameras")
# TODO: Proper Cleanup
        raise SystemExit(0)
counter = 0
t2 = datetime.now()
#Main stuff here
while True:
_ , left_image=left_camera.read()
_ , right_image=right_camera.read()
camera_images = np.hstack((left_image, right_image))
cv2.imshow("CSI Cameras", camera_images)
t1 = datetime.now()
cntdwn_timer = countdown - int ((t1-t2).total_seconds())
        # If the countdown reaches zero, record the next image
if cntdwn_timer == -1:
counter += 1
filename = './scenes/scene_'+ str(counter) + 'x'+'_'+ '.png'
#img = cv2.imread(camera_images)
#cv2.imwrite(os.path.join(os.path.expanduser('~'),'CSI-Camera', filename), camera_images)
cv2.imwrite('/home/aryan/CSI-Camera/{}'.format(filename), camera_images)
print (' monkey'+filename)
t2 = datetime.now()
time.sleep(1)
cntdwn_timer = 0 # To avoid "-1" timer display
        # This also acts as a ~30 ms delay between frames
keyCode = cv2.waitKey(30) & 0xFF
# Stop the program on the ESC key
if keyCode == 27:
break
left_camera.stop()
left_camera.release()
right_camera.stop()
right_camera.release()
cv2.destroyAllWindows()
if __name__ == "__main__":
start_cameras()
Put a breakpoint on the line where you save the image, then inspect the image you are about to save:
Does it have data inside?
Does it have a camera_images.shape?
What is the return value of the cv2.imwrite function?
Does the path you are trying to write to actually exist?
Did you append .png or .jpg to the filename?
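A minimal sketch of those checks (my own addition; the output directory is an assumption, and camera_images comes from the loop above):
import os
import cv2

out_dir = os.path.join(os.path.expanduser('~'), 'CSI-Camera', 'scenes')
os.makedirs(out_dir, exist_ok=True)               # make sure the target directory exists

print(camera_images.shape)                        # does the image actually hold data?
ok = cv2.imwrite(os.path.join(out_dir, 'scene_1.png'), camera_images)
print('imwrite returned:', ok)                    # False means nothing was written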
I'm trying to create a stereo vision camera on my Jetson Nano with 2 RasPi cameras. However, I can find a lot of information and code online for the RasPi but not for the Jetson Nano. So, for example, let's say I have these 2 Python programs: the first starts both cameras on the Jetson Nano, and the second starts both cameras on the RasPi. I'm quite new to all this, so it would be great to get some advice on how I could get started. Thanks!
Jetson (taken from JetsonHacks):
# MIT License
# Copyright (c) 2019,2020 JetsonHacks
# See license
# A very simple code snippet
# Using two CSI cameras (such as the Raspberry Pi Version 2) connected to a
# NVIDIA Jetson Nano Developer Kit (Rev B01) using OpenCV
# Drivers for the camera and OpenCV are included in the base image in JetPack 4.3+
# This script will open a window and place the camera stream from each camera in a window
# arranged horizontally.
# The camera streams are each read in their own thread, as when done sequentially there
# is a noticeable lag
# For better performance, the next step would be to experiment with having the window display
# in a separate thread
import cv2
import threading
import numpy as np
# gstreamer_pipeline returns a GStreamer pipeline for capturing from the CSI camera
# Flip the image by setting the flip_method (most common values: 0 and 2)
# display_width and display_height determine the size of each camera pane in the window on the screen
left_camera = None
right_camera = None
class CSI_Camera:
def __init__ (self) :
# Initialize instance variables
# OpenCV video capture element
self.video_capture = None
# The last captured image from the camera
self.frame = None
self.grabbed = False
# The thread where the video capture runs
self.read_thread = None
self.read_lock = threading.Lock()
self.running = False
def open(self, gstreamer_pipeline_string):
try:
self.video_capture = cv2.VideoCapture(
gstreamer_pipeline_string, cv2.CAP_GSTREAMER
)
except RuntimeError:
self.video_capture = None
print("Unable to open camera")
print("Pipeline: " + gstreamer_pipeline_string)
return
# Grab the first frame to start the video capturing
self.grabbed, self.frame = self.video_capture.read()
def start(self):
if self.running:
print('Video capturing is already running')
return None
# create a thread to read the camera image
        if self.video_capture is not None:
self.running=True
self.read_thread = threading.Thread(target=self.updateCamera)
self.read_thread.start()
return self
def stop(self):
self.running=False
self.read_thread.join()
def updateCamera(self):
# This is the thread to read images from the camera
while self.running:
try:
grabbed, frame = self.video_capture.read()
with self.read_lock:
self.grabbed=grabbed
self.frame=frame
except RuntimeError:
print("Could not read image from camera")
# FIX ME - stop and cleanup thread
# Something bad happened
def read(self):
with self.read_lock:
frame = self.frame.copy()
grabbed=self.grabbed
return grabbed, frame
    def release(self):
        if self.video_capture is not None:
            self.video_capture.release()
            self.video_capture = None
        # Now kill the thread
        if self.read_thread is not None:
            self.read_thread.join()
# Currently, the frame rate of the CSI camera on the Nano is set through the GStreamer sensor mode.
# Here we directly select sensor_mode 3 (1280x720, 59.9999 fps)
def gstreamer_pipeline(
sensor_id=0,
sensor_mode=3,
capture_width=1280,
capture_height=720,
display_width=1280,
display_height=720,
framerate=30,
flip_method=0,
):
return (
"nvarguscamerasrc sensor-id=%d sensor-mode=%d ! "
"video/x-raw(memory:NVMM), "
"width=(int)%d, height=(int)%d, "
"format=(string)NV12, framerate=(fraction)%d/1 ! "
"nvvidconv flip-method=%d ! "
"video/x-raw, width=(int)%d, height=(int)%d, format=(string)BGRx ! "
"videoconvert ! "
"video/x-raw, format=(string)BGR ! appsink"
% (
sensor_id,
sensor_mode,
capture_width,
capture_height,
framerate,
flip_method,
display_width,
display_height,
)
)
def start_cameras():
left_camera = CSI_Camera()
left_camera.open(
gstreamer_pipeline(
sensor_id=0,
sensor_mode=3,
flip_method=0,
display_height=540,
display_width=960,
)
)
left_camera.start()
right_camera = CSI_Camera()
right_camera.open(
gstreamer_pipeline(
sensor_id=1,
sensor_mode=3,
flip_method=0,
display_height=540,
display_width=960,
)
)
right_camera.start()
cv2.namedWindow("CSI Cameras", cv2.WINDOW_AUTOSIZE)
if (
not left_camera.video_capture.isOpened()
or not right_camera.video_capture.isOpened()
):
# Cameras did not open, or no camera attached
print("Unable to open any cameras")
# TODO: Proper Cleanup
        raise SystemExit(0)
while cv2.getWindowProperty("CSI Cameras", 0) >= 0 :
_ , left_image=left_camera.read()
_ , right_image=right_camera.read()
camera_images = np.hstack((left_image, right_image))
cv2.imshow("CSI Cameras", camera_images)
        # This also acts as a ~30 ms delay between frames
keyCode = cv2.waitKey(30) & 0xFF
# Stop the program on the ESC key
if keyCode == 27:
break
left_camera.stop()
left_camera.release()
right_camera.stop()
right_camera.release()
cv2.destroyAllWindows()
if __name__ == "__main__":
start_cameras()
RasPi (from https://github.com/realizator/stereopi-tutorial/blob/master/1_test.py):
# Copyright (C) 2019 Eugene Pomazov, <stereopi.com>, virt2real team
#
# This file is part of StereoPi tutorial scripts.
#
# StereoPi tutorial is free software: you can redistribute it
# and/or modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# StereoPi tutorial is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with StereoPi tutorial.
# If not, see <http://www.gnu.org/licenses/>.
#
# Most of this code is updated version of 3dberry.org project by virt2real
#
# Thanks to Adrian and http://pyimagesearch.com, as there are lot of
# code in this tutorial was taken from his lessons.
#
import picamera
from picamera import PiCamera
import time
import cv2
import numpy as np
import os
from datetime import datetime
# File for captured image
filename = './scenes/photo.png'
# Camera settings
cam_width = 1280
cam_height = 480
# Final image capture settings
scale_ratio = 0.5
# Camera resolution height must be divisible by 16, and width by 32
cam_width = int((cam_width+31)/32)*32
cam_height = int((cam_height+15)/16)*16
print ("Used camera resolution: "+str(cam_width)+" x "+str(cam_height))
# Buffer for captured image settings
img_width = int (cam_width * scale_ratio)
img_height = int (cam_height * scale_ratio)
capture = np.zeros((img_height, img_width, 4), dtype=np.uint8)
print ("Scaled image resolution: "+str(img_width)+" x "+str(img_height))
# Initialize the camera
camera = PiCamera(stereo_mode='side-by-side',stereo_decimate=False)
camera.resolution=(cam_width, cam_height)
camera.framerate = 20
camera.hflip = True
t2 = datetime.now()
counter = 0
avgtime = 0
# Capture frames from the camera
for frame in camera.capture_continuous(capture, format="bgra", use_video_port=True, resize=(img_width,img_height)):
counter+=1
t1 = datetime.now()
timediff = t1-t2
avgtime = avgtime + (timediff.total_seconds())
cv2.imshow("pair", frame)
key = cv2.waitKey(1) & 0xFF
t2 = datetime.now()
# if the `q` key was pressed, break from the loop and save last image
if key == ord("q") :
avgtime = avgtime/counter
print ("Average time between frames: " + str(avgtime))
print ("Average FPS: " + str(1/avgtime))
if (os.path.isdir("./scenes")==False):
os.makedirs("./scenes")
cv2.imwrite(filename, frame)
break
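To bridge the two, here is a hedged sketch of my own (reusing CSI_Camera and gstreamer_pipeline from the Jetson snippet above) that grabs one frame per sensor and stacks them into the same side-by-side layout the StereoPi script captures, so the rest of the StereoPi tutorial code gets a familiar input:
import os
import cv2
import numpy as np

left = CSI_Camera()
left.open(gstreamer_pipeline(sensor_id=0, display_width=960, display_height=540))
left.start()
right = CSI_Camera()
right.open(gstreamer_pipeline(sensor_id=1, display_width=960, display_height=540))
right.start()

_, left_img = left.read()
_, right_img = right.read()
pair = np.hstack((left_img, right_img))   # StereoPi-style side-by-side frame
os.makedirs('./scenes', exist_ok=True)
cv2.imwrite('./scenes/photo.png', pair)   # same filename the RasPi script uses

left.stop()
left.release()
right.stop()
right.release()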
I'm trying to add a high FPS screen recorder to my application.
I use Python 3.7 on Windows.
The modules and methods I've tried are mss (python-mss) and d3dshot, but I'm still only achieving 15-19 FPS for a long video (more than 20 seconds).
The resolution I'm recording at is 1920 x 1080.
What is the best way to optimize screen recording? I've tried the multiprocessing library, but it still doesn't seem fast enough. I'm not sure I'm using it optimally; what are some ways I could use it to improve processing performance?
Using OBS Studio, I'm able to get 30 FPS, no matter how long the video is. My objective is to achieve the same results with my own code.
Here is what I've written so far:
from multiprocessing import Process, Queue
from time import sleep, time
import cv2
import d3dshot
import numpy as np
def grab(queue):
d = d3dshot.create(capture_output="numpy", frame_buffer_size=500)
d.capture()
sleep(0.1)
c=0
begin = time()
while time() - begin < 30:
starter = time()
frame = d.get_latest_frame()
queue.put(frame)
c+=1
ender = time()
sleep(max(0, 1/60 - (ender -starter)))
# Tell the other worker to stop
queue.put(None)
final=time()
print(c/(final-begin))
d.stop()
def save(queue):
    # type: (Queue) -> None
    SCREEN_SIZE = 1920, 1080
    # Define the codec and create a VideoWriter object
    fourcc = cv2.VideoWriter_fourcc(*'DIVX')  # on Windows: DIVX
    out = cv2.VideoWriter("output.avi", fourcc, 30.0, SCREEN_SIZE)
    last_img = None
    while "there are screenshots":
        img = queue.get()
        if img is None:
            break
        if img is last_img:
            continue  # d3dshot may hand back the same frame twice; skip duplicates
        out.write(cv2.cvtColor(img, cv2.COLOR_RGB2BGR))  # d3dshot frames are RGB; VideoWriter expects BGR
        last_img = img
    out.release()  # finalize the file so the AVI is playable
if __name__ == "__main__":
# The screenshots queue
queue = Queue() # type: Queue
    # 2 processes: one for grabbing frames and one for writing them to the video file
Process(target=grab, args=(queue,)).start()
Process(target=save, args=(queue,)).start()
The goal is to capture a game, while performing automated keyboard and mouse actions.
I have faced the same problem trying to get high-speed recording of games. This was the fastest solution I was able to find for Windows. The code uses raw buffer objects and reaches around ~27 FPS. I cannot find the original post this code is based on, but if someone finds it I will add the reference.
Note that the framerate increases significantly if you make the region smaller than 1920x1080.
"""
Alternative screen capture device, for when there is no camera or webcam connected
to the desktop.
"""
import logging
import sys
import time
import cv2
import numpy as np
if sys.platform == 'win32':
import win32gui, win32ui, win32con, win32api
else:
logging.warning(f"Screen capture is not supported on platform: `{sys.platform}`")
from collections import namedtuple
class ScreenCapture:
"""
Captures a fixed region of the total screen. If no region is given
it will take the full screen size.
region_ltrb: Tuple[int, int, int, int]
Specific region that has to be taken from the screen using
the top left `x` and `y`, bottom right `x` and `y` (ltrb coordinates).
"""
__region = namedtuple('region', ('x', 'y', 'width', 'height'))
def __init__(self, region_ltrb=None):
self.region = region_ltrb
self.hwin = win32gui.GetDesktopWindow()
# Time management
self._time_start = time.time()
self._time_taken = 0
self._time_average = 0.04
def __getitem__(self, item):
return self.screenshot()
def __next__(self):
return self.screenshot()
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
self.close()
if exc_type and isinstance(exc_val, StopIteration):
return True
return False
    @staticmethod
def screen_dimensions():
""" Retrieve total screen dimensions. """
left = win32api.GetSystemMetrics(win32con.SM_XVIRTUALSCREEN)
top = win32api.GetSystemMetrics(win32con.SM_YVIRTUALSCREEN)
height = win32api.GetSystemMetrics(win32con.SM_CYVIRTUALSCREEN)
width = win32api.GetSystemMetrics(win32con.SM_CXVIRTUALSCREEN)
return left, top, height, width
    @property
def fps(self):
return int(1 / self._time_average) * (self._time_average > 0)
    @property
def region(self):
return self._region
    @property
def size(self):
return self._region.width, self._region.height
    @region.setter
def region(self, value):
if value is None:
self._region = self.__region(*self.screen_dimensions())
else:
            assert len(value) == 4, "Region requires 4 inputs: x, y of the top left and x, y of the bottom right."
left, top, x2, y2 = value
width = x2 - left + 1
height = y2 - top + 1
self._region = self.__region(*list(map(int, (left, top, width, height))))
def screenshot(self, color=None):
"""
Takes a part of the screen, defined by the region.
:param color: cv2.COLOR_....2...
Converts the created BGRA image to the requested image output.
:return: np.ndarray
An image of the region in BGRA values.
"""
left, top, width, height = self._region
hwindc = win32gui.GetWindowDC(self.hwin)
srcdc = win32ui.CreateDCFromHandle(hwindc)
memdc = srcdc.CreateCompatibleDC()
bmp = win32ui.CreateBitmap()
bmp.CreateCompatibleBitmap(srcdc, width, height)
memdc.SelectObject(bmp)
memdc.BitBlt((0, 0), (width, height), srcdc, (left, top), win32con.SRCCOPY)
signed_ints_array = bmp.GetBitmapBits(True)
img = np.frombuffer(signed_ints_array, dtype='uint8')
img.shape = (height, width, 4)
srcdc.DeleteDC()
memdc.DeleteDC()
win32gui.ReleaseDC(self.hwin, hwindc)
win32gui.DeleteObject(bmp.GetHandle())
        # Update a moving average of the time between screenshots, so the reported FPS varies only slightly.
self._time_taken, self._time_start = time.time() - self._time_start, time.time()
self._time_average = self._time_average * 0.95 + self._time_taken * 0.05
if color is not None:
return cv2.cvtColor(img, color)
return img
def show(self, screenshot=None):
""" Displays an image to the screen. """
image = screenshot if screenshot is not None else self.screenshot()
cv2.imshow('Screenshot', image)
if cv2.waitKey(1) & 0xff == ord('q'):
raise StopIteration
return image
def close(self):
""" Needs to be called before exiting when `show` is used, otherwise an error will occur. """
cv2.destroyWindow('Screenshot')
def scale(self, src: np.ndarray, size: tuple):
return cv2.resize(src, size, interpolation=cv2.INTER_LINEAR_EXACT)
    def save(self, path, screenshot=None):
        """ Store the current screenshot in the provided path. A full path, including the image name, is required. """
        image = screenshot if screenshot is not None else self.screenshot()
        cv2.imwrite(filename=path, img=image)
if __name__ == '__main__':
# Example usage when displaying.
with ScreenCapture((0, 0, 1920, 1080)) as capture:
for _ in range(100):
capture.show()
print(f"\rCapture framerate: {capture.fps}", end='')
# Example usage as generator.
start_time = time.perf_counter()
for frame, screenshot in enumerate(ScreenCapture((0, 0, 1920, 1080)), start=1):
print(f"\rFPS: {frame / (time.perf_counter() - start_time):3.0f}", end='')
Edit
I noticed some small mistakes in the window show function and in the self.screenshot calls in the __getitem__ and __next__ methods. These have been resolved.
In addition to the example that uses ScreenCapture as a context manager, I added an example of using it as a generator.
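A short usage sketch (my own addition, assuming the ScreenCapture class above) that writes captured frames straight to an AVI, which is what the original question set out to do:
import cv2

capture = ScreenCapture((0, 0, 1919, 1079))   # ltrb is inclusive here, so this region is 1920x1080
fourcc = cv2.VideoWriter_fourcc(*'DIVX')
out = cv2.VideoWriter('capture.avi', fourcc, 30.0, capture.size)
for _ in range(300):                          # roughly 10 seconds at 30 FPS
    out.write(capture.screenshot(color=cv2.COLOR_BGRA2BGR))
out.release()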
I'm trying to save both the depth and color images of the Intel RealSense D435i camera in a list of 300 images. Then I will use multiprocessing to save this chunk of 300 images onto my disk. But every time I try, the program successfully appends 15 images to the list and then I get this error:
Frame didn't arrived within 5000
I made sure I had the 64-bit version of Python 3.6 installed, and the camera streams perfectly well when I do not try to save the images in a list. The RealSense Viewer works great too. I also tried different resolutions and frame rates, but it doesn't seem to help. What is interesting is that if I only save the color images, I do not get the error; instead I get the same color image over and over in the list.
import numpy as np
import pyrealsense2 as rs

if __name__ == '__main__':
pipeline = rs.pipeline()
config = rs.config()
config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30)
config.enable_stream(rs.stream.color, 1280, 720, rs.format.bgr8, 30)
profile = pipeline.start(config)
depth_sensor = profile.get_device().first_depth_sensor()
depth_sensor.set_option(
rs.option.visual_preset, 3
) # Set high accuracy for depth sensor
depth_scale = depth_sensor.get_depth_scale()
align_to = rs.stream.color
align = rs.align(align_to)
# Init variables
im_count = 0
image_chunk = []
image_chunk2 = []
# sentinel = True
try:
while True:
# Wait for a coherent pair of frames: depth and color
frames = pipeline.wait_for_frames()
aligned_frames = align.process(frames)
aligned_depth_frame = aligned_frames.get_depth_frame()
color_frame = aligned_frames.get_color_frame()
if not aligned_depth_frame or not color_frame:
print("problem here")
raise RuntimeError("Could not acquire depth or color frames.")
depth_image = np.asanyarray(aligned_depth_frame.get_data())
color_image = np.asanyarray(color_frame.get_data())
image_chunk.append(color_image)
image_chunk2.append(depth_image)
except Exception as e:
print(e)
finally:
# Stop streaming
pipeline.stop()
I simply need it to save 300 images in a row, that's all, so I am quite troubled as to what is causing this issue.
Holding onto the frame locks its memory, and eventually the pipeline hits its internal frame-pool limit, which prevents it from acquiring more images. Even though you are creating an image, the data is still owned by the frame. You need to copy the image after you create it to release the link to the frame's memory.
depth_image = np.asanyarray(aligned_depth_frame.get_data())
color_image = np.asanyarray(color_frame.get_data())
depth_image = depth_image.copy()
color_image = color_image.copy()
image_chunk.append(color_image)
image_chunk2.append(depth_image)
Read more on frames and memory management here:
https://dev.intelrealsense.com/docs/frame-management
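That page also documents frame.keep(), the librealsense-native way to hold frames past the default pool size. A hedged sketch (assuming pyrealsense2 exposes keep() as documented; kept_framesets is a hypothetical list, and memory use grows with every kept frame):
frames = pipeline.wait_for_frames()
frames.keep()                  # ask librealsense not to recycle this frame's memory
kept_framesets.append(frames)  # safe to process after the capture loop finishes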
I created a wrapper class to extract the various elements out of the frame set that can't be recreated later. It's a bit heavy, but shows some common operations that might be helpful for others:
import cv2
import numpy as np
import pyrealsense2 as rs

colorizer = None
align_to_depth = None
align_to_color = None
pointcloud = rs.pointcloud()
class IntelD435ImagePacket:
"""
Class that contains image and associated processing data.
"""
    @property
def frame_id(self):
return self._frame_id
    @property
def timestamp(self):
return self._timestamp
    @property
def image_color(self):
return self._image_color
    @property
def image_depth(self):
return self._image_depth
    @property
def image_color_aligned(self):
return self._image_color_aligned
    @property
def image_depth_aligned(self):
return self._image_depth_aligned
    @property
def image_depth_colorized(self):
        if self._image_depth_colorized is None:  # avoid ambiguous truth-value check on a NumPy array
            self._image_depth_colorized = cv2.applyColorMap(self.image_depth, cv2.COLORMAP_JET)
        return self._image_depth_colorized
    @property
def intrinsics(self):
return self._intrinsics
    @property
def pointcloud(self):
return self._pointcloud
    @property
def pointcloud_texture(self):
return self._pointcloud_texture
def _rs_intrinsics_to_opencv_matrix(self, rs_intrinsics):
fx = rs_intrinsics.fx
fy = rs_intrinsics.fy
cx = rs_intrinsics.ppx
cy = rs_intrinsics.ppy
s = 0 # skew
return np.array([fx, s, cx,
0, fy, cy,
0, 0, 1]).reshape(3, 3)
def __init__(self, frame_set, frame_id=None, timestamp=None, *args, **kwargs):
global colorizer
if not colorizer:
colorizer = rs.colorizer()
colorizer.set_option(rs.option.color_scheme, 0)
global align_to_depth
if not align_to_depth:
align_to_depth = rs.align(rs.stream.depth)
global align_to_color
if not align_to_color:
align_to_color = rs.align(rs.stream.color)
global pointcloud
if not pointcloud:
pointcloud = rs.pointcloud()
# Get intrinsics
profile = frame_set.get_profile()
video_stream_profile = profile.as_video_stream_profile()
rs_intrinsics = video_stream_profile.get_intrinsics()
self._intrinsics = self._rs_intrinsics_to_opencv_matrix(rs_intrinsics)
# Get pointcloud
depth_frame = frame_set.get_depth_frame()
color_frame = frame_set.get_color_frame()
pointcloud.map_to(color_frame)
points = pointcloud.calculate(depth_frame)
vtx = np.asanyarray(points.get_vertices())
points_arr = vtx.view(np.float32).reshape(vtx.shape + (-1,)).copy()
self._pointcloud = points_arr
# Get pointcloud texture mapping
tex = np.asanyarray(points.get_texture_coordinates())
color_map_arr = tex.view(np.float32).reshape(tex.shape + (-1,)).copy()
self._pointcloud_texture = color_map_arr
# Extract color image
color_frame = frame_set.get_color_frame()
self._image_color = np.asanyarray(color_frame.get_data()).copy()
# Extract depth image
depth_frame = frame_set.get_depth_frame()
self._image_depth = np.asanyarray(depth_frame.get_data()).copy()
# Align the color frame to depth frame and extract color image
color_frame_aligned = align_to_depth.process(frame_set).get_color_frame()
self._image_color_aligned = np.asanyarray(color_frame_aligned.get_data()).copy()
# Align the depth frame to color frame and extract depth image
depth_frame_aligned = align_to_color.process(frame_set).get_depth_frame()
self._image_depth_aligned = np.asanyarray(depth_frame_aligned.get_data()).copy()
self._image_depth_colorized = None
if frame_id:
self._frame_id = frame_id
else:
self._frame_id = frame_set.frame_number
if timestamp:
self._timestamp = timestamp
else:
self._timestamp = frame_set.timestamp
self.__dict__.update(kwargs)
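A short usage sketch (my own addition, assuming the pipeline setup from the question): build one packet per frame set and keep only the copied arrays, which sidesteps the frame-memory limit described above:
frames = pipeline.wait_for_frames()
packet = IntelD435ImagePacket(frames)
image_chunk.append(packet.image_color)    # safe to hold: the data was copied out of the frame
image_chunk2.append(packet.image_depth)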