I'm using OpenCV (3.4.6) to capture video from a Raspberry Pi HQ camera, attached to a Raspberry Pi 4 running Raspbian 10. Bounding boxes suggested by an object detection algorithm are then drawn onto the captured frames, and the output saved to a video file.
Everything works great, except when I try to crop the video to my region of interest.
Using the following line successfully crops the image (as confirmed by cv2.imshow):
frame = frame[0:720, 280:1000]
But, it also stops the output being saved by out.write - I just get an empty file, and being new to both Python and OpenCV, I'm struggling to work out why.
I've trimmed down the code to the relevant part below (apologies - it's still a bit long - I'm not entirely sure what might be important):
# begin video capture
try:
vid = cv2.VideoCapture(int(video_path), cv2.CAP_V4L2)
except:
vid = cv2.VideoCapture(video_path)
out = None
# set capture resolution
inputcodec = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G') # forces MJPG codec
vid.set(6, inputcodec) # sets inputcodec for video capture
vid.set(3, 1280) # sets video capture width
vid.set(4, 720) # sets video capture height
if FLAGS.output:
# by default VideoCapture returns float instead of int
width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(vid.get(cv2.CAP_PROP_FPS))
codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
while True:
return_value, frame = vid.read()
frame = frame[0:720, 280:1000] # crop video to square region of interest
if return_value:
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
image = Image.fromarray(frame)
else:
print('Video has ended or failed, try a different video format!')
break
frame_size = frame.shape[:2]
image_data = cv2.resize(frame, (input_size, input_size))
image_data = image_data / 255.
image_data = image_data[np.newaxis, ...].astype(np.float32)
start_time = time.time()
if FLAGS.framework == 'tflite':
interpreter.set_tensor(input_details[0]['index'], image_data)
interpreter.invoke()
pred = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))]
if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25,
input_shape=tf.constant([input_size, input_size]))
else:
boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25,
input_shape=tf.constant([input_size, input_size]))
else:
batch_data = tf.constant(image_data)
pred_bbox = infer(batch_data)
for key, value in pred_bbox.items():
boxes = value[:, :, 0:4]
pred_conf = value[:, :, 4:]
boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
scores=tf.reshape(
pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
max_output_size_per_class=50,
max_total_size=50,
iou_threshold=FLAGS.iou,
score_threshold=FLAGS.score
)
pred_bbox = [boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy()]
image = utils.draw_bbox(frame, pred_bbox)
fps = 1.0 / (time.time() - start_time)
print("FPS: %.2f" % fps)
result = np.asarray(image)
cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
if not FLAGS.dont_show:
cv2.imshow("result", result)
if FLAGS.output:
out.write(result)
if cv2.waitKey(1) & 0xFF == ord('q'): break
cv2.destroyAllWindows()
Any suggestions for how to resolve this would be greatly appreciated.
You created the VideoWriter and set its width and height to 1280 by 720. Later you told it to save 720 by 720 crops. That is the problem. frame = frame[0:720, 280:1000] is a 720 by 720 array.
Decide what dimensions your video should have, and use them consistently. A video stream can't change resolution.
Related
I am using IDS camera functions which I need to capture some images whenever the camera receives an external trigger.
"ueye.is_SetExternalTrigger(hcam, ueye.IS_SET_TRIGGER_HI_LO)"
I can able to connect the camera while it was triggering but I could not save the images to my local memory. The documentation explains to us that images were saved to a memory buffer for every trigger change (HI_LO/LO_HI, etc..)
Does anybody can explain how to use "ueye.is_WaitForNextImage" & "ueye.is_CameraStatus" functions.
Can anybody have experience in these modules? just help me to use them.
I am defining the memory pointer as just "mem_ptr = ueye.c_mem_p()". Does anyone know how to define a specific memory pointer in any particular way and how to access it??
Thank You.
I have tried this one and some other ways also, but I cannot write everything here.
hCam = ueye.HIDS(0) #0: first available camera; 1-254: The camera with the specified camera ID
sInfo = ueye.SENSORINFO()
cInfo = ueye.CAMINFO()
pcImageMemory = ueye.c_mem_p()
MemID = ueye.int()
rectAOI = ueye.IS_RECT()
pitch = ueye.INT()
nBitsPerPixel = ueye.INT(24) #24: bits per pixel for color mode; take 8 bits per pixel for monochrome
channels = 3 #3: channels for color mode(RGB); take 1 channel for monochrome
m_nColorMode = ueye.INT() # Y8/RGB16/RGB24/REG32
bytes_per_pixel = int(nBitsPerPixel / 8)
nRet = ueye.is_InitCamera(hCam, None)
nRet = ueye.is_SetExternalTrigger(hCam, ueye.IS_SET_TRIGGER_HI_LO)
nRet = ueye.is_SetDisplayMode(hCam, ueye.IS_SET_DM_DIB)
nRet = ueye.is_AOI(hCam, ueye.IS_AOI_IMAGE_GET_AOI, rectAOI, ueye.sizeof(rectAOI))
width = rectAOI.s32Width
height = rectAOI.s32Height
nRet = ueye.is_AllocImageMem(hCam, width, height, nBitsPerPixel, pcImageMemory, MemID)
nRet = ueye.is_CaptureVideo(hCam, ueye.IS_DONT_WAIT)
d=0
while(nRet == ueye.IS_SUCCESS):
ueye.is_WaitForNextImage(hCam, 500, pcImageMemory, MemID)
array = ueye.get_data(pcImageMemory, width, height, nBitsPerPixel, pitch, copy=False)
frame = np.reshape(array,(height.value, width.value, bytes_per_pixel))
frame = cv2.resize(frame,(0,0),fx=0.5, fy=0.5)
cv2.imshow("SimpleLive_Python_uEye_OpenCV", frame)
filename = "images/file_%d.jpg"%d
cv2.imwrite(filename, img)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cv2.destroyAllWindows()
ret = ueye.is_StopLiveVideo(hcam, ueye.IS_FORCE_VIDEO_STOP)
ret = ueye.is_ExitCamera(hcam)
When I execute following:
availableBackends = [cv2.videoio_registry.getBackendName(b) for b in cv2.videoio_registry.getBackends()]
print(availableBackends)
I get ['FFMPEG', 'GSTREAMER', 'INTEL_MFX', 'V4L2', 'CV_IMAGES', 'CV_MJPEG'].
If I now try:
print(cv2.CAP_FFMPEG)
print(cv2.CAP_GSTREAMER)
print(cv2.CAP_INTEL_MFX)
print(cv2.CAP_V4L2)
print(cv2.CAP_IMAGES)
print(cv2.CAP_MJPEG)
all work except the last one:
AttributeError: module 'cv2.cv2' has no attribute 'CAP_MJPEG'
How can I explicitly set cv2.CAP_MJPEG backend (cv2.CAP_CV_MJPEG does not work either)?
You can see all the flags here.
It looks like cv2.CAP_OPENCV_MJPEG is what you are looking for.
The following test creates MJPEG synthetic AVI video file, and reads the video using cv2.CAP_OPENCV_MJPEG backend:
import numpy as np
import cv2
#availableBackends = [cv2.videoio_registry.getBackendName(b) for b in cv2.videoio_registry.getBackends()]
#print(availableBackends)
print('cv2.CAP_OPENCV_MJPEG = ' + str(cv2.CAP_OPENCV_MJPEG))
intput_filename = 'vid.avi'
# Generate synthetic video files to be used as input:
###############################################################################
width, height, n_frames = 640, 480, 100 # 100 frames, resolution 640x480
# Use motion JPEG codec (for testing)
synthetic_out = cv2.VideoWriter(intput_filename, cv2.VideoWriter_fourcc(*'MJPG'), 25, (width, height))
for i in range(n_frames):
img = np.full((height, width, 3), 60, np.uint8)
cv2.putText(img, str(i+1), (width//2-100*len(str(i+1)), height//2+100), cv2.FONT_HERSHEY_DUPLEX, 10, (30, 255, 30), 20) # Green number
synthetic_out.write(img)
synthetic_out.release()
###############################################################################
# Read the video using CV_MJPEG backend
###############################################################################
cap = cv2.VideoCapture(intput_filename, cv2.CAP_OPENCV_MJPEG)
# Keep iterating break
while True:
ret, frame = cap.read() # Read frame from first video
if ret:
cv2.imshow('frame', frame) # Display frame for testing
cv2.waitKey(100) #Wait 100msec (for debugging)
else:
break
cap.release() #Release must be inside the outer loop
###############################################################################
I am working with a 2h-long 4K MP4 video shot at 25 fps. I am using openCV in python 3. From the video, I need to extract 3 circular ROIs.
Because of the large number of frames (212831), processing the whole video frame by frame with the code below takes over 24h on a 64 GB RAM workstation. Is there a way to speed up the processing or a workaround that does something slightly different to a similar effect?
Here is the code.
import numpy as np
import cv2
import sys
import time
# name of source video and paths
video = 'C0023_2hanalysis'
input_vidpath = 'path_to_video/' + video + '.MP4'
output_vidpath = 'path_to_video/' + video + '-withROI.MP4'
codec = 'avc1'
# set ROI coordinates extrapolated from last video frame as well as fixed parameters for analysis (radius)
x = 1188 # in pixels
y = 1204 # in pixels
radius = 75
# set parameters for output video as identical to input
fps = 25.0
scaling = 1.0 # output vs input video speed (?)
## import video
cap = cv2.VideoCapture(input_vidpath)
if cap.isOpened() == False:
sys.exit('Video file cannot be read! Please check input_vidpath to ensure it is correctly pointing to the video file')
## Video writer class to output video
fourcc = cv2.VideoWriter_fourcc(*codec) # concatenate the 4 chars to a fourcc code, i.e. the 4-char name of the codec used to compress the frames
# adjust output frame size to scaling if any is applied
#(frame shape is given as height,width , so the output needs to be re-ordered to match VideoWriter arguments)
o_height = cap.read()[1].shape[0]
o_width = cap.read()[1].shape[1]
output_framesize = (int(o_width*scaling),int(o_height*scaling))
out = cv2.VideoWriter(filename = output_vidpath, fourcc = 0x7634706d, fps = fps, frameSize = output_framesize, isColor = True)
## apply ROI frame by frame and thread them back into output video
start = time.time()
f = -1
last = 0
while(True):
# Capture frame-by-frame
ret, frame = cap.read() #'return' value (T/F) and frame
this = cap.get(1) # get 'CV_CAP_PROP_POS_FRAMES'
if ret == True:
#frame = cv2.resize(frame, None, fx = scaling, fy = scaling, interpolation = cv2.INTER_LINEAR) # no need to resize in this case
# Apply mask to area of interest
mask = np.zeros((o_height,o_width), np.uint8)
mask = cv2.circle(mask,(x,y),radius,255,thickness=-1) #image, row and column coord of centre of circle, radius, color (black), thickness
frame[mask == 0] = 0
out.write(frame)
key = cv2.waitKey(1) & 0xFF
# if the `q` key was pressed, break from the loop
if key == ord("q"):
break
f += 1
if f%1000==0:
print(f)
if last == this:
break
last = this
## When everything done, release the capture
cap.release()
out.release()
cv2.destroyAllWindows()
cv2.waitKey(1)
## End time and duration
end = time.time()
duration = end - start
print("--- %s seconds ---" %duration)
This is a common mistake. You shouldn't call waitKey(1) when you want to process the frames as fast possible. That function is basically add a short sleep after processing each frame, and that sleep time is much longer that processing time.
You just need to remove that, and still you can just kill the process if want to stop in the middle.
import numpy as np
import cv2
import sys
import time
# name of source video and paths
video = 'C0023_2hanalysis'
input_vidpath = 'path_to_video/' + video + '.MP4'
output_vidpath = 'path_to_video/' + video + '-withROI.MP4'
codec = 'avc1'
# set ROI coordinates extrapolated from last video frame as well as fixed parameters for analysis (radius)
x = 1188 # in pixels
y = 1204 # in pixels
radius = 75
# set parameters for output video as identical to input
fps = 25.0
scaling = 1.0 # output vs input video speed (?)
## import video
cap = cv2.VideoCapture(input_vidpath)
if cap.isOpened() == False:
sys.exit('Video file cannot be read! Please check input_vidpath to ensure it is correctly pointing to the video file')
## Video writer class to output video
fourcc = cv2.VideoWriter_fourcc(*codec) # concatenate the 4 chars to a fourcc code, i.e. the 4-char name of the codec used to compress the frames
# adjust output frame size to scaling if any is applied
#(frame shape is given as height,width , so the output needs to be re-ordered to match VideoWriter arguments)
o_height = cap.read()[1].shape[0]
o_width = cap.read()[1].shape[1]
output_framesize = (int(o_width*scaling),int(o_height*scaling))
out = cv2.VideoWriter(filename = output_vidpath, fourcc = 0x7634706d, fps = fps, frameSize = output_framesize, isColor = True)
## apply ROI frame by frame and thread them back into output video
start = time.time()
f = -1
last = 0
while(True):
# Capture frame-by-frame
ret, frame = cap.read() #'return' value (T/F) and frame
this = cap.get(1) # get 'CV_CAP_PROP_POS_FRAMES'
if ret == True:
#frame = cv2.resize(frame, None, fx = scaling, fy = scaling, interpolation = cv2.INTER_LINEAR) # no need to resize in this case
# Apply mask to area of interest
mask = np.zeros((o_height,o_width), np.uint8)
mask = cv2.circle(mask,(x,y),radius,255,thickness=-1) #image, row and column coord of centre of circle, radius, color (black), thickness
frame[mask == 0] = 0
out.write(frame)
f += 1
if f%1000==0:
print(f)
if last == this:
break
last = this
## When everything done, release the capture
cap.release()
out.release()
cv2.destroyAllWindows()
cv2.waitKey(1)
## End time and duration
end = time.time()
duration = end - start
print("--- %s seconds ---" %duration)
I wrote a code that takes a video and recognizes face of a human and calculate the distance of the human from the camera (which is the eye of a robot). I need the data in a txt format so I am writing it in the distance_data.txt but my file is always empty.
I had run the code in a mac computer and it worked perfectly fine and the file was not empty.
import cv2
import time
import numpy as np
person_cascade = cv2.CascadeClassifier('human_recognition.txt')
cap = cv2.VideoCapture('oct23_2.avi')
# Define the codec and create VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter('oct23_2out.avi',fourcc, 20.0, (640,360))
distance_data = open('distance_data.txt','w')
timer=-1
while (cap.isOpened()):
r, frame = cap.read()
if r:
frame = cv2.resize(frame,(640,360)) # Downscale to improve frame rate
gray_frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY) # Haar-cascade classifier needs a grayscale image
cv2.imshow('gray',gray_frame)
#print(gray_frame.type())
rects = person_cascade.detectMultiScale(gray_frame)
max=0
#print(rects)
(x, y, w, h) = rects[0]
cv2.rectangle(frame, (x,y), (x+w ,y+h),(0,255,0),2)
#fshape = frame.shape
#fheight = fshape[0]
#fwidth = fshape[1]
timer=timer+1
xpos=(0.03421)*(w*w)-8.1183*w+521.841
yn=360-y
heightinvid=0.0197377*yn-0.0194
realhieght=((-0.4901)*(heightinvid* heightinvid)+4.4269*heightinvid+8.30356)/ 0.927183854
horizontalp=x+w/2
#print(timer+1 , ",", xpos, )
#if (xpos<=41 and xpos>=40):
#print("height of the human is ", realhieght+149, "or", (-0.000184)*yn*yn+0.08506*yn+8.43461+149)
cm_deviation_in_x=-0.02033898*(320-horizontalp)
#print("Horizontal position of human is ", cm_deviation_in_x, " cm.")
newxpos = format (xpos, '.3f')
distance_data.write("Hi")
distance_data.write(str(newxpos))
distance_data.write(',')
distance_data.write(str(format(cm_deviation_in_x, '.3f')))
distance_data.write("\r\n")
#print(w)
#print("x position of the human is = ",xpos)
#fourcc = cv2.VideoWriter_fourcc(*'XVID')
#out = cv2.VideoWriter('output25.avi',fourcc, 20.0, (fwidth,fheight))
cv2.imshow("preview", frame)
out.write(frame)
k = cv2.waitKey(10)
#print(w,h)
if k & 0xFF == ord("q"): # Exit condition
break
distance_data.flush()
cap.release()
cv2.destroyAllWindows()
I'm trying to send pictures to the aws rekognition from my webcam to detect the activity of the person sitting in front of it using python.
To do so I take a picture every 5 seconds and I send it to the aws.
But when I do so it seems that he's always sending back information about the first frame that I sent
cap = cv2.VideoCapture(0)
while 1:
ret, img = cap.read()
client=boto3.client('rekognition')
print("hello")
ret, fileImg=cv2.imencode('.png',img)
response = client.detect_labels(Image={'Bytes':fileImg.tobytes()})
print('Detected labels for Camera Capture')
for label in response['Labels']:
print (label['Name'] + ' : ' + str(label['Confidence']))
sleep(5)
Here is the result i get from that call:
Detected labels for Camera Capture
Human : 99.1103897095
People : 99.1103744507
Person : 99.1103897095
Face : 56.5527687073
Crypt : 51.1719360352
hello
Detected labels for Camera Capture
Human : 99.0247421265
People : 99.0247344971
Person : 99.0247421265
Face : 57.7796173096
Lighting : 51.8473701477
Crypt : 51.08152771
hello
Detected labels for Camera Capture
Human : 99.0808181763
People : 99.0808105469
Person : 99.0808181763
Face : 56.4268836975
Lighting : 54.6302490234
Crypt : 50.8622779846
hello
Knowing during the time of the call the image has changed a lot and should (at least I think) show me other results.
Here's some code that I use to put rectangles around faces in a similar way:
import cv2
import numpy as np
import boto3
# Setup
scale_factor = .15
green = (0,255,0)
red = (0,0,255)
frame_thickness = 2
cap = cv2.VideoCapture(0)
rekognition = boto3.client('rekognition')
while(True):
# Capture frame-by-frame
ret, frame = cap.read()
height, width, channels = frame.shape
# Convert frame to jpg
small = cv2.resize(frame, (int(width * scale_factor), int(height * scale_factor)))
ret, buf = cv2.imencode('.jpg', small)
# Detect faces in jpg
faces = rekognition.detect_faces(Image={'Bytes':buf.tobytes()}, Attributes=['ALL'])
# Draw rectangle around faces
for face in faces['FaceDetails']:
smile = face['Smile']['Value']
cv2.rectangle(frame,
(int(face['BoundingBox']['Left']*width),
int(face['BoundingBox']['Top']*height)),
(int((face['BoundingBox']['Left']+face['BoundingBox']['Width'])*width),
int((face['BoundingBox']['Top']+face['BoundingBox']['Height'])*height)),
green if smile else red, frame_thickness)
# Display the resulting frame
cv2.imshow('frame', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
# When everything done, release the capture
cap.release()
cv2.destroyAllWindows()
It scales-down the picture because Rekognition doesn't need full size to detect faces.