How to explicitly access mjpeg backend for videocapture in opencv - python

When I execute following:
availableBackends = [cv2.videoio_registry.getBackendName(b) for b in cv2.videoio_registry.getBackends()]
print(availableBackends)
I get ['FFMPEG', 'GSTREAMER', 'INTEL_MFX', 'V4L2', 'CV_IMAGES', 'CV_MJPEG'].
If I now try:
print(cv2.CAP_FFMPEG)
print(cv2.CAP_GSTREAMER)
print(cv2.CAP_INTEL_MFX)
print(cv2.CAP_V4L2)
print(cv2.CAP_IMAGES)
print(cv2.CAP_MJPEG)
all work except the last one:
AttributeError: module 'cv2.cv2' has no attribute 'CAP_MJPEG'
How can I explicitly set cv2.CAP_MJPEG backend (cv2.CAP_CV_MJPEG does not work either)?

You can see all the flags here.
It looks like cv2.CAP_OPENCV_MJPEG is what you are looking for.
The following test creates MJPEG synthetic AVI video file, and reads the video using cv2.CAP_OPENCV_MJPEG backend:
import numpy as np
import cv2
#availableBackends = [cv2.videoio_registry.getBackendName(b) for b in cv2.videoio_registry.getBackends()]
#print(availableBackends)
print('cv2.CAP_OPENCV_MJPEG = ' + str(cv2.CAP_OPENCV_MJPEG))
intput_filename = 'vid.avi'
# Generate synthetic video files to be used as input:
###############################################################################
width, height, n_frames = 640, 480, 100 # 100 frames, resolution 640x480
# Use motion JPEG codec (for testing)
synthetic_out = cv2.VideoWriter(intput_filename, cv2.VideoWriter_fourcc(*'MJPG'), 25, (width, height))
for i in range(n_frames):
img = np.full((height, width, 3), 60, np.uint8)
cv2.putText(img, str(i+1), (width//2-100*len(str(i+1)), height//2+100), cv2.FONT_HERSHEY_DUPLEX, 10, (30, 255, 30), 20) # Green number
synthetic_out.write(img)
synthetic_out.release()
###############################################################################
# Read the video using CV_MJPEG backend
###############################################################################
cap = cv2.VideoCapture(intput_filename, cv2.CAP_OPENCV_MJPEG)
# Keep iterating break
while True:
ret, frame = cap.read() # Read frame from first video
if ret:
cv2.imshow('frame', frame) # Display frame for testing
cv2.waitKey(100) #Wait 100msec (for debugging)
else:
break
cap.release() #Release must be inside the outer loop
###############################################################################

Related

Opencv fails to save video when cropped using numpy slicing

I'm using OpenCV (3.4.6) to capture video from a Raspberry Pi HQ camera, attached to a Raspberry Pi 4 running Raspbian 10. Bounding boxes suggested by an object detection algorithm are then drawn onto the captured frames, and the output saved to a video file.
Everything works great, except when I try to crop the video to my region of interest.
Using the following line successfully crops the image (as confirmed by cv2.imshow):
frame = frame[0:720, 280:1000]
But, it also stops the output being saved by out.write - I just get an empty file, and being new to both Python and OpenCV, I'm struggling to work out why.
I've trimmed down the code to the relevant part below (apologies - it's still a bit long - I'm not entirely sure what might be important):
# begin video capture
try:
vid = cv2.VideoCapture(int(video_path), cv2.CAP_V4L2)
except:
vid = cv2.VideoCapture(video_path)
out = None
# set capture resolution
inputcodec = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G') # forces MJPG codec
vid.set(6, inputcodec) # sets inputcodec for video capture
vid.set(3, 1280) # sets video capture width
vid.set(4, 720) # sets video capture height
if FLAGS.output:
# by default VideoCapture returns float instead of int
width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(vid.get(cv2.CAP_PROP_FPS))
codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
while True:
return_value, frame = vid.read()
frame = frame[0:720, 280:1000] # crop video to square region of interest
if return_value:
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
image = Image.fromarray(frame)
else:
print('Video has ended or failed, try a different video format!')
break
frame_size = frame.shape[:2]
image_data = cv2.resize(frame, (input_size, input_size))
image_data = image_data / 255.
image_data = image_data[np.newaxis, ...].astype(np.float32)
start_time = time.time()
if FLAGS.framework == 'tflite':
interpreter.set_tensor(input_details[0]['index'], image_data)
interpreter.invoke()
pred = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))]
if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25,
input_shape=tf.constant([input_size, input_size]))
else:
boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25,
input_shape=tf.constant([input_size, input_size]))
else:
batch_data = tf.constant(image_data)
pred_bbox = infer(batch_data)
for key, value in pred_bbox.items():
boxes = value[:, :, 0:4]
pred_conf = value[:, :, 4:]
boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
scores=tf.reshape(
pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
max_output_size_per_class=50,
max_total_size=50,
iou_threshold=FLAGS.iou,
score_threshold=FLAGS.score
)
pred_bbox = [boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy()]
image = utils.draw_bbox(frame, pred_bbox)
fps = 1.0 / (time.time() - start_time)
print("FPS: %.2f" % fps)
result = np.asarray(image)
cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
if not FLAGS.dont_show:
cv2.imshow("result", result)
if FLAGS.output:
out.write(result)
if cv2.waitKey(1) & 0xFF == ord('q'): break
cv2.destroyAllWindows()
Any suggestions for how to resolve this would be greatly appreciated.
You created the VideoWriter and set its width and height to 1280 by 720. Later you told it to save 720 by 720 crops. That is the problem. frame = frame[0:720, 280:1000] is a 720 by 720 array.
Decide what dimensions your video should have, and use them consistently. A video stream can't change resolution.

I captured my video to frames. how can i get it back as same as the original one

dear brothers i have a code to convert a sequence of images captured from a video file and i want to get it back to a video file as same as the original one without losing any pixel's position in these images because i made some changes in these pixels by Encryption method (pixel value difference) in these images so how can i do that? do you have any suggestions... thank you in advance.
import cv2
import os
#import math
#from moviepy.editor import *
import numpy as np
import glob
img_array = []
x = 0
for filename in glob.glob('./data/*.png'):
filename = './data/' + str(int(x)) + ".png"
img = cv2.imread(filename)
height, width, layers = img.shape
size = (width, height)
img_array.append(img)
x+=1
out = cv2.VideoWriter('project.mp4', cv2.VideoWriter_fourcc(*'mp4v'), 30, size)
x=0
for i in img_array:
out.write(img_array[x])
x+=1
out.release()
try:
if not os.path.exists('VideoPic'):
os.makedirs('VideoPic')
except OSError:
print('Error: Creating directory of data')
cap = cv2.VideoCapture('project.mp4')
frameRate = cap.get(30) # frame rate
x = 0
while(cap.isOpened()):
frameId = cap.get(0) #current frame number
ret, frame = cap.read()
if (ret != True):
break
else:
filename = './VideoPic/' + str(int(x)) + ".png";x+=1
cv2.imwrite(filename, frame)
cap.release()
cv2.destroyAllWindows()
Taken from here:
out = cv2.VideoWriter('project.avi',cv2.VideoWriter_fourcc(*'DIVX'), 15, size)
for i in range(len(img_array)):
out.write(img_array[i])
out.release()
does it don't work for you? it looks very similair to this, explain more in details what are the mismatches..

Speed up the creation of a circular ROI on a 4K 25 fps video in python opencv

I am working with a 2h-long 4K MP4 video shot at 25 fps. I am using openCV in python 3. From the video, I need to extract 3 circular ROIs.
Because of the large number of frames (212831), processing the whole video frame by frame with the code below takes over 24h on a 64 GB RAM workstation. Is there a way to speed up the processing or a workaround that does something slightly different to a similar effect?
Here is the code.
import numpy as np
import cv2
import sys
import time
# name of source video and paths
video = 'C0023_2hanalysis'
input_vidpath = 'path_to_video/' + video + '.MP4'
output_vidpath = 'path_to_video/' + video + '-withROI.MP4'
codec = 'avc1'
# set ROI coordinates extrapolated from last video frame as well as fixed parameters for analysis (radius)
x = 1188 # in pixels
y = 1204 # in pixels
radius = 75
# set parameters for output video as identical to input
fps = 25.0
scaling = 1.0 # output vs input video speed (?)
## import video
cap = cv2.VideoCapture(input_vidpath)
if cap.isOpened() == False:
sys.exit('Video file cannot be read! Please check input_vidpath to ensure it is correctly pointing to the video file')
## Video writer class to output video
fourcc = cv2.VideoWriter_fourcc(*codec) # concatenate the 4 chars to a fourcc code, i.e. the 4-char name of the codec used to compress the frames
# adjust output frame size to scaling if any is applied
#(frame shape is given as height,width , so the output needs to be re-ordered to match VideoWriter arguments)
o_height = cap.read()[1].shape[0]
o_width = cap.read()[1].shape[1]
output_framesize = (int(o_width*scaling),int(o_height*scaling))
out = cv2.VideoWriter(filename = output_vidpath, fourcc = 0x7634706d, fps = fps, frameSize = output_framesize, isColor = True)
## apply ROI frame by frame and thread them back into output video
start = time.time()
f = -1
last = 0
while(True):
# Capture frame-by-frame
ret, frame = cap.read() #'return' value (T/F) and frame
this = cap.get(1) # get 'CV_CAP_PROP_POS_FRAMES'
if ret == True:
#frame = cv2.resize(frame, None, fx = scaling, fy = scaling, interpolation = cv2.INTER_LINEAR) # no need to resize in this case
# Apply mask to area of interest
mask = np.zeros((o_height,o_width), np.uint8)
mask = cv2.circle(mask,(x,y),radius,255,thickness=-1) #image, row and column coord of centre of circle, radius, color (black), thickness
frame[mask == 0] = 0
out.write(frame)
key = cv2.waitKey(1) & 0xFF
# if the `q` key was pressed, break from the loop
if key == ord("q"):
break
f += 1
if f%1000==0:
print(f)
if last == this:
break
last = this
## When everything done, release the capture
cap.release()
out.release()
cv2.destroyAllWindows()
cv2.waitKey(1)
## End time and duration
end = time.time()
duration = end - start
print("--- %s seconds ---" %duration)
This is a common mistake. You shouldn't call waitKey(1) when you want to process the frames as fast possible. That function is basically add a short sleep after processing each frame, and that sleep time is much longer that processing time.
You just need to remove that, and still you can just kill the process if want to stop in the middle.
import numpy as np
import cv2
import sys
import time
# name of source video and paths
video = 'C0023_2hanalysis'
input_vidpath = 'path_to_video/' + video + '.MP4'
output_vidpath = 'path_to_video/' + video + '-withROI.MP4'
codec = 'avc1'
# set ROI coordinates extrapolated from last video frame as well as fixed parameters for analysis (radius)
x = 1188 # in pixels
y = 1204 # in pixels
radius = 75
# set parameters for output video as identical to input
fps = 25.0
scaling = 1.0 # output vs input video speed (?)
## import video
cap = cv2.VideoCapture(input_vidpath)
if cap.isOpened() == False:
sys.exit('Video file cannot be read! Please check input_vidpath to ensure it is correctly pointing to the video file')
## Video writer class to output video
fourcc = cv2.VideoWriter_fourcc(*codec) # concatenate the 4 chars to a fourcc code, i.e. the 4-char name of the codec used to compress the frames
# adjust output frame size to scaling if any is applied
#(frame shape is given as height,width , so the output needs to be re-ordered to match VideoWriter arguments)
o_height = cap.read()[1].shape[0]
o_width = cap.read()[1].shape[1]
output_framesize = (int(o_width*scaling),int(o_height*scaling))
out = cv2.VideoWriter(filename = output_vidpath, fourcc = 0x7634706d, fps = fps, frameSize = output_framesize, isColor = True)
## apply ROI frame by frame and thread them back into output video
start = time.time()
f = -1
last = 0
while(True):
# Capture frame-by-frame
ret, frame = cap.read() #'return' value (T/F) and frame
this = cap.get(1) # get 'CV_CAP_PROP_POS_FRAMES'
if ret == True:
#frame = cv2.resize(frame, None, fx = scaling, fy = scaling, interpolation = cv2.INTER_LINEAR) # no need to resize in this case
# Apply mask to area of interest
mask = np.zeros((o_height,o_width), np.uint8)
mask = cv2.circle(mask,(x,y),radius,255,thickness=-1) #image, row and column coord of centre of circle, radius, color (black), thickness
frame[mask == 0] = 0
out.write(frame)
f += 1
if f%1000==0:
print(f)
if last == this:
break
last = this
## When everything done, release the capture
cap.release()
out.release()
cv2.destroyAllWindows()
cv2.waitKey(1)
## End time and duration
end = time.time()
duration = end - start
print("--- %s seconds ---" %duration)

txt file won't be written in python in windows, it works on mac

I wrote a code that takes a video and recognizes face of a human and calculate the distance of the human from the camera (which is the eye of a robot). I need the data in a txt format so I am writing it in the distance_data.txt but my file is always empty.
I had run the code in a mac computer and it worked perfectly fine and the file was not empty.
import cv2
import time
import numpy as np
person_cascade = cv2.CascadeClassifier('human_recognition.txt')
cap = cv2.VideoCapture('oct23_2.avi')
# Define the codec and create VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter('oct23_2out.avi',fourcc, 20.0, (640,360))
distance_data = open('distance_data.txt','w')
timer=-1
while (cap.isOpened()):
r, frame = cap.read()
if r:
frame = cv2.resize(frame,(640,360)) # Downscale to improve frame rate
gray_frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY) # Haar-cascade classifier needs a grayscale image
cv2.imshow('gray',gray_frame)
#print(gray_frame.type())
rects = person_cascade.detectMultiScale(gray_frame)
max=0
#print(rects)
(x, y, w, h) = rects[0]
cv2.rectangle(frame, (x,y), (x+w ,y+h),(0,255,0),2)
#fshape = frame.shape
#fheight = fshape[0]
#fwidth = fshape[1]
timer=timer+1
xpos=(0.03421)*(w*w)-8.1183*w+521.841
yn=360-y
heightinvid=0.0197377*yn-0.0194
realhieght=((-0.4901)*(heightinvid* heightinvid)+4.4269*heightinvid+8.30356)/ 0.927183854
horizontalp=x+w/2
#print(timer+1 , ",", xpos, )
#if (xpos<=41 and xpos>=40):
#print("height of the human is ", realhieght+149, "or", (-0.000184)*yn*yn+0.08506*yn+8.43461+149)
cm_deviation_in_x=-0.02033898*(320-horizontalp)
#print("Horizontal position of human is ", cm_deviation_in_x, " cm.")
newxpos = format (xpos, '.3f')
distance_data.write("Hi")
distance_data.write(str(newxpos))
distance_data.write(',')
distance_data.write(str(format(cm_deviation_in_x, '.3f')))
distance_data.write("\r\n")
#print(w)
#print("x position of the human is = ",xpos)
#fourcc = cv2.VideoWriter_fourcc(*'XVID')
#out = cv2.VideoWriter('output25.avi',fourcc, 20.0, (fwidth,fheight))
cv2.imshow("preview", frame)
out.write(frame)
k = cv2.waitKey(10)
#print(w,h)
if k & 0xFF == ord("q"): # Exit condition
break
distance_data.flush()
cap.release()
cv2.destroyAllWindows()

Send multiple frame to AWS rekognition

I'm trying to send pictures to the aws rekognition from my webcam to detect the activity of the person sitting in front of it using python.
To do so I take a picture every 5 seconds and I send it to the aws.
But when I do so it seems that he's always sending back information about the first frame that I sent
cap = cv2.VideoCapture(0)
while 1:
ret, img = cap.read()
client=boto3.client('rekognition')
print("hello")
ret, fileImg=cv2.imencode('.png',img)
response = client.detect_labels(Image={'Bytes':fileImg.tobytes()})
print('Detected labels for Camera Capture')
for label in response['Labels']:
print (label['Name'] + ' : ' + str(label['Confidence']))
sleep(5)
Here is the result i get from that call:
Detected labels for Camera Capture
Human : 99.1103897095
People : 99.1103744507
Person : 99.1103897095
Face : 56.5527687073
Crypt : 51.1719360352
hello
Detected labels for Camera Capture
Human : 99.0247421265
People : 99.0247344971
Person : 99.0247421265
Face : 57.7796173096
Lighting : 51.8473701477
Crypt : 51.08152771
hello
Detected labels for Camera Capture
Human : 99.0808181763
People : 99.0808105469
Person : 99.0808181763
Face : 56.4268836975
Lighting : 54.6302490234
Crypt : 50.8622779846
hello
Knowing during the time of the call the image has changed a lot and should (at least I think) show me other results.
Here's some code that I use to put rectangles around faces in a similar way:
import cv2
import numpy as np
import boto3
# Setup
scale_factor = .15
green = (0,255,0)
red = (0,0,255)
frame_thickness = 2
cap = cv2.VideoCapture(0)
rekognition = boto3.client('rekognition')
while(True):
# Capture frame-by-frame
ret, frame = cap.read()
height, width, channels = frame.shape
# Convert frame to jpg
small = cv2.resize(frame, (int(width * scale_factor), int(height * scale_factor)))
ret, buf = cv2.imencode('.jpg', small)
# Detect faces in jpg
faces = rekognition.detect_faces(Image={'Bytes':buf.tobytes()}, Attributes=['ALL'])
# Draw rectangle around faces
for face in faces['FaceDetails']:
smile = face['Smile']['Value']
cv2.rectangle(frame,
(int(face['BoundingBox']['Left']*width),
int(face['BoundingBox']['Top']*height)),
(int((face['BoundingBox']['Left']+face['BoundingBox']['Width'])*width),
int((face['BoundingBox']['Top']+face['BoundingBox']['Height'])*height)),
green if smile else red, frame_thickness)
# Display the resulting frame
cv2.imshow('frame', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
# When everything done, release the capture
cap.release()
cv2.destroyAllWindows()
It scales-down the picture because Rekognition doesn't need full size to detect faces.

Categories

Resources