Send multiple frame to AWS rekognition - python

I'm trying to send pictures to the aws rekognition from my webcam to detect the activity of the person sitting in front of it using python.
To do so I take a picture every 5 seconds and I send it to the aws.
But when I do so it seems that he's always sending back information about the first frame that I sent
cap = cv2.VideoCapture(0)
while 1:
ret, img = cap.read()
client=boto3.client('rekognition')
print("hello")
ret, fileImg=cv2.imencode('.png',img)
response = client.detect_labels(Image={'Bytes':fileImg.tobytes()})
print('Detected labels for Camera Capture')
for label in response['Labels']:
print (label['Name'] + ' : ' + str(label['Confidence']))
sleep(5)
Here is the result i get from that call:
Detected labels for Camera Capture
Human : 99.1103897095
People : 99.1103744507
Person : 99.1103897095
Face : 56.5527687073
Crypt : 51.1719360352
hello
Detected labels for Camera Capture
Human : 99.0247421265
People : 99.0247344971
Person : 99.0247421265
Face : 57.7796173096
Lighting : 51.8473701477
Crypt : 51.08152771
hello
Detected labels for Camera Capture
Human : 99.0808181763
People : 99.0808105469
Person : 99.0808181763
Face : 56.4268836975
Lighting : 54.6302490234
Crypt : 50.8622779846
hello
Knowing during the time of the call the image has changed a lot and should (at least I think) show me other results.

Here's some code that I use to put rectangles around faces in a similar way:
import cv2
import numpy as np
import boto3
# Setup
scale_factor = .15
green = (0,255,0)
red = (0,0,255)
frame_thickness = 2
cap = cv2.VideoCapture(0)
rekognition = boto3.client('rekognition')
while(True):
# Capture frame-by-frame
ret, frame = cap.read()
height, width, channels = frame.shape
# Convert frame to jpg
small = cv2.resize(frame, (int(width * scale_factor), int(height * scale_factor)))
ret, buf = cv2.imencode('.jpg', small)
# Detect faces in jpg
faces = rekognition.detect_faces(Image={'Bytes':buf.tobytes()}, Attributes=['ALL'])
# Draw rectangle around faces
for face in faces['FaceDetails']:
smile = face['Smile']['Value']
cv2.rectangle(frame,
(int(face['BoundingBox']['Left']*width),
int(face['BoundingBox']['Top']*height)),
(int((face['BoundingBox']['Left']+face['BoundingBox']['Width'])*width),
int((face['BoundingBox']['Top']+face['BoundingBox']['Height'])*height)),
green if smile else red, frame_thickness)
# Display the resulting frame
cv2.imshow('frame', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
# When everything done, release the capture
cap.release()
cv2.destroyAllWindows()
It scales-down the picture because Rekognition doesn't need full size to detect faces.

Related

Opencv fails to save video when cropped using numpy slicing

I'm using OpenCV (3.4.6) to capture video from a Raspberry Pi HQ camera, attached to a Raspberry Pi 4 running Raspbian 10. Bounding boxes suggested by an object detection algorithm are then drawn onto the captured frames, and the output saved to a video file.
Everything works great, except when I try to crop the video to my region of interest.
Using the following line successfully crops the image (as confirmed by cv2.imshow):
frame = frame[0:720, 280:1000]
But, it also stops the output being saved by out.write - I just get an empty file, and being new to both Python and OpenCV, I'm struggling to work out why.
I've trimmed down the code to the relevant part below (apologies - it's still a bit long - I'm not entirely sure what might be important):
# begin video capture
try:
vid = cv2.VideoCapture(int(video_path), cv2.CAP_V4L2)
except:
vid = cv2.VideoCapture(video_path)
out = None
# set capture resolution
inputcodec = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G') # forces MJPG codec
vid.set(6, inputcodec) # sets inputcodec for video capture
vid.set(3, 1280) # sets video capture width
vid.set(4, 720) # sets video capture height
if FLAGS.output:
# by default VideoCapture returns float instead of int
width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(vid.get(cv2.CAP_PROP_FPS))
codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
while True:
return_value, frame = vid.read()
frame = frame[0:720, 280:1000] # crop video to square region of interest
if return_value:
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
image = Image.fromarray(frame)
else:
print('Video has ended or failed, try a different video format!')
break
frame_size = frame.shape[:2]
image_data = cv2.resize(frame, (input_size, input_size))
image_data = image_data / 255.
image_data = image_data[np.newaxis, ...].astype(np.float32)
start_time = time.time()
if FLAGS.framework == 'tflite':
interpreter.set_tensor(input_details[0]['index'], image_data)
interpreter.invoke()
pred = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))]
if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25,
input_shape=tf.constant([input_size, input_size]))
else:
boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25,
input_shape=tf.constant([input_size, input_size]))
else:
batch_data = tf.constant(image_data)
pred_bbox = infer(batch_data)
for key, value in pred_bbox.items():
boxes = value[:, :, 0:4]
pred_conf = value[:, :, 4:]
boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
scores=tf.reshape(
pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
max_output_size_per_class=50,
max_total_size=50,
iou_threshold=FLAGS.iou,
score_threshold=FLAGS.score
)
pred_bbox = [boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy()]
image = utils.draw_bbox(frame, pred_bbox)
fps = 1.0 / (time.time() - start_time)
print("FPS: %.2f" % fps)
result = np.asarray(image)
cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
if not FLAGS.dont_show:
cv2.imshow("result", result)
if FLAGS.output:
out.write(result)
if cv2.waitKey(1) & 0xFF == ord('q'): break
cv2.destroyAllWindows()
Any suggestions for how to resolve this would be greatly appreciated.
You created the VideoWriter and set its width and height to 1280 by 720. Later you told it to save 720 by 720 crops. That is the problem. frame = frame[0:720, 280:1000] is a 720 by 720 array.
Decide what dimensions your video should have, and use them consistently. A video stream can't change resolution.

How to explicitly access mjpeg backend for videocapture in opencv

When I execute following:
availableBackends = [cv2.videoio_registry.getBackendName(b) for b in cv2.videoio_registry.getBackends()]
print(availableBackends)
I get ['FFMPEG', 'GSTREAMER', 'INTEL_MFX', 'V4L2', 'CV_IMAGES', 'CV_MJPEG'].
If I now try:
print(cv2.CAP_FFMPEG)
print(cv2.CAP_GSTREAMER)
print(cv2.CAP_INTEL_MFX)
print(cv2.CAP_V4L2)
print(cv2.CAP_IMAGES)
print(cv2.CAP_MJPEG)
all work except the last one:
AttributeError: module 'cv2.cv2' has no attribute 'CAP_MJPEG'
How can I explicitly set cv2.CAP_MJPEG backend (cv2.CAP_CV_MJPEG does not work either)?
You can see all the flags here.
It looks like cv2.CAP_OPENCV_MJPEG is what you are looking for.
The following test creates MJPEG synthetic AVI video file, and reads the video using cv2.CAP_OPENCV_MJPEG backend:
import numpy as np
import cv2
#availableBackends = [cv2.videoio_registry.getBackendName(b) for b in cv2.videoio_registry.getBackends()]
#print(availableBackends)
print('cv2.CAP_OPENCV_MJPEG = ' + str(cv2.CAP_OPENCV_MJPEG))
intput_filename = 'vid.avi'
# Generate synthetic video files to be used as input:
###############################################################################
width, height, n_frames = 640, 480, 100 # 100 frames, resolution 640x480
# Use motion JPEG codec (for testing)
synthetic_out = cv2.VideoWriter(intput_filename, cv2.VideoWriter_fourcc(*'MJPG'), 25, (width, height))
for i in range(n_frames):
img = np.full((height, width, 3), 60, np.uint8)
cv2.putText(img, str(i+1), (width//2-100*len(str(i+1)), height//2+100), cv2.FONT_HERSHEY_DUPLEX, 10, (30, 255, 30), 20) # Green number
synthetic_out.write(img)
synthetic_out.release()
###############################################################################
# Read the video using CV_MJPEG backend
###############################################################################
cap = cv2.VideoCapture(intput_filename, cv2.CAP_OPENCV_MJPEG)
# Keep iterating break
while True:
ret, frame = cap.read() # Read frame from first video
if ret:
cv2.imshow('frame', frame) # Display frame for testing
cv2.waitKey(100) #Wait 100msec (for debugging)
else:
break
cap.release() #Release must be inside the outer loop
###############################################################################

How can i captured detected images of face in opencv to make a database?

I am trying to enroll the faces of the user in opencv for facial recognition. I have finihed the detection part but what i wanted to achieve is to save the detected faces.So basically waht i wanted to achieve is :
when i see in the webcam it automatically capture 20-30 or n no images and saves it locally.
Currently i am trying to simply save 1 image when clicked some key ,The program runs fine but nothing saves locally.Here is the code
import cv2
import os
cascade = cv2.CascadeClassifier("../haar-cascade-files-master/haar-cascade-files-master/haarcascade_frontalface_alt2.xml")
cap = cv2.VideoCapture(0)
while True:
ret,frame = cap.read(0)
# gray = cv2.cvtColor(frame,cv2.COLOR_BGR2GRAY)
faces = cascade.detectMultiScale(frame,1.1,5)
orig = frame.copy()
for (x,y,w,h) in faces:
cv2.rectangle(frame,(x,y),(x+w,y+h),(255,255,0),4)
cv2.imshow("Video ",frame)
if cv2.waitKey(2) & 0xFF == 27:
break
elif cv2.waitKey(2) & 0xFF == ord('s'):
faceimg=frame[y:y+h,x:x+w]
cv2.imwrite("../images/facec.jpeg",faceimg)
# cv2.imshow("Crop Image",faceimg)
cap.release()
cv2.destroyAllWindows()
So whats wrong in the code and how can i save n no of images automatically.
If only one person is looking at the camera in a given scenario, you can use a counter.
N = 20
cnt = 0
while True:
...
...
# If the frame contains only one face
for (x,y,w,h) in faces:
cv2.rectangle(frame,(x,y),(x+w,y+h),(255,255,0),4)
faceimg=frame[y:y+h,x:x+w]
cv2.imwrite("../images/face_"+str(cnt)+".jpeg",faceimg)
cnt = cnt + 1
if cnt == N:
cnt = 0
...
...

txt file won't be written in python in windows, it works on mac

I wrote a code that takes a video and recognizes face of a human and calculate the distance of the human from the camera (which is the eye of a robot). I need the data in a txt format so I am writing it in the distance_data.txt but my file is always empty.
I had run the code in a mac computer and it worked perfectly fine and the file was not empty.
import cv2
import time
import numpy as np
person_cascade = cv2.CascadeClassifier('human_recognition.txt')
cap = cv2.VideoCapture('oct23_2.avi')
# Define the codec and create VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter('oct23_2out.avi',fourcc, 20.0, (640,360))
distance_data = open('distance_data.txt','w')
timer=-1
while (cap.isOpened()):
r, frame = cap.read()
if r:
frame = cv2.resize(frame,(640,360)) # Downscale to improve frame rate
gray_frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY) # Haar-cascade classifier needs a grayscale image
cv2.imshow('gray',gray_frame)
#print(gray_frame.type())
rects = person_cascade.detectMultiScale(gray_frame)
max=0
#print(rects)
(x, y, w, h) = rects[0]
cv2.rectangle(frame, (x,y), (x+w ,y+h),(0,255,0),2)
#fshape = frame.shape
#fheight = fshape[0]
#fwidth = fshape[1]
timer=timer+1
xpos=(0.03421)*(w*w)-8.1183*w+521.841
yn=360-y
heightinvid=0.0197377*yn-0.0194
realhieght=((-0.4901)*(heightinvid* heightinvid)+4.4269*heightinvid+8.30356)/ 0.927183854
horizontalp=x+w/2
#print(timer+1 , ",", xpos, )
#if (xpos<=41 and xpos>=40):
#print("height of the human is ", realhieght+149, "or", (-0.000184)*yn*yn+0.08506*yn+8.43461+149)
cm_deviation_in_x=-0.02033898*(320-horizontalp)
#print("Horizontal position of human is ", cm_deviation_in_x, " cm.")
newxpos = format (xpos, '.3f')
distance_data.write("Hi")
distance_data.write(str(newxpos))
distance_data.write(',')
distance_data.write(str(format(cm_deviation_in_x, '.3f')))
distance_data.write("\r\n")
#print(w)
#print("x position of the human is = ",xpos)
#fourcc = cv2.VideoWriter_fourcc(*'XVID')
#out = cv2.VideoWriter('output25.avi',fourcc, 20.0, (fwidth,fheight))
cv2.imshow("preview", frame)
out.write(frame)
k = cv2.waitKey(10)
#print(w,h)
if k & 0xFF == ord("q"): # Exit condition
break
distance_data.flush()
cap.release()
cv2.destroyAllWindows()

Issue with OpenCV in Python

I am trying to run the following code on my Raspberry Pi, but it is giving me this error:
Traceback (most recent call last):
File "video_capture_thresh.py", line 59, in
main ()
File "video_capture_thresh.py", line 11, in main
crop = frame[180:320, 0:638]
TypeError: 'NoneType' object has no attribute 'getitem
import numpy as np
import cv2
#cap=cv2.VideoCapture(0)
cap = cv2.VideoCapture(1)
def main():
while(True):
# Capture frame-by-frame
ret, frame = cap.read()
# Our operations on the frame come here
crop = frame[180:320, 0:638]
crop2=cv2.cvtColor(crop,cv2.COLOR_BGR2GRAY)
th,crop2 = cv2.threshold(crop2,0,255, cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)
previous = cv2.GaussianBlur(crop2, (5,5),0)
contours, hierarchy = cv2.findContours(crop2,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
cv2.rectangle(previous,(0,0),(638,140),(0,255,0),5)
i=0
for cnt in contours:
moments = cv2.moments(cnt) # Calculate moments
if moments['m00']!=0:
cx = int(moments['m10']/moments['m00']) # cx = M10/M00
cy = int(moments['m01']/moments['m00']) # cy = M01/M00
moment_area = moments['m00'] # Contour area from moment
contour_area = cv2.contourArea(cnt) # Contour area using in_built function
perimeter = cv2.arcLength(cnt,True)
cv2.drawContours(previous, [cnt], 0, (0,255,0), 3)
px = previous[cy,cx]
if px == 255 :
i=i+1
cv2.circle(previous,(cx,cy),5,(0,0,255),-1)
cv2.imshow("Previous",previous)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
main ()
cap.release()
cv2.destroyAllWindows()
Try adding a check to make sure you actually read in OK before you do your processing
ret, frame = cap.read()
if ret==True:
crop = frame[180:320, 0:638]
The method read
This is the most convenient method for reading video files or capturing data from decode and return the just grabbed frame. If no frames has been grabbed (camera has been disconnected, or there are no more frames in video file), the methods return false and the functions return NULL pointer.
Check if the image is captured from camera well.
What kind of camera are you using?
Note that cv2.VideoCapture does not work with Raspi module camera, it just works with usb webcam.

Categories

Resources