I'm trying to do face detection in a video using Google Vision API. I'm using the following code:
import argparse
import cv2
from google.cloud import vision
from PIL import Image, ImageDraw
def detect_face(face_file, max_results=4):
    """Uses the Vision API to detect faces in the given file.

    Args:
        face_file: A file-like object containing an image with faces.
        max_results: The maximum number of faces to request from the API.

    Returns:
        An array of Face objects with information about the picture.
    """
    content = face_file.read()
    # [START get_vision_service]
    image = vision.Client().image(content=content)
    # [END get_vision_service]
    # Forward the caller's limit; it used to be accepted but silently ignored.
    # int() keeps this working when the value arrives as a string from argparse.
    return image.detect_faces(limit=int(max_results))
def highlight_faces(frame, faces, output_filename=None):
    """Draws a polygon around the faces, then optionally saves the result.

    Args:
        frame: a file containing the image with the faces.
        faces: a list of faces found in the file. This should be in the format
            returned by the Vision API.
        output_filename: the name of the image file to be created, where the
            faces have polygons drawn around them. When omitted, nothing is
            written to disk.
    """
    im = Image.open(frame)
    draw = ImageDraw.Draw(im)
    for face in faces:
        box = [(bound.x_coordinate, bound.y_coordinate)
               for bound in face.bounds.vertices]
        # Repeat the first vertex so the outline is closed.
        draw.line(box + [box[0]], width=5, fill='#00ff00')
    if output_filename is not None:
        im.save(output_filename)
def main(input_filename, max_results):
    """Runs face detection on every frame of a video and displays the frames.

    Args:
        input_filename: the video source, passed to cv2.VideoCapture.
        max_results: the max results of face detection, forwarded to
            detect_face.
    """
    video_capture = cv2.VideoCapture(input_filename)
    try:
        while True:
            # Capture frame-by-frame
            ret, frame = video_capture.read()
            if not ret:
                # No frame could be read (for example at the end of the video).
                break
            # NOTE(review): frame is a numpy.ndarray here, while detect_face
            # calls .read() on its argument.
            faces = detect_face(frame, max_results)
            highlight_faces(frame, faces)
            cv2.imshow('Video', frame)
            # Stop as soon as the user presses 'q'.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Free the capture and close the preview window, even on errors.
        video_capture.release()
        cv2.destroyAllWindows()
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Detects faces in the given image.')
    parser.add_argument(
        'input_image', help='the image you\'d like to detect faces in.')
    # type=int so that a value given on the command line is a number, just
    # like the default, instead of a string.
    parser.add_argument(
        '--max-results', dest='max_results', default=4, type=int,
        help='the max results of face detection.')
    args = parser.parse_args()
    main(args.input_image, args.max_results)
But I'm getting the error:
content = face_file.read() AttributeError: 'numpy.ndarray' object has
no attribute 'read'
The frames are read as numpy arrays, but I don't know how to work around that.
Can anyone please help me?
The detect_face function is expecting a file-like object to read the data from. One possible way to do this is to convert frame (of type numpy.ndarray) into an image, and put it into a buffer, which can then be read like a file.
For example, try making the following changes to your code:
## Add some imports.
import io
import numpy as np
...
def main(input_filename, max_results):
    ...
    while True:
        # Capture frame-by-frame
        ret, frame = video_capture.read()
        ## Convert to an image, then write to a buffer.
        # OpenCV delivers frames in BGR order, whereas PIL expects RGB.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image_from_frame = Image.fromarray(np.uint8(rgb_frame))
        buffer = io.BytesIO()
        image_from_frame.save(buffer, format='PNG')
        # Rewind so that the next read starts at the beginning of the PNG data.
        buffer.seek(0)
        ## Use the buffer like a file.
        faces = detect_face(buffer, max_results)
        ...
Note: There should be a way to use image_from_frame.tobytes() as image content in the vision API client, but I could not make it work.
Related
I am testing different background segmentation algorithm from the library pybgs. Unfortunately, I am facing an error that I don't understand.
The code is :
import cv2
import pybgs as bgs
video_path = "video.mp4"
# create VideoCapture object for further video processing
captured_video = cv2.VideoCapture(video_path)
# check video capture status
# isOpened is a method: without the call parentheses the bound method object
# is always truthy and this check could never trigger
if not captured_video.isOpened():
    print("Unable to open: " + video_path)
    exit(0)
background_sub_method = bgs.SuBSENSE()
while True:
    # read video frames
    ret, frame = captured_video.read()
    # check whether the frames have been grabbed
    if not ret:
        break
    # pass the frame to the background subtractor
    foreground_mask = background_sub_method.apply(frame)
    # obtain the background without foreground mask
    img_bg_model = background_sub_method.getBackgroundModel()
    cv2.imshow("Initial Frame", frame)
    cv2.imshow("FG Mask", foreground_mask)
    cv2.imshow("Subtraction Result", img_bg_model)
    key = cv2.waitKey(10)
    # 27 is the key code of the Esc key
    if key == 27:
        break
# release the video source and close the preview windows
captured_video.release()
cv2.destroyAllWindows()
Apart from the algorithm not working properly, I get this error, which I don't understand:
[ERROR:0#0.002] global /home/usr/opencv-4.x/modules/core/src/persistence.cpp (505) open Can't open file: './config/SuBSense.xml' in write mode
Failed to open ./config/SuBSense.xml
In my lib pybgs, I have a config folder but there is no SuBSense.xml file.
So I don't know where this error comes from, or where this SuBSense.xml file is supposed to be.
So I'm trying to decode a QR code image using code from this S.O. answer. Here's the adapted code:
import cv2
# Name of the QR Code Image file
filename = r"C:\temp\2021-12-14_162414.png"
# read the QRCODE image
image = cv2.imread(filename)
# cv2.imread returns None instead of raising when the file cannot be read
if image is None:
    raise SystemExit("Could not read image: " + filename)
# initialize the cv2 QRCode detector
detector = cv2.QRCodeDetector()
# detect and decode
data, vertices_array, binary_qrcode = detector.detectAndDecode(image)
# if there is a QR code
# print the data
if vertices_array is not None:
    print("QRCode data:")
    print(data)
else:
    print("There was some error")
(This is the whole program; I was still experimenting.)
The PNG file itself is really small, just 43 KB in size, with resolution of 290x290 (24 bpp) containing just the QR Code.
However, I keep getting the error:
Traceback (most recent call last):
File "C:/Repos/tesqr/decod-cv2.py", line 10, in <module>
data, vertices_array, binary_qrcode = detector.detectAndDecode(image)
cv2.error: OpenCV(4.5.4) D:\a\opencv-python\opencv-python\opencv\modules\core\src\alloc.cpp:73: error: (-4:Insufficient memory) Failed to allocate 54056250000 bytes in function 'cv::OutOfMemoryError'
Why is alloc.cpp asking for 54 GB of RAM ???
I'm new with OpenCV, so please help me troubleshoot what went wrong.
The library I'm using is:
$ pip3 freeze | grep opencv
opencv-contrib-python-headless==4.5.4.60
the input image:
Short Answer:
Try WeChatQRCode
Long Answer:
There are several open memory issues about decoding with QRCodeDetector. I hope that in future versions it will be fixed. Meanwhile you can try WeChatQRCode also from cv2.
WeChatQRCode includes two CNN-based models: an object detection model and a super resolution model. The object detection model is applied to detect the QR code and its bounding box. The super resolution model is applied to zoom in on the QR code when it is small.
Your code modified:
import cv2
# Name of the QR Code Image file
filename = "2021-12-14_162414.png"
# read the QRCODE image
image = cv2.imread(filename)
# initialize the WeChat QRCode detector with its detection and
# super resolution model files
detector = cv2.wechat_qrcode_WeChatQRCode(
    detector_prototxt_path="detect.prototxt",
    detector_caffe_model_path="detect.caffemodel",
    super_resolution_prototxt_path="sr.prototxt",
    super_resolution_caffe_model_path="sr.caffemodel",
)
# detect and decode
data, vertices_array = detector.detectAndDecode(image)
# if there is a QR code
# print the data
# data holds one decoded string per QR code; test it rather than
# "vertices_array is not None", which holds even when nothing was found
if data:
    print("QRCode data:")
    print(data)
else:
    print("There was some error")
Output:
QRCode data:
('PK\x03\x04\n',)
As you can see, it needs prototxt and caffemodel files. You can find them here.
My goal is to capture some images from my laptop's webcam to use them later. The main problem is that cv2.imwrite doesn't seem to work: the directory is created successfully, but I cannot find a way to save the image. I am currently on Manjaro Linux. I've already checked that the frame is not empty, and in other Python scripts I've been able to show the image properly; the only problem seems to be when I try to save the image.
Is there any other way to save the image or is something wrong with my code?
I have the following Python code:
import cv2 #opencv
import os
import time
import uuid
# Must be the directory the images are created in; a misspelt path makes
# cv2.imwrite fail without raising.
IMAGES_PATH = 'Tensorflow/workspace/images/collectedimages'
labels = ['hola', 'gracias', 'si', 'no', 'tequiero']
img_number = 15
for label in labels:
    # Create the target directory from the same constant that is used for
    # saving, so that both always agree.
    label_dir = os.path.join(IMAGES_PATH, label)
    os.makedirs(label_dir, exist_ok=True)
    cap = cv2.VideoCapture(0)
    print('Collecting images for {}'.format(label))
    # Give the user time to get into position.
    time.sleep(5)
    for numimg in range(img_number):
        ret, frame = cap.read()
        if not ret:
            print('Could not read a frame from the camera')
            break
        img_name = os.path.join(label_dir, label+'.'+'{}.jpg'.format(str(uuid.uuid1())))
        # cv2.imwrite reports failure through its return value.
        if not cv2.imwrite(img_name, frame):
            print('Could not write {}'.format(img_name))
        cv2.imshow('Frame', frame)
        time.sleep(2)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
I'm trying to convert a HEIC file to JPEG while also importing all the metadata (like GPS info and other stuff). Unfortunately, with the code below the conversion works, but no metadata is stored in the created JPEG file.
Can anyone tell me what I need to add to the conversion method?
# Decode the HEIC file with pyheif.
heif_file = pyheif.read("/transito/126APPLE_IMG_6272.HEIC")
# Build a PIL image from the decoded data, using the mode, size and stride
# reported by pyheif.
image = Image.frombytes(
heif_file.mode,
heif_file.size,
heif_file.data,
"raw",
heif_file.mode,
heif_file.stride,
)
# Write the image as JPEG; no exif argument is passed to save() here.
image.save("/transito/126APPLE_IMG_6272.JPEG", "JPEG")
Thanks, I found a solution; I hope it can help others:
# Open the file
heif_file = pyheif.read(file_path_heic)
# Creation of image
image = Image.frombytes(
    heif_file.mode,
    heif_file.size,
    heif_file.data,
    "raw",
    heif_file.mode,
    heif_file.stride,
)
# Retrieve the metadata
# Stays None when the file carries no Exif block, so the save below still works.
exif_bytes = None
for metadata in heif_file.metadata or []:
    if metadata['type'] == 'Exif':
        exif_dict = piexif.load(metadata['data'])
        # The image has already been rotated according to the EXIF info, so
        # the orientation tag (274) is reset to 1 ("normal"); otherwise the
        # image would be rotated a second time by the viewer. 0 is not a
        # valid orientation value.
        exif_dict['0th'][274] = 1
        exif_bytes = piexif.dump(exif_dict)
if exif_bytes is not None:
    image.save(file_path_jpeg, "JPEG", exif=exif_bytes)
else:
    image.save(file_path_jpeg, "JPEG")
HEIF to JPEG:
from PIL import Image
import pillow_heif
if __name__ == "__main__":
    # Register the HEIF opener with Pillow before opening the file.
    pillow_heif.register_heif_opener()
    # Open the HEIC source and write it straight back out as JPEG.
    Image.open("any_image.heic").save("output.jpeg")
JPEG to HEIF:
from PIL import Image
import pillow_heif
if __name__ == "__main__":
    # Register the HEIF plugin with Pillow before saving to .heic.
    pillow_heif.register_heif_opener()
    # Open the JPEG source and write it straight back out as HEIF.
    Image.open("any_image.jpg").save("output.heic")
Rotation (EXIF or XMP) will be removed automatically when needed.
Call to register_heif_opener can be replaced by importing pillow_heif.HeifImagePlugin instead of pillow_heif
Metadata can be edited in Pillow's "info" dictionary and will be saved when saving to HEIF.
Here is another approach to convert iPhone HEIC images to JPG while preserving the EXIF data.
Python 3.9 (I'm on a Raspberry Pi 4, 64 bit)
install pillow_heif (0.8.0)
And run following code and you'll find exif data in the new JPEG image.
The trick is to get the dictionary information. No additional conversion required.
This is sample code; build your own wrapper around it.
from PIL import Image
import pillow_heif
# open the image file
heif_file = pillow_heif.read_heif("/mnt/pictures/test/IMG_0001.HEIC")
# create the new image
image = Image.frombytes(
    heif_file.mode,
    heif_file.size,
    heif_file.data,
    "raw",
    heif_file.mode,
    heif_file.stride,
)
print(heif_file.info.keys())
# The EXIF data is taken as-is from the info dictionary; no additional
# conversion is required. .get() avoids a KeyError when the key is missing.
exif_data = heif_file.info.get('exif')
# debug
print(exif_data)
if exif_data:
    image.save('/tmp/test000.JPG', "JPEG", exif=exif_data)
else:
    # No EXIF data available: save the image without it.
    image.save('/tmp/test000.JPG', "JPEG")
I have no problem getting the opencv face detection using haar feature based cascades working on saved images:
from PIL import Image
# Load the Haar cascade classifiers for faces and eyes from their XML files.
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
eye_cascade = cv2.CascadeClassifier('haarcascade_eye.xml')
# Read the image from disk and convert it from BGR to grayscale.
img = cv2.imread('pic.png')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Run the face detector on the grayscale image (scaleFactor=1.3, minNeighbors=5).
faces = face_cascade.detectMultiScale(gray, 1.3, 5)
but I can't figure out how to open a url image and pass it into face_cascade. I've been playing around with cStringIO, but I don't know what to do with it...
import cv2.cv as cv
import urllib, cStringIO
# URL of the image to fetch.
img = 'http://scontent-b.cdninstagram.com/hphotos-prn/t51.2885-15/10424498_582114441904402_1105042543_n.png'
# Download the image and wrap the raw data in an in-memory file object.
file = cStringIO.StringIO(urllib.urlopen(img).read())
# Open it with PIL and convert it to RGB.
source = Image.open(file).convert("RGB")
# Create a 3-channel, 8-bit image header of the same size and attach the
# PIL pixel data to it.
bitmap = cv.CreateImageHeader(source.size, cv.IPL_DEPTH_8U, 3)
cv.SetData(bitmap, source.tostring())
# Convert from RGB to BGR in place (source and destination are both bitmap).
cv.CvtColor(bitmap, bitmap, cv.CV_RGB2BGR)
is it possible to work with a numpy array instead?
# Open `file` with PIL and convert the result to a uint8 numpy array.
source2 = Image.open(file)
imarr=numpy.array(source2,dtype=numpy.uint8)
I'm a beginner, so I apologize for the poor explanation.
thanks a lot in advance!!
In your first example you are using OpenCV2.imread to read your image; in the second you are presumably using PIL.Image and then trying to convert.
Why not simply save the file to a temp directory and then use OpenCV2.imread again?
Or in another way you can use VideoCapture() class to open url image.
See the C++ code below,
// Open the image URL through VideoCapture; bail out if that fails.
VideoCapture cap;
if(!cap.open("http://docs.opencv.org/trunk/_downloads/opencv-logo.png")){
cout<<"Cannot open image"<<endl;
return -1;
}
// Read one frame from the capture into src and display it.
Mat src;
cap>>src;
imshow("src",src);
// Wait for a key press.
waitKey();