I wrote this program for an application that displays a live video feed from the camera, but the output comes out in some wrong encoding (format); see the photo below.
class Worker1(QThread):
    ImageUpdate = pyqtSignal(QImage)

    def run(self):
        self.ThreadActive = True
        Capture = cv2.VideoCapture(0)
        while self.ThreadActive:
            face_cascade = cv2.CascadeClassifier('cascade/haarcascade_russian_plate_number.xml')
            ret, frame = Capture.read()
            if ret:
                image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                with_cascade = face_cascade.detectMultiScale(image, 1.3, 7)
                for i, (x, y, w, h) in enumerate(with_cascade):
                    roi_color = frame[y:y + h, x:x + w]
                    r = 300.0 / roi_color.shape[1]
                    dim = (400, int(roi_color.shape[0] * r))
                    resized = cv2.resize(roi_color, dim, interpolation=cv2.INTER_AREA)
                    w_resized = resized.shape[0]
                    h_resized = resized.shape[1]
                    frame[380:380 + w_resized, 235:235 + h_resized] = resized
                convert_to_qt_format = QImage(frame.data, frame.shape[1], frame.shape[0], QImage.Format_RGB888)
                self.ImageUpdate.emit(convert_to_qt_format)

def image_update_slot(self, Image):
    self.ui.Cam.setPixmap(QPixmap.fromImage(Image))
    self.ui.Cam.setScaledContents(True)
How can I fix this problem?
Photo with the problem:
Maybe the problem is the image format, but when I change it, the program just closes.
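For reference, a minimal sketch of the usual fix, assuming the garbling comes from the row stride and channel order: OpenCV frames are BGR and their rows are not always tightly packed, so the QImage should be built from the RGB-converted array with an explicit bytesPerLine. Variable names are taken from the code above; the rest of the thread setup is assumed unchanged.

# Hedged sketch of the likely fix: build the QImage from the RGB-converted
# array and pass the row stride explicitly, so Qt does not misread the buffer.
image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)          # OpenCV delivers BGR
h, w, ch = image.shape
bytes_per_line = ch * w                                 # stride of one row in bytes
convert_to_qt_format = QImage(image.data, w, h, bytes_per_line,
                              QImage.Format_RGB888)
# .copy() detaches the QImage from the NumPy buffer, which is reused next frame
self.ImageUpdate.emit(convert_to_qt_format.copy())

Loading the CascadeClassifier once before the while loop, rather than on every iteration, also avoids re-parsing the XML file on every frame.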
Related questions:
I'm currently detecting emotions and I want to enforce a 20-second interval before the program can recognize another emotion. I've tried timer threading, but the program still detects emotions continuously.
Here's the portion of my code that detects the emotion and where I implemented the timer threading:
def load_video(self, *args):
    ret, frame = self.capture.read()
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = self.faceCascade.detectMultiScale(gray,
                                              scaleFactor=1.2,
                                              minNeighbors=10,
                                              minSize=(64, 64),
                                              flags=cv2.CASCADE_SCALE_IMAGE)
    for (x, y, w, h) in faces:
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
        faceROI = gray[y:y+h, x:x+w]
        resized_img = resize(faceROI, (128, 64))
        fd, hog_image = hog(resized_img, orientations=9, pixels_per_cell=(8, 8),
                            cells_per_block=(2, 2), visualize=True, multichannel=False)
        hist = self.desc.describe(resized_img)
        feat = np.hstack([fd, hist])
        self.l = [feat]
        self.recognition()
    buffer = cv2.flip(frame, 0).tostring()
    texture = Texture.create(size=(frame.shape[1], frame.shape[0]), colorfmt='bgr')
    texture.blit_buffer(buffer, colorfmt='bgr', bufferfmt='ubyte')
    self.image.texture = texture

def recognition(self):
    threading.Timer(20.0, self.recognition).start()  # 20-second interval
    print("The predicted image is : " + self.Categories[self.model.predict(self.l)[0]])
I also tried setting a time interval for predicting the face detected in the live camera feed, but now the camera is not showing up in the interface at all.
Here's my current code at the moment:
def load_video(self, *args):
    ret, frame = self.capture.read()
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    while True:
        faces = self.faceCascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5,
                                                  minSize=(60, 60), flags=cv2.CASCADE_SCALE_IMAGE)
        for (x, y, w, h) in faces:
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
            faceROI = gray[y:y+h, x:x+w]
            resized_img = resize(faceROI, (128, 64))
            fd, hog_image = hog(resized_img, orientations=9, pixels_per_cell=(8, 8),
                                cells_per_block=(2, 2), visualize=True, multichannel=False)
            hist = self.desc.describe(resized_img)
            feat = np.hstack([fd, hist])
            l = [feat]
            print("The predicted image is : " + self.Categories[self.model.predict(l)[0]])
            time.sleep(20)
    buffer = cv2.flip(frame, 0).tostring()
    texture = Texture.create(size=(frame.shape[1], frame.shape[0]), colorfmt='bgr')
    texture.blit_buffer(buffer, colorfmt='bgr', bufferfmt='ubyte')
    self.image.texture = texture
I tried moving the last 4 lines before the while loop, but the feed is still not showing up. Same goes for putting them at the end of (inside) the while loop. Any suggestions?
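A hedged sketch of one way out, assuming load_video is driven once per frame by Kivy's Clock (names like self.capture, self.faceCascade, and self.desc are taken from the code above): instead of sleeping or spawning timers, record the time of the last prediction and skip the classifier until 20 seconds have passed. The callback then returns quickly on every frame, so the texture keeps updating. The attribute last_prediction_at is hypothetical and would need to be initialized in __init__.

import time

def load_video(self, *args):
    # Runs once per frame (e.g. scheduled via Kivy's Clock); it must never block.
    ret, frame = self.capture.read()
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = self.faceCascade.detectMultiScale(gray, scaleFactor=1.2, minNeighbors=10,
                                              minSize=(64, 64), flags=cv2.CASCADE_SCALE_IMAGE)
    for (x, y, w, h) in faces:
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
    now = time.monotonic()
    # self.last_prediction_at is a hypothetical attribute; set it to 0.0 in __init__.
    if len(faces) > 0 and now - self.last_prediction_at >= 20.0:
        x, y, w, h = faces[0]
        faceROI = gray[y:y+h, x:x+w]
        resized_img = resize(faceROI, (128, 64))
        fd, _ = hog(resized_img, orientations=9, pixels_per_cell=(8, 8),
                    cells_per_block=(2, 2), visualize=True, multichannel=False)
        feat = np.hstack([fd, self.desc.describe(resized_img)])
        print("The predicted image is : " + self.Categories[self.model.predict([feat])[0]])
        self.last_prediction_at = now  # start the 20-second cooldown
    buffer = cv2.flip(frame, 0).tobytes()
    texture = Texture.create(size=(frame.shape[1], frame.shape[0]), colorfmt='bgr')
    texture.blit_buffer(buffer, colorfmt='bgr', bufferfmt='ubyte')
    self.image.texture = texture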
I have a simple Python program using OpenCV and Keras that performs some detections on frames (a follow-up to my previous question here). But when I try to record and save the frames as a video using video_writer, the generated video is empty.
What is wrong with the video_writer?
# ........some code

# start the webcam feed
cap = cv2.VideoCapture(1)
canvasImageOriginal = cv2.imread("fg2.png")
canvasImage = cv2.imread("fg2.png")
canvasHappy = cv2.imread("fg2happy.png")
canvasSad = cv2.imread("fg2sad.png")
x0, x1 = 330, 1290
y0, y1 = 155, 700
# =========
w = 960  # int(cap.get(cv2.CV_CAP_PROP_FRAME_WIDTH))
h = 540  # int(cap.get(cv2.CV_CAP_PROP_FRAME_HEIGHT))
# video recorder
fourcc = cv2.VideoWriter_fourcc(*'XVID')
video_writer = cv2.VideoWriter('output.avi', fourcc, 25.0, (w, h))
# =========
prediction_history = []
LOOKBACK = 5  # how far you want to look back
counter = 0

while True:
    # Find haar cascade to draw bounding box around face
    ret, frame = cap.read()
    frame = cv2.flip(frame, 3)
    if not ret:
        break
    facecasc = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = facecasc.detectMultiScale(gray, scaleFactor=1.3, minNeighbors=5)
    for (x, y, w, h) in faces:
        cv2.rectangle(frame, (x, y-50), (x+w, y+h+10), (255, 0, 0), 2)
        roi_gray = gray[y:y + h, x:x + w]
        cropped_img = np.expand_dims(np.expand_dims(cv2.resize(roi_gray, (48, 48)), -1), 0)
        prediction = model.predict(cropped_img)
        maxindex = int(np.argmax(prediction))
        text = emotion_dict[maxindex]
        print(prediction[0][3])
        prediction_history.append(maxindex)
        most_common_index = max(set(prediction_history[-LOOKBACK:][::-1]), key=prediction_history.count)
        text = emotion_dict[most_common_index]
        # if ("Sad" in text) or ("Angry" in text) or ("Disgusted" in text):
        #     text = "Sad"
        if ("Happy" in text) or ("Sad" in text):
            cv2.putText(frame, text, (x+20, y-60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        if ("Happy" in text):
            counter = counter + 1
            if counter == 10:
                # print("Happy!")
                canvasImage = canvasHappy
        else:
            counter = 0
            canvasImage = canvasImageOriginal
    dim = (800, 480)
    frame_shrunk = cv2.resize(frame, (x1 - x0, y1 - y0))
    canvasImage[y0:y1, x0:x1] = frame_shrunk
    # cv2.imshow('Video', cv2.resize(frame, dim, interpolation=cv2.INTER_CUBIC))
    cv2.imshow('Demo', canvasImage)
    video_writer.write(frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
video_writer.release()
cv2.destroyAllWindows()
As mentioned above, please check print(frame.shape).
When I did, I saw (300, 450, 3); I changed the VideoWriter resolution to (450, 300) and it worked for me. In short, frame.shape is (height, width, channels), while the VideoWriter resolution must be given as (width, height); if the two do not match, the written video comes out empty.
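A minimal sketch of that fix, assuming the same device index and codec as in the question; reading one frame first and sizing the writer from frame.shape avoids hard-coding a resolution the camera may not actually deliver:

import cv2

cap = cv2.VideoCapture(1)
ret, frame = cap.read()                  # grab one frame to learn the real size
assert ret, "camera did not return a frame"
h, w = frame.shape[:2]                   # frame.shape is (height, width, channels)
fourcc = cv2.VideoWriter_fourcc(*'XVID')
video_writer = cv2.VideoWriter('output.avi', fourcc, 25.0, (w, h))  # expects (width, height)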
I'm trying to write a script that anonymizes faces in videos.
Here is my code (Python):
import cv2
from mtcnn.mtcnn import MTCNN

ksize = (101, 101)

def decode_fourcc(cc):
    return "".join([chr((int(cc) >> 8 * i) & 0xFF) for i in range(4)])

def find_face_MTCNN(color, result_list):
    for result in result_list:
        x, y, w, h = result['box']
        roi = color[y:y+h, x:x+w]
        cv2.rectangle(color, (x, y), (x+w, y+h), (0, 155, 255), 5)
        detectedFace = cv2.GaussianBlur(roi, ksize, 0)
        color[y:y+h, x:x+w] = detectedFace
    return color

detector = MTCNN()
video_capture = cv2.VideoCapture("basic.mp4")
width = int(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
length = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
fps = int(video_capture.get(cv2.CAP_PROP_FPS))
video_out = cv2.VideoWriter(
    "mtcnn.mp4", cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height))

while length:
    _, color = video_capture.read()
    faces = detector.detect_faces(color)
    detectFaceMTCNN = find_face_MTCNN(color, faces)
    video_out.write(detectFaceMTCNN)
    cv2.imshow("Video", detectFaceMTCNN)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

fourccIN = video_capture.get(cv2.CAP_PROP_FOURCC)
fourccOUT = video_out.get(cv2.CAP_PROP_FOURCC)
print(f"input fourcc is: {fourccIN, decode_fourcc(fourccIN)}")
print(f"output fourcc is: {fourccOUT, decode_fourcc(fourccOUT)}")
video_capture.release()
cv2.destroyAllWindows()
I get a perfectly working window with the anonymization, so imshow() works fine. But the newly saved video "mtcnn.mp4" can't be opened. I found out the problem is the fourcc format of the new video, since my output is:
input fourcc is: (828601953.0, 'avc1')
output fourcc is: (-1.0, 'ÿÿÿÿ')
'ÿÿÿÿ' stands for unreadable, so that's the core of the matter...
Can someone help me please?
These questions are probably about the same problem:
Using MTCNN with a webcam via OpenCV
And I used this to decode the fourcc:
What is the opposite of cv2.VideoWriter_fourcc?
I've changed this line:
video_out = cv2.VideoWriter(
    "mtcnn.mp4", cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height))
to:
video_out = cv2.VideoWriter(
    "mtcnn.avi", cv2.VideoWriter_fourcc(*'XVID'), fps, (width, height))
And now it works for me.
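For what it's worth, a small hedged sketch of how to make this robust rather than build-dependent: cv2.VideoWriter.isOpened() reports whether the fourcc/container pair was accepted by the local OpenCV/FFmpeg build (in the question above the writer silently failed, hence the -1 fourcc), so you can fall back to the widely supported XVID/.avi pair automatically. The open_writer helper name is just for illustration:

import cv2

def open_writer(base_name, fps, size):
    # Try mp4v in an .mp4 container first; if the local OpenCV/FFmpeg build
    # rejects it, fall back to the widely supported XVID/.avi combination.
    out = cv2.VideoWriter(base_name + ".mp4", cv2.VideoWriter_fourcc(*"mp4v"), fps, size)
    if not out.isOpened():
        out = cv2.VideoWriter(base_name + ".avi", cv2.VideoWriter_fourcc(*"XVID"), fps, size)
    return out

video_out = open_writer("mtcnn", fps, (width, height))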
What I'm trying to make:
An OpenCV program that can record a video and mask my face using an image of a mask.
My Code:
import numpy as np
import cv2
import os
import time
import face_recognition as fr
import pkg_resources

filename = "THIS_IS_A_TEST.mp4"
frames_per_seconds = 24.0
my_res = "720p"

face_cascade = cv2.CascadeClassifier('C:\\Users\\jack\\Desktop\\haarcascade_frontalface_default.xml')
# eyes_cascade = cv2.CascadeClassifier('C:\\Users\\jack\\Desktop\\frontalEyes35x16.xml')
mask = cv2.imread("C:\\Users\\jack\\Desktop\\Blogger_Video_Recorder\\TheMask.png", -1)

def change_res(cap, width, height):
    cap.set(3, width)
    cap.set(4, height)

STD_DIMENSIONS = {
    "480p": (640, 480),
    "720p": (1280, 720),
    "1080p": (1920, 1080),
    "4k": (3840, 2160),
}

def image_resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    # initialize the dimensions of the image to be resized and
    # grab the image size
    dim = None
    (h, w) = image.shape[:2]
    # if both the width and height are None, then return the
    # original image
    if width is None and height is None:
        return image
    # check to see if the width is None
    if width is None:
        # calculate the ratio of the height and construct the
        # dimensions
        r = height / float(h)
        dim = (int(w * r), height)
    # otherwise, the height is None
    else:
        # calculate the ratio of the width and construct the
        # dimensions
        r = width / float(w)
        dim = (width, int(h * r))
    # resize the image
    resized = cv2.resize(image, dim, interpolation=inter)
    # return the resized image
    return resized

def get_dims(cap, res="1080p"):
    width, height = STD_DIMENSIONS["480p"]
    if res in STD_DIMENSIONS:
        width, height = STD_DIMENSIONS[res]
    change_res(cap, width, height)
    return width, height

VIDEO_TYPE = {
    "mp4": cv2.VideoWriter_fourcc(*"XVID")
}

def get_video_type(filename):
    filename, ext = os.path.splitext(filename)
    if ext in VIDEO_TYPE:
        return VIDEO_TYPE[ext]
    return VIDEO_TYPE["mp4"]

cap = cv2.VideoCapture(0)
dims = get_dims(cap, res=my_res)
video_type_cv2 = get_video_type(filename)
out = cv2.VideoWriter(filename, video_type_cv2, frames_per_seconds, dims)

while True:
    ret, frame = cap.read()
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.5, minNeighbors=5)
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)
    for (x, y, w, h) in faces:
        roi_gray = gray[y:y+h, x:x+h]
        roi_color = frame[y:y+h, x:x+h]
        roi_faces = roi_gray[y:y+h, x:x+w]
        mask2 = image_resize(mask.copy(), width=w)
        mw, mh, mc = mask2.shape
        for i in range(0, mw):
            for j in range(0, mh):
                if mask2[i, j][3] != 0:
                    roi_color[y + i, x + j] = mask2[i, j]
    frame = cv2.cvtColor(frame, cv2.COLOR_BGRA2BGR)
    out.write(frame)
    cv2.imshow("frame", frame)
    if cv2.waitKey(20) & 0xFF == ord("q"):
        break

cap.release()
out.release()
cv2.destroyAllWindows()
Traceback (most recent call last):
  File "C:\Users\jack\Desktop\Blogger_Video_Recorder\tutorial#1.py", line 93, in <module>
    roi_color[y + i, x + j] = mask2[i, j]
IndexError: index 426 is out of bounds for axis 0 with size 273
BTW, I am also running this in Python IDLE, so any help is appreciated :)
You do not need most of the existing steps. You can use a single slice assignment instead:
draw[y:y+h, x:x+w] = mask2
But in reality there are several errors. The first is that the resized mask is shorter than the face region, so you cannot paste it into a slice sized by the face coordinates.
However, you can take the mask's actual height and use it to set the target region on the frame:
mask2 = image_resize(mask.copy(), width=w)
mw, mh, mc = mask2.shape  # shape is (rows, cols, channels), so mw here is actually the mask height
draw[y:y+mw, x:x+w] = mask2
Note: please do not use cv2.COLOR_BGR2BGRA, i.e. the conversion line
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)
unless you really need it, because it adds a fourth channel to the frame, and an error will occur as soon as you paste the three-channel mask into it, like:
ValueError: could not broadcast input array from shape (273,410,3) into shape (273,410,4)
You could of course work around that too, but it is hardly worth it, because the alpha channel is never actually used in this code.
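That said, if you do want to honor the PNG's alpha channel (the question loads the mask with cv2.imread(..., -1) for exactly that reason), here is a hedged, vectorized sketch that replaces the per-pixel loops; it assumes mask2 is BGRA, draw is a plain BGR frame, and the mask fits inside the frame at (x, y):

# Paste only the opaque pixels of a BGRA mask onto a BGR frame, no Python loops.
mask2 = image_resize(mask.copy(), width=w)
mh_px, mw_px = mask2.shape[:2]            # rows (height) and cols (width)
region = draw[y:y+mh_px, x:x+mw_px]       # target slice on the frame (a view of draw)
opaque = mask2[:, :, 3] != 0              # boolean map from the alpha channel
region[opaque] = mask2[:, :, :3][opaque]  # copy BGR values only where alpha is set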
Full code:
import numpy as np
import cv2
import os
import time
import face_recognition as fr
import pkg_resources

filename = "THIS_IS_A_TEST.mp4"
frames_per_seconds = 24.0
my_res = "720p"

face_cascade = cv2.CascadeClassifier(os.path.join(cv2.data.haarcascades, 'haarcascade_frontalface_default.xml'))
# eyes_cascade = cv2.CascadeClassifier(os.path.join(cv2.data.haarcascades, 'frontalEyes35x16.xml'))
mask = cv2.imread("test.jpg", -1)

def change_res(cap, width, height):
    cap.set(3, width)
    cap.set(4, height)

STD_DIMENSIONS = {
    "480p": (640, 480),
    "720p": (1280, 720),
    "1080p": (1920, 1080),
    "4k": (3840, 2160),
}

def image_resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    # initialize the dimensions of the image to be resized and
    # grab the image size
    dim = None
    (h, w) = image.shape[:2]
    # if both the width and height are None, then return the
    # original image
    if width is None and height is None:
        return image
    # check to see if the width is None
    if width is None:
        # calculate the ratio of the height and construct the
        # dimensions
        r = height / float(h)
        dim = (int(w * r), height)
    # otherwise, the height is None
    else:
        # calculate the ratio of the width and construct the
        # dimensions
        r = width / float(w)
        dim = (width, int(h * r))
    # resize the image
    resized = cv2.resize(image, dim, interpolation=inter)
    # return the resized image
    return resized

def get_dims(cap, res="1080p"):
    width, height = STD_DIMENSIONS["480p"]
    if res in STD_DIMENSIONS:
        width, height = STD_DIMENSIONS[res]
    change_res(cap, width, height)
    return width, height

VIDEO_TYPE = {
    "mp4": cv2.VideoWriter_fourcc(*"XVID")
}

def get_video_type(filename):
    filename, ext = os.path.splitext(filename)
    if ext in VIDEO_TYPE:
        return VIDEO_TYPE[ext]
    return VIDEO_TYPE["mp4"]

cap = cv2.VideoCapture(0)
dims = get_dims(cap, res=my_res)
video_type_cv2 = get_video_type(filename)
out = cv2.VideoWriter(filename, video_type_cv2, frames_per_seconds, dims)

while True:
    ret, frame = cap.read()
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.5, minNeighbors=5)
    draw = frame.copy()
    for (x, y, w, h) in faces:
        roi_gray = gray[y:y+h, x:x+w]
        roi_color = frame[y:y+h, x:x+w]
        roi_faces = roi_gray[y:y+h, x:x+w]
        mask2 = image_resize(mask.copy(), width=w)
        mw, mh, mc = mask2.shape
        draw[y:y+mw, x:x+w] = mask2
    out.write(draw)
    cv2.imshow("frame", draw)
    if cv2.waitKey(5) & 0xFF == ord("q"):
        break

cap.release()
out.release()
cv2.destroyAllWindows()
Side note:
It is best to use cv2.data.haarcascades to locate the XML files bundled with the library. You can use os.path.join(cv2.data.haarcascades, 'u_file.xml') to build a valid path on most operating systems. An example of this:
import os
import cv2

root_data = cv2.data.haarcascades
face_cascade = cv2.CascadeClassifier(os.path.join(root_data, 'haarcascade_frontalface_default.xml'))
eyes_cascade = cv2.CascadeClassifier(os.path.join(root_data, 'frontalEyes35x16.xml'))
Done.