I want to record the audio and store the same while analyzing the emotion using Opencv. But unfortunately, the audio which is being recorded is of minimal length and the voice is also not clear. I want to analyze the voice emotion too for which I need to audio. Could somebody help me in resolving the same?
def main():
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
p = pyaudio.PyAudio()
stream = p.open(format=audio_format, channels=channels,rate=16000, input=True,frames_per_buffer=1024)
start_time = time.time()
aud = True
while aud:
ret, frame = cap.read()
data = stream.read(chunk, exception_on_overflow = False)
frames.append(data)
if not ret:
break
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
faces = face_cascade.detectMultiScale(gray, scaleFactor=1.3, minNeighbors= 5, flags=cv2.CASCADE_SCALE_IMAGE)
result = DeepFace.analyze(img_path = frame , actions = ["emotion"], enforce_detection=False,detector_backend='ssd')
for (x,y,w,h) in faces:
if w > 130: #trick: ignore small faces
draw_border(frame, (x, y), (x + w, y + h), (255, 0, 105),4, 15, 10) ## draw rectangle around face.
detected_face = frame[int(y):int(y+h), int(x):int(x+w)] #crop detected face
detected_face = cv2.cvtColor(detected_face, cv2.COLOR_BGR2GRAY) #transform to gray scale
detected_face = cv2.resize(detected_face, (48, 48)) #resize to 48x48
img_pixels = img_to_array(detected_face)
img_pixels = np.expand_dims(img_pixels, axis = 0)
img_pixels /= 255 #pixels are in scale of [0, 255]. normalize all pixels in scale of [0, 1]
emotion = result["dominant_emotion"]
txt = str(emotion)
cv2.putText(frame,txt,(50,50),cv2.FONT_HERSHEY_SIMPLEX,1,(0,0,255),3)
cv2.imshow(file, frame)
if cv2.waitKey(1) & 0xFF == ord("q"):
aud=False
break
stream.stop_stream()
stream.close()
p.terminate()
wf = wave.open('/Users/xyz/Documents/Audio/wv.wav', 'wb')
wf.setnchannels(1)
wf.setsampwidth(p.get_sample_size(audio_format))
wf.setframerate(sample_rate)
wf.writeframes(b''.join(frames))
wf.close()
cap.release()
cv2.destroyAllWindows()
Related
This is my code, I've looked at some tutorials but can't find what I'm looking for
I want to overlay the Frame.png image on my webcam. I tried to add the image directly but it didn't work either. If possible, Is there a way to add an image, not to overlay but to keep the image at a certain coordinate in the live webcam window
import cv2
import numpy as np
def detect_and_save():
alpha = 0.2
beta = 1-alpha
cap = cv2.VideoCapture(0)
sciframe = cv2.imread('Frame.png')
classifier = cv2.CascadeClassifier("haarcascade_frontalface_default.xml")
while True:
ret ,frame = cap.read()
overlay = frame.copy()
output = frame.copy()
gray = cv2.cvtColor(frame,cv2.COLOR_BGR2GRAY)
faces = classifier.detectMultiScale(gray,1.5,5)
cv2.putText(output, "HUD Test",(175, 50), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 50, 50), 3)
cv2
for face in faces:
x,y,w,h = face
cv2.rectangle(overlay,(x,y),(x+w,y+h),(255,200,0),-1)
cv2.rectangle(overlay,(x,y),(x+w,y+h),(255,0,0),1)
cv2.rectangle(overlay,(x,y-20),(x+w,y),(25,20,0),-1)
cv2.addWeighted(overlay,alpha,output,beta,0,output)
cv2.putText(output,"Human",(x+10,y-10),cv2.FONT_HERSHEY_SIMPLEX,
0.35, (0, 0, 255), 1)
if not ret:
continue
cv2.imshow("HUD",output)
key = cv2.waitKey(1)
if key == ord('q'):
break
elif key == ord('s'):
cv2.imwrite('./images/CID_{}.png'.format(time.strftime('%d%m%y_%H_%M_%S')),output)
cap.release()
cv2.destroyAllWindows()
if __name__ == "__main__":
import time
detect_and_save()
You can directly add one image on top of another one at any coordinate easily in opencv.
cap = cv2.VideoCapture(0)
im_height = 50 #define your top image size here
im_width = 50
im = cv2.resize(cv2.imread("Frame.png"), (im_width, im_height))
while (True):
ret, frame = cap.read()
frame[0:im_width, 0:im_height] = im #for top-left corner, 0:50 and 0:50 for my image; select your region here like 200:250
cv2.imshow("live camera", frame)
if cv2.waitKey(1) == ord("q"):
break
cap.release()
cv2.destroyAllWindows()
I am recording 1080p video to file, but I need to show to user 1280*720 size. how can I do this ? When it's 1080p, it doesn't fit on the screen, so I want to show 720p to the user and get the recording in 1080p.
This is my source code:
cap = cv2.VideoCapture('rtsp://secret:secret#192.168.1.64/1')
filename = user_id +'.mp4' #.avi .mp4
frames_per_seconds = 25 #this is the standard for the movie or films
config = CFEVideoConf(cap, filepath = filename, res='1080p')
out = cv2.VideoWriter(filename, config.video_type, frames_per_seconds,config.dims)
print(config.dims)
img_path = 'ap_logo.png'
logo = cv2.imread(img_path,-1)
watermark = image_resize(logo, height=50)
watermark = cv2.cvtColor(watermark, cv2.COLOR_BGR2BGRA)
while(True):
ret, frame = cap.read()
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)
frame = cv2.flip(frame, 1)
frame = cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE)
frame_h, frame_w, frame_c = frame.shape
# # overlay with 4 channel BGR and Alpha
overlay = np.zeros((frame_h, frame_w, 4), dtype='uint8')
watermark_h, watermark_w, watermark_c = watermark.shape
for i in range(0, watermark_h):
for j in range(0, watermark_w):
if watermark[i, j][3] != 0:
h_offset = frame_h - watermark_h
w_offset = frame_w - watermark_w
overlay[h_offset + i, w_offset + j] = watermark[i, j]
cv2.addWeighted(overlay, 0.25, frame, 1.0, 0, frame)
# Display the resulting frame
frame = cv2.cvtColor(frame, cv2.COLOR_BGRA2BGR)
if ret:
# cv2.resize(1280, 720)
cv2.imshow('Frame', frame)
# cv2.resize(frame, 720,1280, interpolation=cv2.INTER_AREA)
# cv2.namedWindow('Frame', cv2.WINDOW_KEEPRATIO)
# cv2.resizeWindow('Frame', 720, 1280)
out.write(frame) # file a ilgili frame yazılıyor
if cv2.waitKey(1) & 0xFF == ord('q'):
break
#When everything done, relase the capture
cap.release()
out.release() # saved
cv2.destroyAllWindows()
I'm currently practicing opencv and tried the face recognition code and it's working fine. I'd like to get notified if I'm away from the screen for more than 2 mins. I'm trying to play an audio file when I'm away and stop it when I'm back.
import cv2 as cv
import sys
cascPath = sys.argv[1]
faceCascade = cv2.CascadeClassifier(cascPath)
video_capture = cv.VideoCapture(0)
while True:
# Capture frame-by-frame
ret, frame = video_capture.read()
gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
faces = faceCascade.detectMultiScale(
gray,
scaleFactor=1.1,
minNeighbors=5,
minSize=(30, 30),
flags=cv.CASCADE_SCALE_IMAGE
)
# Draw a rectangle around the faces
for (x, y, w, h) in faces:
cv.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
# Display the resulting frame
cv.imshow('Video', frame)
# check if the tuple faces is empty
if len(faces) == 0:
start_time = time.time()
while len(faces) == 0:
print('person is away for ',time.time()-start_time)
d_time = time.time()
if d_time-start_time > 120:
pygame.mixer.init()
sound = pygame.mixer.Sound("Recording.mp3")
sound.play(5)
if cv.waitKey(1) & 0xFF == ord('q'):
break
video_capture.release()
cv.destroyAllWindows()
I have a simple python code using OpenCV and Keras that performs some detections on frames (follow-up from my previous question here). But when I want to record and save the frames as a video using video_writer, the generated video is empty.
What is wrong in the video_writer?
#........some code
# start the webcam feed
cap = cv2.VideoCapture(1)
canvasImageOriginal = cv2.imread("fg2.png")
canvasImage = cv2.imread("fg2.png")
canvasHappy = cv2.imread("fg2happy.png")
canvasSad = cv2.imread("fg2sad.png")
x0, x1 = 330, 1290
y0, y1 = 155, 700
#=========
w=960#int(cap.get(cv2.CV_CAP_PROP_FRAME_WIDTH ))
h=540#int(cap.get(cv2.CV_CAP_PROP_FRAME_HEIGHT ))
# video recorder
fourcc = cv2.VideoWriter_fourcc(*'XVID')
video_writer = cv2.VideoWriter('output.avi', fourcc, 25.0, (w, h))
#=========
prediction_history = []
LOOKBACK = 5 # how far you want to look back
counter = 0
while True:
# Find haar cascade to draw bounding box around face
ret, frame = cap.read()
frame=cv2.flip(frame,3)
if not ret:
break
facecasc = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
faces = facecasc.detectMultiScale(gray,scaleFactor=1.3, minNeighbors=5)
for (x, y, w, h) in faces:
cv2.rectangle(frame, (x, y-50), (x+w, y+h+10), (255, 0, 0), 2)
roi_gray = gray[y:y + h, x:x + w]
cropped_img = np.expand_dims(np.expand_dims(cv2.resize(roi_gray, (48, 48)), -1), 0)
prediction = model.predict(cropped_img)
maxindex = int(np.argmax(prediction))
text = emotion_dict[maxindex]
print(prediction[0][3])
prediction_history.append(maxindex)
most_common_index = max(set(prediction_history[-LOOKBACK:][::-1]), key = prediction_history.count)
text = emotion_dict[most_common_index]
#if ("Sad" in text) or ("Angry" in text) or ("Disgusted" in text):
# text = "Sad"
if ("Happy" in text) or ("Sad" in text) :
cv2.putText(frame, text, (x+20, y-60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
if ("Happy" in text):
counter= counter+1
if counter == 10:
#print("Happy!")
canvasImage = canvasHappy
else:
counter = 0
canvasImage = canvasImageOriginal
dim = (800,480)
frame_shrunk = cv2.resize(frame, (x1 - x0, y1 - y0))
canvasImage[y0:y1, x0:x1] = frame_shrunk
#cv2.imshow('Video', cv2.resize(frame,dim,interpolation = cv2.INTER_CUBIC))
cv2.imshow('Demo', canvasImage)
video_writer.write(frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
video_writer.release()
cv2.destroyAllWindows()
As it is mentioned above, please check print(frame.shape).
When I did it, I saw (300,450,3), and I changed the resolution of videowriter as (450,300) and it worked for me. As a result, I can say that frame.shape=(y, x, maybe color) but the resolution of videowriter=(x,y).
It's my beginning with coding and this site. I'm working on project, where I want to use openCV, but I've got an issue with that. I need to resize output frame, for recognizing object. I have read, that frame should be in size 416x416, but when I'm trying to release the frame, it's still in regular size.
Here's the code:
import pafy
import youtube_dl
import cv2
import numpy as np
url = "https://www.youtube.com/watch?v=WOn7m0_aYBw"
video = pafy.new(url)
best = video.getbest(preftype="mp4")
cap = cv2.VideoCapture()
cap.open(best.url)
net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")
classes = []
with open("coco.names", "r") as f:
classes = [line.strip() for line in f.readlines()]
layer_names = net.getLayerNames()
output_layers =[layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]
colors = np.random.uniform(0, 255, size=(len(classes), 3))
while True:
ret, frame = cap.read()
# if ret == True:
img = cv2.imshow('frame',frame)
#cap.set(cv2.CAP_PROP_FRAME_WIDTH, 416)
#cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 416)
width = 416
height = 416
dim = (width, height)
img = cv2.resize(frame, dim, interpolation = cv2.INTER_AREA)
print(img.shape)
if cv2.waitKey(20) & 0xFF == ord('q'):
break
blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
net.setInput(blob)
outs = net.forward(output_layers)
print(img.shape) returns correct size, but I think I'm releasing wrong window. How to change this code to releasing window in correct size?
You were showing the frame before resizing
while True:
ret, frame = cap.read()
width = 416
height = 416
dim = (width, height)
img = cv2.resize(frame, dim, interpolation = cv2.INTER_AREA)
print(img.shape)
cv2.imshow('frame',img)
if cv2.waitKey(20) & 0xFF == ord('q'):
break