I have a simple python code using OpenCV and Keras that performs some detections on frames (follow-up from my previous question here). But when I want to record and save the frames as a video using video_writer, the generated video is empty.
What is wrong in the video_writer?
#........some code
# start the webcam feed
cap = cv2.VideoCapture(1)
canvasImageOriginal = cv2.imread("fg2.png")
canvasImage = cv2.imread("fg2.png")
canvasHappy = cv2.imread("fg2happy.png")
canvasSad = cv2.imread("fg2sad.png")
x0, x1 = 330, 1290
y0, y1 = 155, 700
#=========
w=960#int(cap.get(cv2.CV_CAP_PROP_FRAME_WIDTH ))
h=540#int(cap.get(cv2.CV_CAP_PROP_FRAME_HEIGHT ))
# video recorder
fourcc = cv2.VideoWriter_fourcc(*'XVID')
video_writer = cv2.VideoWriter('output.avi', fourcc, 25.0, (w, h))
#=========
prediction_history = []
LOOKBACK = 5 # how far you want to look back
counter = 0
while True:
# Find haar cascade to draw bounding box around face
ret, frame = cap.read()
frame=cv2.flip(frame,3)
if not ret:
break
facecasc = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
faces = facecasc.detectMultiScale(gray,scaleFactor=1.3, minNeighbors=5)
for (x, y, w, h) in faces:
cv2.rectangle(frame, (x, y-50), (x+w, y+h+10), (255, 0, 0), 2)
roi_gray = gray[y:y + h, x:x + w]
cropped_img = np.expand_dims(np.expand_dims(cv2.resize(roi_gray, (48, 48)), -1), 0)
prediction = model.predict(cropped_img)
maxindex = int(np.argmax(prediction))
text = emotion_dict[maxindex]
print(prediction[0][3])
prediction_history.append(maxindex)
most_common_index = max(set(prediction_history[-LOOKBACK:][::-1]), key = prediction_history.count)
text = emotion_dict[most_common_index]
#if ("Sad" in text) or ("Angry" in text) or ("Disgusted" in text):
# text = "Sad"
if ("Happy" in text) or ("Sad" in text) :
cv2.putText(frame, text, (x+20, y-60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
if ("Happy" in text):
counter= counter+1
if counter == 10:
#print("Happy!")
canvasImage = canvasHappy
else:
counter = 0
canvasImage = canvasImageOriginal
dim = (800,480)
frame_shrunk = cv2.resize(frame, (x1 - x0, y1 - y0))
canvasImage[y0:y1, x0:x1] = frame_shrunk
#cv2.imshow('Video', cv2.resize(frame,dim,interpolation = cv2.INTER_CUBIC))
cv2.imshow('Demo', canvasImage)
video_writer.write(frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
video_writer.release()
cv2.destroyAllWindows()
As it is mentioned above, please check print(frame.shape).
When I did it, I saw (300,450,3), and I changed the resolution of videowriter as (450,300) and it worked for me. As a result, I can say that frame.shape=(y, x, maybe color) but the resolution of videowriter=(x,y).
Related
I followed a video online about motion detection using openCV however I came across the problem that the findContours function is not returning a value. Any help is appreceated.
Here is the code:
import cv2
import time
import datetime
import imutils
def motion_detection():
video_capture = cv2.VideoCapture(0, cv2.CAP_DSHOW)
time.sleep(2)
first_frame = None
while True:
frame = video_capture.read()[1]
text = 'Unoccupied'
greyscale_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
gaussian_frame = cv2.GaussianBlur(greyscale_frame, (21, 21), 0)
blur_frame = cv2.blur(gaussian_frame, (5, 5))
greyscale_image = blur_frame
if first_frame is None:
first_frame = greyscale_image
else:
pass
frame = imutils.resize(frame, width=500)
frame_delta = cv2.absdiff(first_frame, greyscale_image)
# edit the ** thresh ** depending on the light/dark in room,
# change the 100(anything pixel value over 100 will become 255(white)
thresh = cv2.threshold(frame_delta, 100, 255, cv2.THRESH_BINARY)[1]
dilate_image = cv2.dilate(thresh, None, iterations=2)
cnt = cv2.findContours(dilate_image.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[1]
for c in cnt:
if cv2.contourArea(c) > 800:
(x, y, w, h) = cv2.boundingRect(
c)
cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
text = 'Occupied'
# text that appears when there is motion in video feed
else:
pass
''' now draw text and timestamp on security feed '''
font = cv2.FONT_HERSHEY_SIMPLEX
cv2.putText(frame, '{+} Room Status: %s' % text, (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
cv2.putText(frame, datetime.datetime.now().strftime('%A %d %B %Y %I:%M:%S%p'),
(10, frame.shape[0] - 10), font, 0.35, (0, 0, 255), 1)
cv2.imshow('Security Feed', frame)
cv2.imshow('Threshold(foreground mask)', dilate_image)
cv2.imshow('Frame_delta', frame_delta)
key = cv2.waitKey(1) & 0xFF
if key == ord('q'):
cv2.destroyAllWindows()
break
if __name__ == '__main__':
motion_detection()
I have tried to debug and find the problem the code is exactly what the video said to write and I have had no luck.
import numpy as np
import cv2
from pyzbar import pyzbar
def read_barcodes(frame):
processed =[]
barcodes = pyzbar.decode(frame)
for barcode in barcodes:
x, y , w, h = barcode.rect
barcode_info = barcode.data.decode('utf-8')
cv2.rectangle(frame, (x, y),(x+w, y+h), (0, 255, 0), 2)
font = cv2.FONT_HERSHEY_DUPLEX
cv2.putText(frame, barcode_info, (x + 6, y - 6), font, 2.0, (255, 255, 255), 1)
#my_list = [barcode_info]
#myFinalList = np.unique(my_list).tolist()
#print(myFinalList)
#print(barcode_info)
with open("barcode_result.txt", mode ='w') as file:
file.write("Recognized Barcode:" + barcode_info)
return frame
def main():
camera = cv2.VideoCapture(0)
ret, frame = camera.read()
while ret:
ret, frame = camera.read()
frame = read_barcodes(frame)
cv2.imshow('Barcode/QR code reader', frame)
if cv2.waitKey(1) & 0xFF == 27:
break
camera.release()
cv2.destroyAllWindows()
if __name__ == '__main__':
main()
how to break the loop from printing the repeating values
I have this simple Python code that makes predictions on the emotions of the face (refer to here in case you need to run it), whether the person is happy, sad, etc. It uses cv2 and Keras. Now, I would like to visualize and place a meter on the frame based on the probability of each frame (prob value below which is a percentage). How can I do that?
Something like this. Don't worry about the colors for now.
cap = cv2.VideoCapture(1)
canvasImage = cv2.imread("fg2.png")
x0, x1 = 330, 1290
y0, y1 = 155, 700
prediction_history = []
LOOKBACK = 5 # how far you want to look back
counter = 0
while True:
# Find haar cascade to draw bounding box around face
ret, frame = cap.read()
frame=cv2.flip(frame,3)
if not ret:
break
facecasc = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
faces = facecasc.detectMultiScale(gray,scaleFactor=1.3, minNeighbors=5)
for (x, y, w, h) in faces:
cv2.rectangle(frame, (x, y-50), (x+w, y+h+10), (255, 0, 0), 2)
roi_gray = gray[y:y + h, x:x + w]
cropped_img = np.expand_dims(np.expand_dims(cv2.resize(roi_gray, (48, 48)), -1), 0)
prediction = model.predict(cropped_img)
maxindex = int(np.argmax(prediction))
text = emotion_dict[maxindex]
prob = round(prediction[0][3]*100, 2)
prediction_history.append(maxindex)
most_common_index = max(set(prediction_history[-LOOKBACK:][::-1]), key = prediction_history.count)
text = emotion_dict[most_common_index]
#if ("Sad" in text) or ("Angry" in text) or ("Disgusted" in text):
# text = "Sad"
if ("Happy" in text) or ("Sad" in text) :
cv2.putText(frame, text+": "+str(prob), (x+20, y-60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
dim = (800,480)
frame_shrunk = cv2.resize(frame, (x1 - x0, y1 - y0))
canvasImage[y0:y1, x0:x1] = frame_shrunk
#cv2.imshow('Video', cv2.resize(frame,dim,interpolation = cv2.INTER_CUBIC))
cv2.imshow('Demo', canvasImage)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
There is no built in function in OpenCV for drawing meters, here is a helper function that you can use to draw a meter over an image:
def draw_indicator(img, percentage):
def percentage_to_color(p):
return 0, 255 * p, 255 - (255 * p)
# config
levels = 10
indicator_width = 80
indicator_height = 220
level_width = indicator_width - 20
level_height = int((indicator_height - 20) / levels - 5)
# draw
img_levels = int(percentage * levels)
cv2.rectangle(img, (10, img.shape[0] - (indicator_height + 10)), (10 + indicator_width, img.shape[0] - 10), (0, 0, 0), cv2.FILLED)
for i in range(img_levels):
level_y_b = int(img.shape[0] - (20 + i * (level_height + 5)))
cv2.rectangle(img, (20, level_y_b - level_height), (20 + level_width, level_y_b), percentage_to_color(i / levels), cv2.FILLED)
# test code
img = cv2.imread('a.jpg')
draw_indicator(img, 0.7)
cv2.imshow("test", img)
cv2.waitKey(10000)
I was trying to capture am image on the webcam and extract the text information on it using the language of python.
Here is the code:
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pytesseract
from PIL import Image
from pytesseract import image_to_string
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
def main():
# Use the attached camera to capture images
# 0 stands for the first one
cap = cv2.VideoCapture(0)
if cap.isOpened():
ret, frame = cap.read()
print(ret)
print(frame)
else:
ret = False
img1 = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
# img = Image.open('image.jpg')
text = pytesseract.image_to_string(img1)
print(text)
# plt.imshow(img1)
# plt.title('Color Image RGB')
# plt.xticks([])
# plt.yticks([])
# plt.show()
cap.release()
if __name__ == "__main__":
main()
The code didn't work. I watched a couple of videos on Youtube, and I saw that people typically use Image.open("image.jpg") to open an image that is located on the computer. But I need to capture the image from the webcam and extract the information on it. So that method won't work in my situation. Is there a way to combine these two methods? Like capture the image using cv2 and extract the information using pytesseract.image_to_string()?
Can you please try by replacing the below code of line,
text = pytesseract.image_to_string(img1)
With the code,
text = pytesseract.image_to_string(Image.fromarray(img1))
Or have a working code snippet here, (Copied your code and updated a little),
def main():
# Use the attached camera to capture images
# 0 stands for the first one
cap = cv2.VideoCapture(0)
while cap.isOpened():
ret, frame = cap.read()
img1 = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
text = pytesseract.image_to_string(Image.fromarray(img1))
cv2.imshow('frame', img1)
if cv2.waitKey(0) & 0xFF == ord('q'):
return None
print("Extracted Text: ", text)
cap.release()
Hope This will help you.
I used while look because with if condtion I did not get result, trying to figure it out.
import cv2
import pytesseract
pytesseract.pytesseract.tesseract_cmd = r'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'
framewidth = 640
frameheight = 480
cap = cv2.VideoCapture(0)
cap.set(3, framewidth)
cap.set(4, frameheight)
while True:
success, img = cap.read( )
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
# print(pytesseract.image_to_string(img))
## detecting characters
# hImg,wImg,_= img.shape
# boxes=pytesseract.image_to_boxes(img)
# for b in boxes.splitlines():
# # print(b)
# b=b.split(' ')
# print(b)
# x,y,w,h = int(b[1]),int(b[2]),int(b[3]),int(b[4])
# cv2.rectangle(img,(x,hImg-y),(w,hImg-h),(0,0,255),3)
# cv2.putText(img,b[0],(x,hImg-y+25),cv2.FONT_HERSHEY_COMPLEX,1,(50,100,255),2)
# ## detecting words
hImg, wImg, _ = img.shape
boxes = pytesseract.image_to_data(img)
for x, b in enumerate(boxes.splitlines( )):
if x != 0:
b = b.split( )
print(b)
if len(b)==12:
x, y, w, h = int(b[6]), int(b[7]), int(b[8]), int(b[9])
cv2.rectangle(img, (x, y), (w+x, h+y), (0, 0, 255), 3)
cv2.putText(img, b[11], (x, y), cv2.FONT_HERSHEY_COMPLEX, 1, (50, 100, 255), 2)
## detecting digits
# hImg, wImg, _ = img.shape
# cong= r'--oem 3 --psm 6 outputbase digits'
# boxes = pytesseract.image_to_data(img,config=cong)
#
# for x, b in enumerate(boxes.splitlines( )):
#
# if x != 0:
# b = b.split( )
# print(b)
# if len(b) == 12:
# x, y, w, h = int(b[6]), int(b[7]), int(b[8]), int(b[9])
# cv2.rectangle(img, (x, y), (w + x, h + y), (0, 0, 255), 3)
# cv2.putText(img, b[11], (x, y), cv2.FONT_HERSHEY_COMPLEX, 1, (50, 100, 255), 2)
# reading text don't delete it
# print(pytesseract.image_to_boxes(img))
cv2.imshow("video", img)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
#I don't no how to give answer but i have my code and it's working fine enjoy
I am using Raspberry Pi3 for face recognition and this is my code to detect the faces but the real time recognition ran slowly
cam = cv2.VideoCapture(0)
rec = cv2.face.LBPHFaceRecognizer_create();
rec.read(...'/data/recognizer/trainingData.yml')
getId = 0
font = cv2.FONT_HERSHEY_SIMPLEX
userId = 0
i = 0
while (cam.isOpened() and i<91):
i=i+1
ret, img = cam.read()
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
faces = faceDetect.detectMultiScale(gray, 1.3, 5)
for (x, y, w, h) in faces:
cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
getId, conf = rec.predict(gray[y:y + h, x:x + w]) # This will predict the id of the face
# print conf;
if conf < 50:
userId = getId
cv2.putText(img, "Detected", (x, y + h), font, 2, (0, 255, 0), 2)
record = Records.objects.get(id=userId)
record.is_present = True
record.save()
else:
cv2.putText(img, "Unknown", (x, y + h), font, 2, (0, 0, 255), 2)
# Printing that number below the face
# #Prams cam image, id, location,font style, color, stroke
cv2.imshow("Face", img)
cv2.waitKey(50)`
How to correct it please ? Thanks for your helping hand.
You should use threads to mazimize performance. imutils is a library that lets you use threading both on picamera and webcam capture. The issue here is that there are too many Input output operations being performed in between frames.
Here is the article that helped increase my fps:
https://www.pyimagesearch.com/2015/12/28/increasing-raspberry-pi-fps-with-python-and-opencv/
And this is the code you can add:
import imutils
from imutils.video.pivideostream import PiVideoStream
Then instead of cam = cv2.VideoCapture(0)
use cam = PiVideoStream().start()
and instead of ret, img = cam.read()
use im = cam.read()
and to release the camera use:
cam.stop()