How to stop the OpenCV (cv2) cam from crashing on startup? - python

I have this machine learning code for recognizing hand gestures. It uses the webcam to detect hand gestures. The problem, however, is that whenever I run this code it opens the webcam (which works for a second) and then crashes immediately with an "is not responding" error.
Here is my opencv.py:
import cv2
import numpy as np
from keras.models import load_model
from skimage.transform import resize, pyramid_reduce

model = load_model('model.h5')

while True:
    cam_capture = cv2.VideoCapture(0)
    _, image_frame = cam_capture.read()
    # Select ROI
    im2 = crop_image(image_frame, 300, 300, 300, 300)
    image_grayscale = cv2.cvtColor(im2, cv2.COLOR_BGR2GRAY)
    image_grayscale_blurred = cv2.GaussianBlur(image_grayscale, (15, 15), 0)
    #resized_img = image_resize(image_grayscale_blurred, width = 28, height = 28, inter = cv2.INTER_AREA)
    #resized_img = keras_process_image(image_grayscale_blurred)
    resized_img = cv2.resize(image_grayscale_blurred, (28, 28))
    #ar = np.array(resized_img)
    ar = resized_img.reshape(1, 784)
    pred_probab, pred_class = keras_predict(model, ar)
    print(pred_class, pred_probab)
    # Display cropped image
    cv2.imshow("Image2", im2)
    cv2.imshow("Image4", resized_img)
    cv2.imshow("Image3", image_grayscale_blurred)
    if cv2.waitKey(25) & 0xFF == ord('q'):
        cv2.destroyAllWindows()
        break

cam_capture.release()
cv2.destroyAllWindows()
I have deleted some redundant code. My main file is ROIinOpenCv.py:
import cv2
import numpy as np
from keras.models import load_model
from skimage.transform import resize, pyramid_reduce

model = load_model('model.h5')

def get_square(image, square_size):
    height, width = image.shape
    if(height > width):
        differ = height
    else:
        differ = width
    differ += 4

    mask = np.zeros((differ, differ), dtype="uint8")
    x_pos = int((differ - width) / 2)
    y_pos = int((differ - height) / 2)
    mask[y_pos: y_pos + height, x_pos: x_pos + width] = image[0: height, 0: width]

    if differ / square_size > 1:
        mask = pyramid_reduce(mask, differ / square_size)
    else:
        mask = cv2.resize(mask, (square_size, square_size), interpolation=cv2.INTER_AREA)
    return mask

def keras_predict(model, image):
    data = np.asarray(image, dtype="int32")
    pred_probab = model.predict(data)[0]
    pred_class = list(pred_probab).index(max(pred_probab))
    return max(pred_probab), pred_class

def keras_process_image(img):
    image_x = 28
    image_y = 28
    #img = cv2.resize(img, (28,28), interpolation = cv2.INTER_AREA)
    img = get_square(img, 28)
    img = np.reshape(img, (image_x, image_y))
    return img

def crop_image(image, x, y, width, height):
    return image[y:y + height, x:x + width]

while True:
    cam_capture = cv2.VideoCapture(0)
    _, image_frame = cam_capture.read()
    # Select ROI
    im2 = crop_image(image_frame, 300, 300, 300, 300)
    image_grayscale = cv2.cvtColor(im2, cv2.COLOR_BGR2GRAY)
    image_grayscale_blurred = cv2.GaussianBlur(image_grayscale, (15, 15), 0)
    #resized_img = image_resize(image_grayscale_blurred, width = 28, height = 28, inter = cv2.INTER_AREA)
    #resized_img = keras_process_image(image_grayscale_blurred)
    resized_img = cv2.resize(image_grayscale_blurred, (28, 28))
    #ar = np.array(resized_img)
    ar = resized_img.reshape(1, 784)
    pred_probab, pred_class = keras_predict(model, ar)
    print(pred_class, pred_probab)
    # Display cropped image
    cv2.imshow("Image2", im2)
    cv2.imshow("Image4", resized_img)
    cv2.imshow("Image3", image_grayscale_blurred)
    if cv2.waitKey(25) & 0xFF == ord('q'):
        cv2.destroyAllWindows()
        break

cam_capture.release()
cv2.destroyAllWindows()
PS: I am using Ubuntu. Do you think that, by any chance, it could be a problem with permission to access the webcam?

You are opening the camera on every iteration of the while loop, and that is what causes the error. The fix is simply to create your cam_capture object outside the while loop.
Please refer to the code below.
import cv2
import numpy as np
from keras.models import load_model
from skimage.transform import resize, pyramid_reduce

model = load_model('model.h5')

cam_capture = cv2.VideoCapture(0)  # create camera object outside while-loop

while True:
    _, image_frame = cam_capture.read()
    # Select ROI
    im2 = crop_image(image_frame, 300, 300, 300, 300)
    image_grayscale = cv2.cvtColor(im2, cv2.COLOR_BGR2GRAY)
    image_grayscale_blurred = cv2.GaussianBlur(image_grayscale, (15, 15), 0)
    resized_img = cv2.resize(image_grayscale_blurred, (28, 28))
    ar = resized_img.reshape(1, 784)
    pred_probab, pred_class = keras_predict(model, ar)
    print(pred_class, pred_probab)
    # Display cropped image
    cv2.imshow("Image2", im2)
    cv2.imshow("Image4", resized_img)
    cv2.imshow("Image3", image_grayscale_blurred)
    if cv2.waitKey(25) & 0xFF == ord('q'):
        cv2.destroyAllWindows()
        break

cam_capture.release()
cv2.destroyAllWindows()
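As for the PS about Ubuntu permissions: if the device could not be opened at all, cap.read() would fail on every frame rather than work for a second and then hang, so a permission problem is unlikely here. It is cheap to rule out, though. A minimal check (a sketch, assuming the default device index 0):

import cv2

cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # On Ubuntu this usually means another process is holding /dev/video0,
    # or your user lacks permission (e.g. is not in the 'video' group).
    raise RuntimeError("Cannot open webcam - check access to /dev/video0")
ret, frame = cap.read()
print("frame grabbed:", ret)
cap.release()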

Related

Why is the video output so slow (frame by frame) through OpenCV (Python)?

I'm building a neural network model, and my detection.py file is really slow: the output video is rendered literally frame by frame.
I tried adjusting a few things, but the result is still the same - slow video. What could be the problem? Thank you.
import numpy as np
import random as rnd
import cv2
from utils import *
from model import *

seed = 11
rnd.seed(seed)
np.random.seed(seed)

videofile = "files/cardriving.mp4"
cap = cv2.VideoCapture(videofile)

model = make_model()
model.load_weights("weights/weights_best.h5")

lower = np.array([0, 0, 0])
upper = np.array([100, 100, 100])
stepSize = 30

while True:
    ret, frame = cap.read()
    if ret == False:
        print("Done")
        break
    #convert image to HSV from BGR
    img_hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    #find the pixels that correspond to the road
    img_out = cv2.inRange(img_hsv, lower, upper)
    #clean from noisy pixels and keep the largest connected segment
    img_out = post_process(img_out)
    image_masked = frame.copy()
    #get masked image
    image_masked[img_out == 0] = (0, 0, 0)
    s = 0.25
    #resize images for computational efficiency
    frame = cv2.resize(frame, None, fx=s, fy=s)
    image_masked = cv2.resize(image_masked, None, fx=s, fy=s)
    #run the sliding window detection process
    bbox_list, totalWindows, correct, score = detectionProcess(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), model, winH=50, winW=50, depth=3, nb_images=1, scale=1, stepSize=stepSize, thres_score=0.05)
    #draw the detections
    drawBoxes(frame, bbox_list)
    #draw detections and road masks
    cv2.imshow("video", sidebyside(frame, image_masked))
    k = cv2.waitKey(3)
    if(k & 0xFF == ord('q')):
        cv2.destroyWindow("video")
        break

cap.release()
cv2.destroyAllWindows()
Here is the utils.py file, where I wrote post_process and detectionProcess:
import numpy as np
import cv2

def scale_to_image(x, a=0, b=255):
    #min-max scaling for grayscale images
    ma = (np.max(x))
    if(ma == 0):
        return x.astype(np.uint8)
    mi = (np.min(x))
    normalized_data = ((x.astype(np.float)-float(mi))/float(ma))  #normalize 0-1
    normalized_data = (normalized_data*b + a*(1-normalized_data))  #scale values
    return normalized_data.astype(np.uint8)

def nothing(x):
    pass

def channels3(x):
    #stack grayscale images together to increase the color channels to 3
    return np.dstack((x, x, x))

def sidebyside(x, y):
    #concatenate images side by side (horizontally)
    return np.concatenate((x, y), axis=1)

def updown(x, y):
    #concatenate images up and down (vertically)
    return np.concatenate((x, y), axis=0)

def extractLargerSegment(maskROAD):
    contours, hierarchy = cv2.findContours(maskROAD.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE, contours=None, hierarchy=None)
    maxA = 0
    maskTemp = np.zeros_like(maskROAD)
    if(len(contours) > 0):
        for h, cnt in enumerate(contours):
            if(cv2.contourArea(cnt) > maxA):
                cntMax = cnt
                maxA = cv2.contourArea(cnt)
        mask = np.zeros(maskROAD.shape, np.uint8)
        cv2.drawContours(maskTemp, [cntMax], 0, 255, -1)
        maskROAD = cv2.bitwise_and(maskROAD, maskTemp)
    return maskROAD

def post_process(img):
    kernel = np.ones((5, 5), np.uint8)
    img_out = cv2.erode(img, kernel, iterations=3)
    kernel = np.ones((20, 20), np.uint8)
    img_out = cv2.dilate(img_out, kernel, iterations=5)
    img_out = extractLargerSegment(img_out)
    return img_out

def display(img_init, img_hsv, img_out2, img_out):
    mask = scale_to_image(np.dstack((img_out, np.zeros_like(img_out), np.zeros_like(img_out))))
    cv2.imshow("Output", updown(sidebyside(cv2.addWeighted(img_init, 1, mask, 0.3, 0), img_hsv), sidebyside(channels3(img_out), channels3(img_out2))))

def detectionProcess(frame, model, winH=32, winW=32, depth=1, nb_images=2, scale=1.2, stepSize=10, thres_score=0):
    index = 0
    totalWindows = 0
    correct = 0
    bbox_list = []
    score = []
    for resized in pyramid(frame, scale=scale, minSize=(winH, winW), nb_images=nb_images):
        scale = frame.shape[0]/resized.shape[0]
        for (x, y, window) in sliding_window(resized, stepSize=stepSize, windowSize=(winH, winW)):
            if window.shape[0] != winH or window.shape[1] != winW:
                continue
            if(depth == 1):
                window = cv2.cvtColor(window, cv2.COLOR_BGR2GRAY)
                window = np.expand_dims(window, 3)
            window = window[None, :, :, :]
            totalWindows += 1
            class_out = model.predict((window.astype(np.float32))/255., batch_size=1)[0]
            if(class_out < thres_score):
                bbox_list.append(((int(x*scale)), int(y*scale), int((x+winW)*scale), int((y+winH)*scale)))
                score.append(class_out)
                correct += 1
        index += 1
    return bbox_list, totalWindows, correct, score

def sliding_window(image, stepSize, windowSize):
    #slide a window across the image
    for y in range(0, image.shape[0], stepSize):
        for x in range(0, image.shape[1], stepSize):
            yield(x, y, image[y:y + windowSize[1], x:x + windowSize[0]])

def pyramid(image, scale=1.5, minSize=(30, 30), nb_images=3):
    yield image
    count = 0
    #keep looping over the pyramid
    while True:
        #compute new dimensions of an image and resize it
        w = int(image.shape[1]/scale)
        h = int(image.shape[0]/scale)
        image = cv2.resize(image, (w, h))
        count += 1
        scale = np.power((1/scale), count)
        if image.shape[0] < minSize[1] or image.shape[1] < minSize[0] or (count == nb_images):
            break
        yield image

def drawBoxes(frame, bbox_list):
    for i in range(len(bbox_list)):
        box = bbox_list[i]
        cv2.rectangle(frame, (box[0], box[1]), (box[2], box[3]), (0, 255, 255), 2)
    return frame
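A likely reason the video feels frame-by-frame slow is that detectionProcess calls model.predict once per window with batch_size=1, paying the full Keras call overhead for every single window. Collecting the windows and scoring them in one batched call is usually much faster. A rough sketch of that idea, reusing the sliding_window helper above (the function name and simplified signature here are illustrative, not the original code):

import numpy as np

def detectionProcess_batched(frame, model, winH=32, winW=32, stepSize=10, thres_score=0):
    # Gather all candidate windows first...
    coords, windows = [], []
    for (x, y, window) in sliding_window(frame, stepSize=stepSize, windowSize=(winH, winW)):
        if window.shape[0] != winH or window.shape[1] != winW:
            continue
        coords.append((x, y))
        windows.append(window.astype(np.float32) / 255.)
    if not windows:
        return []
    # ...then score them all in a single predict call instead of one call per window
    preds = model.predict(np.stack(windows))
    return [(x, y, x + winW, y + winH)
            for (x, y), p in zip(coords, preds)
            if p[0] < thres_score]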

How to show multiple VideoCapture streams in one frame?

I have an SLR (Sign Language Recognition) task, and I want to show the preprocessing part. Here is my code:
import numpy as np
import cv2
import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.models import load_model
import tensorflow as tf
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)

#Load CNN Model
model = load_model("VGG16withALLTRAINABLE(NO BACKGROUND).h5")

#Creating ROI frame for capturing hand
top_ROI = 100
btm_ROI = 300
right_ROI = 50
left_ROI = 250

#Creating Background Removal Parameters
blur_size = 5
canny_low = 25
# min_area = 0
# max_area = 0
canny_high = 150
dilate_iter = 10
erode_iter = 10
mask_color = (0.0, 0.0, 0.0)

#Video Capture
cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()

    #flipping frame
    # frame = cv2.flip(frame, 1)

    #Create ROI inside Frame
    roi = frame[top_ROI:btm_ROI, right_ROI:left_ROI]
    cv2.rectangle(frame, (left_ROI, top_ROI), (right_ROI, btm_ROI), (255, 128, 0), 3)  #Visual Rectangle for ROI

    #Resizing and Reshaping to equalize model input size and shape
    roi = cv2.resize(roi, (300, 300))
    blurred_roi = cv2.GaussianBlur(roi, (blur_size, blur_size), 0)
    gray_roi = cv2.cvtColor(blurred_roi, cv2.COLOR_BGR2GRAY)
    _, threshed = cv2.threshold(gray_roi, 100, 255, cv2.THRESH_BINARY_INV)

    # edge = cv2.Canny(gray_roi, canny_low, canny_high)
    # edge = cv2.dilate(edge, None)
    # edge = cv2.erode(edge, None)

    cntr = []
    cntr_area = []
    contours, _ = cv2.findContours(threshed, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
    contour_info = []
    for c in contours:
        contour_info.append((c, cv2.contourArea(c), ))
    contour_info = np.array(contour_info)
    contour_info = sorted(contour_info, key=lambda x: x[1], reverse=True)
    max_contour = contour_info[0]
    mask = np.zeros(threshed.shape)
    cv2.fillConvexPoly(mask, max_contour[0], (255))
    mask = cv2.dilate(mask, None, iterations=dilate_iter)
    mask = cv2.erode(mask, None, iterations=erode_iter)
    mask = cv2.GaussianBlur(mask, (blur_size, blur_size), 0)
    mask_stack = np.dstack([mask]*3)  # Create 3-channel alpha mask

    #-- Blend masked img into MASK_COLOR background --------------------------------------
    mask_stack = mask_stack.astype('float32') / 255.0  # Use float matrices,
    roi = roi.astype('float32') / 255.0                # for easy blending
    masked = (mask_stack * roi) + ((1-mask_stack) * mask_color)  # Blend
    masked = (masked * 255).astype('uint8')  # Convert back to 8-bit

    print(mask.shape)
    print(mask_stack.shape)
    print(masked.shape)

    cv2.imshow("Frame", frame)
    cv2.imshow("ROI", gray_roi)
    cv2.imshow("Thresed", threshed)
    cv2.imshow('Mask', masked)

    key = cv2.waitKey(1)
    if key == 27:
        break

cap.release()
cv2.destroyAllWindows()
This is my current result [results shown in different frames].
My question is: can I show all the results in one frame (one frame with multiple videos)?
I tried once with the code below, but it stops working when I add the second video stream function (video_stream2()):
from tkinter import *
from PIL import ImageTk, Image
import cv2

#Creating ROI frame for capturing hand
top_ROI = 100
btm_ROI = 300
right_ROI = 50
left_ROI = 250

root = Tk()
root.geometry("1920x1080")

# Create a frame
Main_video = Frame(root, highlightbackground='grey', highlightthicknes=3)
Main_video.grid(row=0, column=0, padx=450, pady=150, ipadx=0, ipady=0)
Roi_video = Frame(root, highlightbackground='grey', highlightthicknes=3)
Roi_video.grid(row=0, column=0, padx=0, pady=0, ipadx=0, ipady=0)

# Create a label in the frame
label_main = Label(Main_video)
label_main.grid()
label_roi = Label(Roi_video)
label_roi.grid()

# Capture from camera
cap = cv2.VideoCapture(0)

# function for video streaming
def video_stream():
    _, frame = cap.read()
    #Create ROI inside Frame
    cv2image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    #Main Video
    img = Image.fromarray(cv2image)
    imgtk = ImageTk.PhotoImage(image=img)
    label_main.imgtk = imgtk
    label_main.configure(image=imgtk)
    label_main.after(1, video_stream)

def video_stream2():
    _, frame = cap.read()
    #Create ROI inside Frame
    roi = frame[top_ROI:btm_ROI, right_ROI:left_ROI]
    cv2.rectangle(frame, (left_ROI, top_ROI), (right_ROI, btm_ROI), (255, 128, 0), 3)  #Visual Rectangle for ROI
    cv2roi_gray = cv2.cvtColor(roi, cv2.COLOR_RGB2GRAY)
    #Roi Video
    roi_img = Image.fromarray(cv2roi_gray)
    imgtk_roi = ImageTk.PhotoImage(image=roi_img)
    label_roi.imgtk_roi = imgtk_roi
    label_roi.configure(image=imgtk_roi)
    label_roi.after(1, video_stream2)

video_stream()
video_stream2()
root.mainloop()
The procedure for combining several images (windows) into one like that is easy, by following the example code:
import numpy as np
import cv2
import time

#Video Capture
cap = cv2.VideoCapture(0)

while(True):
    ret, frame = cap.read()
    frame_uus = cv2.resize(frame, (240, 160))

    #let's simulate the images...
    #frame=np.random.randint(0,255,[320,480,3],dtype='uint8')
    gray_roi = 0.5*np.random.randint(0, 255, [160, 240, 1], dtype='uint8') + 0.5*frame_uus[:, :, 0:1]
    threshed = 0.1 + 0*np.random.randint(0, 255, [160, 240, 3], dtype='uint8') + 0.3*frame_uus
    masked = 0.5*np.random.randint(0, 255, [160, 240, 3], dtype='uint8') + 0.2*frame_uus

    #make sure all data is in uint8-format suitable for cv2..
    gray_roi = gray_roi.astype(np.uint8)
    threshed = threshed.astype(np.uint8)
    masked = masked.astype(np.uint8)

    #show separate images...
    cv2.imshow("Frame", frame)
    cv2.imshow("ROI", gray_roi)
    cv2.imshow("Thresed", threshed)
    cv2.imshow('Mask', masked)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

    #Define space between images...
    vali = 2
    #let's combine the images...
    max_height = np.max([len(frame), len(gray_roi), len(threshed), len(masked)])
    #Let's calculate total width for the combined image...remember to add space between images...
    total_width = len(frame[0]) + len(gray_roi[0]) + len(threshed[0]) + len(masked[0]) + 4*vali
    #For clearness let's make a green background image
    baseimage = np.zeros([max_height, total_width, 3], 'uint8')
    baseimage[:, :, 1] = 255
    #let's add separate images to the baseimage
    baseimage[0:len(frame), 0:len(frame[0]), :] = frame
    #Take into account the grayscale...
    alku = len(frame[0]) + vali
    loppu = alku + len(gray_roi[0])
    baseimage[0:len(gray_roi), alku:loppu, 0:1] = gray_roi
    baseimage[0:len(gray_roi), alku:loppu, 1:2] = gray_roi
    baseimage[0:len(gray_roi), alku:loppu, 2:3] = gray_roi
    #Add next image...
    alku = loppu + vali
    loppu = alku + len(threshed[0])
    baseimage[0:len(threshed), alku:loppu, :] = threshed
    #And the last one...
    alku = loppu + vali
    loppu = alku + len(masked[0])
    baseimage[0:len(masked), alku:loppu, :] = masked
    #And finally let's show the baseimage...
    cv2.imshow('Combined', baseimage)

cap.release()
cv2.destroyAllWindows()
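If all the images already share the same height and dtype uint8, the manual copying above can be collapsed with np.hstack; a compact sketch of the same idea (assuming equal heights, and converting the single-channel image up front):

import numpy as np
import cv2

# gray_roi is single-channel, so stack it to 3 channels first
gray3 = cv2.cvtColor(gray_roi, cv2.COLOR_GRAY2BGR)
combined = np.hstack([frame, gray3, threshed, masked])  # side by side
cv2.imshow('Combined', combined)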

OpenCV Index Error While Building a Face Masking App

What I'm trying to make:
An OpenCV program that can record a video and mask my face using an image of a mask.
My Code:
import numpy as np
import cv2
import os
import time
import face_recognition as fr
import pkg_resources

filename = "THIS_IS_A_TEST.mp4"
frames_per_seconds = 24.0
my_res = "720p"

face_cascade = cv2.CascadeClassifier('C:\\Users\\jack\\Desktop\\haarcascade_frontalface_default.xml')
#eyes_cascade = cv2.CascadeClassifier('C:\\Users\\jack\\Desktop\\frontalEyes35x16.xml')
mask = cv2.imread("C:\\Users\\jack\\Desktop\\Blogger_Video_Recorder\\TheMask.png", -1)

def change_res(cap, width, height):
    cap.set(3, width)
    cap.set(4, height)

STD_DIMENSIONS = {
    "480p": (640, 480),
    "720p": (1280, 720),
    "1080p": (1920, 1080),
    "4k": (3840, 2160),
}

def image_resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    # initialize the dimensions of the image to be resized and
    # grab the image size
    dim = None
    (h, w) = image.shape[:2]
    # if both the width and height are None, then return the
    # original image
    if width is None and height is None:
        return image
    # check to see if the width is None
    if width is None:
        # calculate the ratio of the height and construct the
        # dimensions
        r = height / float(h)
        dim = (int(w * r), height)
    # otherwise, the height is None
    else:
        # calculate the ratio of the width and construct the
        # dimensions
        r = width / float(w)
        dim = (width, int(h * r))
    # resize the image
    resized = cv2.resize(image, dim, interpolation=inter)
    # return the resized image
    return resized

def get_dims(cap, res="1080p"):
    width, height = STD_DIMENSIONS["480p"]
    if res in STD_DIMENSIONS:
        width, height = STD_DIMENSIONS[res]
    change_res(cap, width, height)
    return width, height

VIDEO_TYPE = {
    "mp4": cv2.VideoWriter_fourcc(*"XVID")
}

def get_video_type(filename):
    filename, ext = os.path.splitext(filename)
    if ext in VIDEO_TYPE:
        return VIDEO_TYPE[ext]
    return VIDEO_TYPE["mp4"]

cap = cv2.VideoCapture(0)
dims = get_dims(cap, res=my_res)
video_type_cv2 = get_video_type(filename)
out = cv2.VideoWriter(filename, video_type_cv2, frames_per_seconds, dims)

while True:
    ret, frame = cap.read()
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.5, minNeighbors=5)
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)
    for (x, y, w, h) in faces:
        roi_gray = gray[y:y+h, x:x+h]
        roi_color = frame[y:y+h, x:x+h]
        roi_faces = roi_gray[y:y+h, x:x+w]
        mask2 = image_resize(mask.copy(), width=w)
        mw, mh, mc = mask2.shape
        for i in range(0, mw):
            for j in range(0, mh):
                if mask2[i, j][3] != 0:
                    roi_color[y + i, x + j] = mask2[i, j]
    frame = cv2.cvtColor(frame, cv2.COLOR_BGRA2BGR)
    out.write(frame)
    cv2.imshow("frame", frame)
    if cv2.waitKey(20) & 0xFF == ord("q"):
        break

cap.release()
out.release()
cv2.destroyAllWindows()
Traceback (most recent call last):
  File "C:\Users\jack\Desktop\Blogger_Video_Recorder\tutorial#1.py", line 93, in <module>
    roi_color[y + i, x + j] = mask2[i, j]
IndexError: index 426 is out of bounds for axis 0 with size 273
BTW, I am also running this in Python IDLE, so any help is appreciated :)
You do not need most of the existing steps. You can paste the mask with a single assignment:
draw[y:y+h, x:x+w] = mask2
But there are actually several errors. The first is that the resized mask is shorter than the detected face region, so you cannot reuse the face height h when pasting it back onto the frame.
However, you can read the mask's height after resizing and use that to set the paste dimensions on the base frame:
mask2 = image_resize(mask.copy(), width=w)
mw, mh, mc = mask2.shape
draw[y:y+mw, x:x+w] = mask2
Note: please do not use cv2.COLOR_BGR2BGRA, i.e. the conversion line
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)
unless it is necessary, because it increases the image depth to 4 channels, and an error will occur when you paste the 3-channel mask into it, like:
ValueError: could not broadcast input array from shape (273,410,3) into shape (273,410,4)
Of course another solution is possible, but it is not needed here, because you never actually use the BGRA data anywhere in this code.
Here is the full code:
import numpy as np
import cv2
import os
import time
import face_recognition as fr
import pkg_resources

filename = "THIS_IS_A_TEST.mp4"
frames_per_seconds = 24.0
my_res = "720p"

face_cascade = cv2.CascadeClassifier(os.path.join(cv2.data.haarcascades, 'haarcascade_frontalface_default.xml'))
#eyes_cascade = cv2.CascadeClassifier(os.path.join(cv2.data.haarcascades, 'frontalEyes35x16.xml'))
mask = cv2.imread("test.jpg", -1)

def change_res(cap, width, height):
    cap.set(3, width)
    cap.set(4, height)

STD_DIMENSIONS = {
    "480p": (640, 480),
    "720p": (1280, 720),
    "1080p": (1920, 1080),
    "4k": (3840, 2160),
}

def image_resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    # initialize the dimensions of the image to be resized and
    # grab the image size
    dim = None
    (h, w) = image.shape[:2]
    # if both the width and height are None, then return the
    # original image
    if width is None and height is None:
        return image
    # check to see if the width is None
    if width is None:
        # calculate the ratio of the height and construct the
        # dimensions
        r = height / float(h)
        dim = (int(w * r), height)
    # otherwise, the height is None
    else:
        # calculate the ratio of the width and construct the
        # dimensions
        r = width / float(w)
        dim = (width, int(h * r))
    # resize the image
    resized = cv2.resize(image, dim, interpolation=inter)
    # return the resized image
    return resized

def get_dims(cap, res="1080p"):
    width, height = STD_DIMENSIONS["480p"]
    if res in STD_DIMENSIONS:
        width, height = STD_DIMENSIONS[res]
    change_res(cap, width, height)
    return width, height

VIDEO_TYPE = {
    "mp4": cv2.VideoWriter_fourcc(*"XVID")
}

def get_video_type(filename):
    filename, ext = os.path.splitext(filename)
    if ext in VIDEO_TYPE:
        return VIDEO_TYPE[ext]
    return VIDEO_TYPE["mp4"]

cap = cv2.VideoCapture(0)
dims = get_dims(cap, res=my_res)
video_type_cv2 = get_video_type(filename)
out = cv2.VideoWriter(filename, video_type_cv2, frames_per_seconds, dims)

while True:
    ret, frame = cap.read()
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.5, minNeighbors=5)
    draw = frame.copy()
    for (x, y, w, h) in faces:
        roi_gray = gray[y:y+h, x:x+w]
        roi_color = frame[y:y+h, x:x+w]
        roi_faces = roi_gray[y:y+h, x:x+w]
        mask2 = image_resize(mask.copy(), width=w)
        mw, mh, mc = mask2.shape
        draw[y:y+mw, x:x+w] = mask2
    out.write(draw)
    cv2.imshow("frame", draw)
    if cv2.waitKey(5) & 0xFF == ord("q"):
        break

cap.release()
out.release()
cv2.destroyAllWindows()
Side note:
It is best to use cv2.data.haarcascades to find the path of the XML files included with the library. You can use os.path.join(cv2.data.haarcascades, 'u_file.xml') to build a valid path on most operating systems. An example of this:
import os
import cv2
root_data = cv2.data.haarcascades
face_cascade = cv2.CascadeClassifier(os.path.join(root_data ,'haarcascade_frontalface_default.xml'))
eyes_cascade = cv2.CascadeClassifier(os.path.join(root_data ,'frontalEyes35x16.xml'))
Done..
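If you ever do want to keep the PNG's transparency instead of pasting an opaque rectangle, a minimal alpha-blending sketch (my addition, assuming mask2 is BGRA, i.e. loaded with cv2.IMREAD_UNCHANGED, and that it fits inside the frame at (x, y)):

import numpy as np

mh, mw = mask2.shape[:2]
roi = frame[y:y+mh, x:x+mw].astype(np.float32)        # BGR region under the mask
alpha = mask2[:, :, 3:4].astype(np.float32) / 255.0   # per-pixel opacity in [0, 1]
blended = alpha * mask2[:, :, :3].astype(np.float32) + (1.0 - alpha) * roi
frame[y:y+mh, x:x+mw] = blended.astype(np.uint8)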

How to resize output frame from live stream from youtube using opencv

I'm new to coding and to this site. I'm working on a project where I want to use OpenCV, but I've got an issue: I need to resize the output frame for object recognition. I have read that the frame should be 416x416, but when I display the frame it is still the regular size.
Here's the code:
import pafy
import youtube_dl
import cv2
import numpy as np

url = "https://www.youtube.com/watch?v=WOn7m0_aYBw"
video = pafy.new(url)
best = video.getbest(preftype="mp4")

cap = cv2.VideoCapture()
cap.open(best.url)

net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")
classes = []
with open("coco.names", "r") as f:
    classes = [line.strip() for line in f.readlines()]
layer_names = net.getLayerNames()
output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]
colors = np.random.uniform(0, 255, size=(len(classes), 3))

while True:
    ret, frame = cap.read()
    # if ret == True:
    img = cv2.imshow('frame', frame)
    #cap.set(cv2.CAP_PROP_FRAME_WIDTH, 416)
    #cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 416)
    width = 416
    height = 416
    dim = (width, height)
    img = cv2.resize(frame, dim, interpolation=cv2.INTER_AREA)
    print(img.shape)
    if cv2.waitKey(20) & 0xFF == ord('q'):
        break

blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
net.setInput(blob)
outs = net.forward(output_layers)
print(img.shape) returns the correct size, but I think I'm showing the wrong window. How do I change this code so the window is shown at the correct size?
You were showing the frame before resizing it:
while True:
    ret, frame = cap.read()
    width = 416
    height = 416
    dim = (width, height)
    img = cv2.resize(frame, dim, interpolation=cv2.INTER_AREA)
    print(img.shape)
    cv2.imshow('frame', img)
    if cv2.waitKey(20) & 0xFF == ord('q'):
        break
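A side note on the rest of your code: cv2.dnn.blobFromImage already rescales whatever you pass it to the size argument, so the network receives a 416x416 blob either way; the manual cv2.resize only affects what you display and print. For example:

# The frame is resized to 416x416 inside blobFromImage itself,
# so this works on the full-size frame too:
blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)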

How to use the webcam to capture an image and extract the information on it using python?

I was trying to capture an image from the webcam and extract the text information from it using Python.
Here is the code:
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pytesseract
from PIL import Image
from pytesseract import image_to_string

pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

def main():
    # Use the attached camera to capture images
    # 0 stands for the first one
    cap = cv2.VideoCapture(0)
    if cap.isOpened():
        ret, frame = cap.read()
        print(ret)
        print(frame)
    else:
        ret = False
    img1 = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    # img = Image.open('image.jpg')
    text = pytesseract.image_to_string(img1)
    print(text)
    # plt.imshow(img1)
    # plt.title('Color Image RGB')
    # plt.xticks([])
    # plt.yticks([])
    # plt.show()
    cap.release()

if __name__ == "__main__":
    main()
The code didn't work. I watched a couple of videos on YouTube, and I saw that people typically use Image.open("image.jpg") to open an image located on the computer. But I need to capture the image from the webcam and extract the information from it, so that method won't work in my situation. Is there a way to combine these two methods, i.e. capture the image using cv2 and extract the information using pytesseract.image_to_string()?
Can you please try replacing the line
text = pytesseract.image_to_string(img1)
with
text = pytesseract.image_to_string(Image.fromarray(img1))
Or use the working code snippet here (I copied your code and updated it a little):
def main():
    # Use the attached camera to capture images
    # 0 stands for the first one
    cap = cv2.VideoCapture(0)
    while cap.isOpened():
        ret, frame = cap.read()
        img1 = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        text = pytesseract.image_to_string(Image.fromarray(img1))
        cv2.imshow('frame', img1)
        if cv2.waitKey(0) & 0xFF == ord('q'):
            return None
        print("Extracted Text: ", text)
    cap.release()
Hope this will help you.
I used a while loop because with an if condition I did not get a result; I am still trying to figure out why.
import cv2
import pytesseract

pytesseract.pytesseract.tesseract_cmd = r'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'

framewidth = 640
frameheight = 480
cap = cv2.VideoCapture(0)
cap.set(3, framewidth)
cap.set(4, frameheight)

while True:
    success, img = cap.read()
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # print(pytesseract.image_to_string(img))

    ## detecting characters
    # hImg, wImg, _ = img.shape
    # boxes = pytesseract.image_to_boxes(img)
    # for b in boxes.splitlines():
    #     # print(b)
    #     b = b.split(' ')
    #     print(b)
    #     x, y, w, h = int(b[1]), int(b[2]), int(b[3]), int(b[4])
    #     cv2.rectangle(img, (x, hImg-y), (w, hImg-h), (0, 0, 255), 3)
    #     cv2.putText(img, b[0], (x, hImg-y+25), cv2.FONT_HERSHEY_COMPLEX, 1, (50, 100, 255), 2)

    ## detecting words
    hImg, wImg, _ = img.shape
    boxes = pytesseract.image_to_data(img)
    for x, b in enumerate(boxes.splitlines()):
        if x != 0:
            b = b.split()
            print(b)
            if len(b) == 12:
                x, y, w, h = int(b[6]), int(b[7]), int(b[8]), int(b[9])
                cv2.rectangle(img, (x, y), (w+x, h+y), (0, 0, 255), 3)
                cv2.putText(img, b[11], (x, y), cv2.FONT_HERSHEY_COMPLEX, 1, (50, 100, 255), 2)

    ## detecting digits
    # hImg, wImg, _ = img.shape
    # cong = r'--oem 3 --psm 6 outputbase digits'
    # boxes = pytesseract.image_to_data(img, config=cong)
    # for x, b in enumerate(boxes.splitlines()):
    #     if x != 0:
    #         b = b.split()
    #         print(b)
    #         if len(b) == 12:
    #             x, y, w, h = int(b[6]), int(b[7]), int(b[8]), int(b[9])
    #             cv2.rectangle(img, (x, y), (w + x, h + y), (0, 0, 255), 3)
    #             cv2.putText(img, b[11], (x, y), cv2.FONT_HERSHEY_COMPLEX, 1, (50, 100, 255), 2)

    ## reading text, don't delete it
    # print(pytesseract.image_to_boxes(img))

    cv2.imshow("video", img)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
I don't know how to write a proper answer, but I have my code and it's working fine. Enjoy!
