I have an SLR (Sign Language Recognition) task, and I want to show the preprocessing part. Here is my code:
import numpy as np
import cv2
import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.models import load_model
import tensorflow as tf
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession
config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)
#Load CNN Model
model = load_model("VGG16withALLTRAINABLE(NO BACKGROUND).h5")
#Creating ROI frame for capturing hand
top_ROI = 100
btm_ROI = 300
right_ROI = 50
left_ROI = 250
#Creating Background Removal Parameters
blur_size = 5
canny_low = 25
# min_area = 0
# max_area = 0
canny_high = 150
dilate_iter = 10
erode_iter = 10
mask_color = (0.0,0.0,0.0)
#Video Capture
cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()

    #flipping frame
    # frame = cv2.flip(frame, 1)

    #Create ROI inside Frame
    roi = frame[top_ROI:btm_ROI, right_ROI:left_ROI]
    cv2.rectangle(frame, (left_ROI, top_ROI), (right_ROI, btm_ROI), (255,128,0), 3) #Visual Rectangle for ROI

    #Resizing and Reshaping to equalize model input size and shape
    roi = cv2.resize(roi, (300, 300))
    blurred_roi = cv2.GaussianBlur(roi, (blur_size, blur_size), 0)
    gray_roi = cv2.cvtColor(blurred_roi, cv2.COLOR_BGR2GRAY)
    _, threshed = cv2.threshold(gray_roi, 100, 255, cv2.THRESH_BINARY_INV)

    # edge = cv2.Canny(gray_roi, canny_low, canny_high)
    # edge = cv2.dilate(edge, None)
    # edge = cv2.erode(edge, None)

    cntr = []
    cntr_area = []
    contours, _ = cv2.findContours(threshed, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
    contour_info = []
    for c in contours:
        contour_info.append((c, cv2.contourArea(c)))
    contour_info = np.array(contour_info)
    contour_info = sorted(contour_info, key=lambda x: x[1], reverse=True)
    max_contour = contour_info[0]

    mask = np.zeros(threshed.shape)
    cv2.fillConvexPoly(mask, max_contour[0], (255))
    mask = cv2.dilate(mask, None, iterations=dilate_iter)
    mask = cv2.erode(mask, None, iterations=erode_iter)
    mask = cv2.GaussianBlur(mask, (blur_size, blur_size), 0)
    mask_stack = np.dstack([mask]*3)  # Create 3-channel alpha mask

    #-- Blend masked img into MASK_COLOR background --------------------------------------
    mask_stack = mask_stack.astype('float32') / 255.0  # Use float matrices,
    roi = roi.astype('float32') / 255.0                # for easy blending
    masked = (mask_stack * roi) + ((1 - mask_stack) * mask_color)  # Blend
    masked = (masked * 255).astype('uint8')            # Convert back to 8-bit

    print(mask.shape)
    print(mask_stack.shape)
    print(masked.shape)

    cv2.imshow("Frame", frame)
    cv2.imshow("ROI", gray_roi)
    cv2.imshow("Thresed", threshed)
    cv2.imshow('Mask', masked)

    key = cv2.waitKey(1)
    if key == 27:
        break
cap.release()
cv2.destroyAllWindows()
This is my current result (each output is shown in a different window/frame).
My question is: can I combine all the results into one frame (a single window showing multiple video views)?
I have tried once with the code below, but it stops working when I add the second video-stream function (video_stream2()):
from tkinter import *
from PIL import ImageTk, Image
import cv2
#Creating ROI frame for capturing hand
top_ROI = 100
btm_ROI = 300
right_ROI = 50
left_ROI = 250
root = Tk()
root.geometry("1920x1080")
# Create a frame
Main_video = Frame(root, highlightbackground='grey', highlightthickness=3)
Main_video.grid(row=0, column=0, padx=450, pady=150, ipadx=0, ipady=0)
Roi_video = Frame(root, highlightbackground='grey', highlightthickness=3)
Roi_video.grid(row=0, column=0, padx=0, pady=0, ipadx=0, ipady=0)
# Create a label in the frame
label_main = Label(Main_video)
label_main.grid()
label_roi = Label(Roi_video)
label_roi.grid()
# Capture from camera
cap = cv2.VideoCapture(0)
# function for video streaming
def video_stream():
    _, frame = cap.read()

    #Create ROI inside Frame
    cv2image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    #Main Video
    img = Image.fromarray(cv2image)
    imgtk = ImageTk.PhotoImage(image=img)
    label_main.imgtk = imgtk
    label_main.configure(image=imgtk)
    label_main.after(1, video_stream)

def video_stream2():
    _, frame = cap.read()

    #Create ROI inside Frame
    roi = frame[top_ROI:btm_ROI, right_ROI:left_ROI]
    cv2.rectangle(frame, (left_ROI, top_ROI), (right_ROI, btm_ROI), (255,128,0), 3) #Visual Rectangle for ROI
    cv2roi_gray = cv2.cvtColor(roi, cv2.COLOR_RGB2GRAY)

    #Roi Video
    roi_img = Image.fromarray(cv2roi_gray)
    imgtk_roi = ImageTk.PhotoImage(image=roi_img)
    label_roi.imgtk_roi = imgtk_roi
    label_roi.configure(image=imgtk_roi)
    label_roi.after(1, video_stream2)
video_stream()
video_stream2()
root.mainloop()
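A side note (my own guess, not confirmed in the original post): both video_stream() and video_stream2() call cap.read() on the same camera, so the two callbacks compete for frames. A minimal sketch of one possible workaround is to read the frame once per tick and update both labels from a single callback. video_stream_combined is a hypothetical name; it would replace the two functions and the two calls before root.mainloop(), and reuses cap, the ROI constants and the two labels defined above:

def video_stream_combined():
    # Read one frame per tick and feed both labels from it
    ret, frame = cap.read()
    if ret:
        roi = frame[top_ROI:btm_ROI, right_ROI:left_ROI]
        cv2.rectangle(frame, (left_ROI, top_ROI), (right_ROI, btm_ROI), (255, 128, 0), 3)

        # Main video (BGR -> RGB for PIL)
        main_imgtk = ImageTk.PhotoImage(image=Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)))
        label_main.imgtk = main_imgtk          # keep a reference so Tkinter does not garbage-collect it
        label_main.configure(image=main_imgtk)

        # ROI video (grayscale)
        roi_imgtk = ImageTk.PhotoImage(image=Image.fromarray(cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)))
        label_roi.imgtk = roi_imgtk
        label_roi.configure(image=roi_imgtk)

    label_main.after(10, video_stream_combined)

video_stream_combined()
root.mainloop()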
The procedure to combine several images (windows) into one like that is easy, by following the example code:
import numpy as np
import cv2
import time
#Video Capture
cap = cv2.VideoCapture(0)
while(True):
    ret, frame = cap.read()
    frame_uus = cv2.resize(frame, (240, 160))

    #let's simulate the images...
    #frame=np.random.randint(0,255,[320,480,3],dtype='uint8')
    gray_roi = 0.5*np.random.randint(0, 255, [160, 240, 1], dtype='uint8') + 0.5*frame_uus[:, :, 0:1]
    threshed = 0.1 + 0*np.random.randint(0, 255, [160, 240, 3], dtype='uint8') + 0.3*frame_uus
    masked = 0.5*np.random.randint(0, 255, [160, 240, 3], dtype='uint8') + 0.2*frame_uus

    #make sure all data is in uint8-format suitable for cv2..
    gray_roi = gray_roi.astype(np.uint8)
    threshed = threshed.astype(np.uint8)
    masked = masked.astype(np.uint8)

    #show separate images...
    cv2.imshow("Frame", frame)
    cv2.imshow("ROI", gray_roi)
    cv2.imshow("Thresed", threshed)
    cv2.imshow('Mask', masked)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

    #Define space between images...
    vali = 2

    #let's combine the images...
    max_height = np.max([len(frame), len(gray_roi), len(threshed), len(masked)])
    #Let's calculate total width for the combined image...remember to add space between images...
    total_width = len(frame[0]) + len(gray_roi[0]) + len(threshed[0]) + len(masked[0]) + 4*vali

    #For clearness let's make a green background image
    baseimage = np.zeros([max_height, total_width, 3], 'uint8')
    baseimage[:, :, 1] = 255

    #let's add separate images to the baseimage
    baseimage[0:len(frame), 0:len(frame[0]), :] = frame

    #Take into account the grayscale...
    alku = len(frame[0]) + vali
    loppu = alku + len(gray_roi[0])
    baseimage[0:len(gray_roi), alku:loppu, 0:1] = gray_roi
    baseimage[0:len(gray_roi), alku:loppu, 1:2] = gray_roi
    baseimage[0:len(gray_roi), alku:loppu, 2:3] = gray_roi

    #Add next image...
    alku = loppu + vali
    loppu = alku + len(threshed[0])
    baseimage[0:len(threshed), alku:loppu, :] = threshed

    #And the last one...
    alku = loppu + vali
    loppu = alku + len(masked[0])
    baseimage[0:len(masked), alku:loppu, :] = masked

    #And finally let's show the baseimage...
    cv2.imshow('Combined', baseimage)
cap.release()
cv2.destroyAllWindows()
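As an alternative sketch (my own addition, not part of the answer above): if every view is first resized to the same height and promoted to 3 channels, cv2.hconcat (or np.hstack) builds the combined frame in one call. combine_views is a hypothetical helper name:

import cv2
import numpy as np

def combine_views(views, height=160, gap=2):
    """Stack BGR/grayscale images side by side with a green spacer strip between them."""
    prepared = []
    for img in views:
        if img.ndim == 2 or img.shape[2] == 1:   # promote grayscale to 3 channels
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
        scale = height / img.shape[0]
        img = cv2.resize(img, (int(img.shape[1] * scale), height))
        prepared.append(img)
        prepared.append(np.full((height, gap, 3), (0, 255, 0), dtype=np.uint8))  # green spacer
    return cv2.hconcat(prepared[:-1])            # drop the trailing spacer

# usage: cv2.imshow('Combined', combine_views([frame, gray_roi, threshed, masked]))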
I'm struggling to understand how to overlay a .png with transparency to a video stream.
For some reason, the transparent area is always displayed as black.
Here's what I do:
Loading the image and setting up the environment
import cv2
import numpy as np
from PIL import Image
cap = cv2.VideoCapture(0)
cv2.namedWindow("window", cv2.WND_PROP_FULLSCREEN)
cv2.setWindowProperty("window",cv2.WND_PROP_FULLSCREEN,cv2.WINDOW_FULLSCREEN)
dim = (640,480)
alpha=0.0
foreground = cv2.imread('png.png',cv2.IMREAD_UNCHANGED)
rows,cols,channels = foreground.shape
Adding an artificial alpha layer to the frame and overlaying the loaded image
def logoOverlay(image,logo,alpha=1.0,x=0, y=0, scale=1.0):
    (h, w) = image.shape[:2]
    image = np.dstack([image, np.ones((h, w), dtype="uint8") * 255])
    overlay = cv2.resize(logo, None, fx=scale, fy=scale)
    (wH, wW) = overlay.shape[:2]
    output = image.copy()
    # blend the two images together using transparent overlays
    try:
        if x < 0: x = w + x
        if y < 0: y = h + y
        if x + wW > w: wW = w - x
        if y + wH > h: wH = h - y
        overlay = cv2.addWeighted(output[y:y+wH, x:x+wW], alpha, overlay[:wH, :wW], 1 - alpha, 0)
        output[y:y+wH, x:x+wW] = overlay
    except Exception as e:
        print("Error: Logo position is overshooting image!")
        print(e)
    output = output[:, :, :3]
    return output
Calling this function every frame:
while(True):
    ret, frame = cap.read()
    frame = cv2.flip(frame, 1)
    frame = cv2.resize(frame, dim, interpolation=cv2.INTER_AREA)
    frame = logoOverlay(frame, foreground, alpha=alpha, scale=1, y=100, x=100)
    cv2.imshow('window', frame)
Thanks for your help, highly appreciated!
FP
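A likely cause (my own note, not part of the original post): cv2.addWeighted applies one global alpha to the whole region and ignores the PNG's own alpha channel, so the transparent pixels come out with whatever colour (often black) the overlay stores there. A minimal sketch of per-pixel blending that uses the fourth channel of an image loaded with cv2.IMREAD_UNCHANGED, assuming the logo fits inside the frame at (x, y) (blend_with_alpha is a hypothetical helper name):

import cv2
import numpy as np

def blend_with_alpha(frame, logo_bgra, x=0, y=0):
    # Split the logo into colour and alpha; alpha becomes a per-pixel 0..1 weight
    h, w = logo_bgra.shape[:2]
    logo_bgr = logo_bgra[:, :, :3].astype(np.float32)
    alpha = logo_bgra[:, :, 3:4].astype(np.float32) / 255.0

    roi = frame[y:y + h, x:x + w].astype(np.float32)
    blended = alpha * logo_bgr + (1.0 - alpha) * roi
    frame[y:y + h, x:x + w] = blended.astype(np.uint8)
    return frame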
Currently, I'm trying to perform motion detection with OpenCV. For each new frame, I use the function below to compare it with the previous frame:
def detect(new_frame, kernel_size):
    frame = cv2.cvtColor(new_frame, cv2.COLOR_BGR2GRAY)  # Grayscale conversion of the frame
    frame = cv2.GaussianBlur(frame, (kernel_size, kernel_size), 0)
    deltaFrame = cv2.absdiff(old_frame, frame)
    old_frame = frame
    threshFrame = cv2.threshold(deltaFrame, 5, 255, cv2.THRESH_BINARY)[1]
    threshFrame = cv2.dilate(threshFrame, None, iterations=2)
    (cnts, _) = cv2.findContours(threshFrame.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    return cnts
My problem is that I have to detect the motion of two types of objects, and each type has its own effective kernel size for that function (i.e. 5 and 11), so I have to call the function twice for every new frame. But my device has limited resources, so I want to reduce this processing as much as I can. How can I do it?
Try bitwise functions over masks to detect which pixels are moving; it's fast. The trick, for me, is to work with a small, resized copy of the frame.
import numpy as np
import cv2 as cv2
fid=0
video_path="videos/example.mp4"
cap = cv2.VideoCapture(video_path)
# Some characteristics from the original video
w_frame, h_frame = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps, num_frames = cap.get(cv2.CAP_PROP_FPS), cap.get(cv2.CAP_PROP_FRAME_COUNT)
print(fps,w_frame,h_frame)
x,y,h,w = 0,0,h_frame,w_frame
fnum=0
while(True):
    ret, frame = cap.read()
    if not ret:
        break

    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    edges = gray
    if fnum == 0:
        last_edges = edges.copy()

    ret, mask1 = cv2.threshold(edges, 127, 255, cv2.THRESH_BINARY)
    ret, mask2 = cv2.threshold(last_edges, 127, 255, cv2.THRESH_BINARY)
    dst1 = cv2.bitwise_and(mask2, mask1)
    dst2 = cv2.bitwise_not(dst1)
    dst4 = cv2.bitwise_and(dst2, dst2, mask=mask1)

    scale_percent = 10  # percent of original size
    width = int(dst4.shape[1] * scale_percent / 100)
    height = int(dst4.shape[0] * scale_percent / 100)
    dim = (width, height)

    # resize image
    mini = cv2.resize(dst4, dim, interpolation=cv2.INTER_AREA)
    h, w = mini.shape

    th = 30  # my threshold
    points = []
    for y in range(0, len(mini), 4):
        for x in range(0, len(mini[y]), 4):
            c1 = mini[y][x] > th and mini[y][x+1] > th and mini[y][x+2] > th and mini[y][x+3] > th
            c2 = mini[y][x] > th and mini[y+1][x] > th and mini[y+2][x] > th and mini[y+3][x] > th
            if c1 or c2:
                start_point = (x*scale_percent, y*scale_percent)
                points.append(start_point)
                color1 = (0, 0, 255)
                color2 = (0, 255, 255)
                thickness = 2
                cv2.circle(frame, start_point, 20, color1, thickness)

    if len(points) >= 2:
        cx1, cy1 = points[0][0], points[0][1]
        cx2, cy2 = points[-1][0], points[-1][1]
        cxmin = min(cx1, cx2)
        cymin = min(cy1, cy2)
        cxmax = max(cx1, cx2)
        cymax = max(cy1, cy2)
        print(cymin, cymax, '--', cxmin, cxmax)
        cv2.rectangle(frame, (cxmin, cymin), (cxmax, cymax), color2, thickness)

    # Display the resulting frame
    cv2.imshow('frame4', frame)
    cv2.imshow('framemin', mini)

    last_edges = edges.copy()
    fnum += 1

    if cv2.waitKey(33) & 0xFF == ord('q'):
        break
# When everything done, release the capture
cap.release()
cv2.destroyAllWindows()
You can apply your own masks to detect one object or another, playing with the blur values too.
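As a small complementary saving (my own sketch, not part of the answer above): the grayscale conversion can be done once per frame and shared between the two kernel sizes, so only the blur, diff, threshold and contour steps are duplicated; combined with the downscaling trick above, this keeps the per-frame cost low. detect_two_scales and prev_blurred are hypothetical names:

import cv2

prev_blurred = {}  # hypothetical helper state: one previous blurred frame per kernel size

def detect_two_scales(new_frame, kernel_sizes=(5, 11)):
    # The grayscale conversion is done once and shared by both kernel sizes
    gray = cv2.cvtColor(new_frame, cv2.COLOR_BGR2GRAY)
    results = {}
    for k in kernel_sizes:
        blurred = cv2.GaussianBlur(gray, (k, k), 0)
        prev = prev_blurred.get(k, blurred)      # first frame: diff against itself
        delta = cv2.absdiff(prev, blurred)
        prev_blurred[k] = blurred
        thresh = cv2.threshold(delta, 5, 255, cv2.THRESH_BINARY)[1]
        thresh = cv2.dilate(thresh, None, iterations=2)
        cnts, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        results[k] = cnts
    return results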
I'm new to coding and to this site. I'm working on a project where I want to use OpenCV, but I've run into an issue. I need to resize the output frame for object recognition. I have read that the frame should be 416x416, but when I display the frame it is still at its regular size.
Here's the code:
import pafy
import youtube_dl
import cv2
import numpy as np
url = "https://www.youtube.com/watch?v=WOn7m0_aYBw"
video = pafy.new(url)
best = video.getbest(preftype="mp4")
cap = cv2.VideoCapture()
cap.open(best.url)
net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")
classes = []
with open("coco.names", "r") as f:
    classes = [line.strip() for line in f.readlines()]
layer_names = net.getLayerNames()
output_layers =[layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]
colors = np.random.uniform(0, 255, size=(len(classes), 3))
while True:
    ret, frame = cap.read()
    # if ret == True:
    img = cv2.imshow('frame', frame)
    #cap.set(cv2.CAP_PROP_FRAME_WIDTH, 416)
    #cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 416)
    width = 416
    height = 416
    dim = (width, height)
    img = cv2.resize(frame, dim, interpolation=cv2.INTER_AREA)
    print(img.shape)
    if cv2.waitKey(20) & 0xFF == ord('q'):
        break
    blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)
print(img.shape) returns the correct size, but I think I'm showing the wrong window. How do I change this code so the window is displayed at the correct size?
You were showing the frame before resizing it; show the resized image instead:
while True:
    ret, frame = cap.read()
    width = 416
    height = 416
    dim = (width, height)
    img = cv2.resize(frame, dim, interpolation=cv2.INTER_AREA)
    print(img.shape)
    cv2.imshow('frame', img)
    if cv2.waitKey(20) & 0xFF == ord('q'):
        break
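A side note (my addition, not part of the answer): cv2.dnn.blobFromImage already resizes whatever it is given to the (416, 416) size passed in, so the explicit cv2.resize mainly affects what you display. A sketch of the loop order, reusing cap, net and output_layers from the question's setup:

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # blobFromImage handles the 416x416 resize for the network input itself
    blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)

    # resize only for display purposes
    cv2.imshow('frame', cv2.resize(frame, (416, 416), interpolation=cv2.INTER_AREA))
    if cv2.waitKey(20) & 0xFF == ord('q'):
        break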
I have this machine learning code for recognizing hand gestures. It uses the webcam to detect hand gestures. The problem, however, is that whenever I run this code it opens the webcam (which works for a second) and then crashes immediately with an "is not responding" error.
Here is my opencv.py
import cv2
import numpy as np
from keras.models import load_model
from skimage.transform import resize, pyramid_reduce
model = load_model('model.h5')
while True:
    cam_capture = cv2.VideoCapture(0)
    _, image_frame = cam_capture.read()

    # Select ROI
    im2 = crop_image(image_frame, 300, 300, 300, 300)
    image_grayscale = cv2.cvtColor(im2, cv2.COLOR_BGR2GRAY)
    image_grayscale_blurred = cv2.GaussianBlur(image_grayscale, (15, 15), 0)

    #resized_img = image_resize(image_grayscale_blurred, width = 28, height = 28, inter = cv2.INTER_AREA)
    #resized_img = keras_process_image(image_grayscale_blurred)
    resized_img = cv2.resize(image_grayscale_blurred, (28, 28))

    #ar = np.array(resized_img)
    ar = resized_img.reshape(1, 784)
    pred_probab, pred_class = keras_predict(model, ar)
    print(pred_class, pred_probab)

    # Display cropped image
    cv2.imshow("Image2", im2)
    cv2.imshow("Image4", resized_img)
    cv2.imshow("Image3", image_grayscale_blurred)

    if cv2.waitKey(25) & 0xFF == ord('q'):
        cv2.destroyAllWindows()
        break
cam_capture.release()
cv2.destroyAllWindows()
I have deleted some redundant code. My main file is ROIinOpenCv.py
import cv2
import numpy as np
from keras.models import load_model
from skimage.transform import resize, pyramid_reduce
model = load_model('model.h5')
def get_square(image, square_size):
    height, width = image.shape

    if height > width:
        differ = height
    else:
        differ = width
    differ += 4

    mask = np.zeros((differ, differ), dtype="uint8")
    x_pos = int((differ - width) / 2)
    y_pos = int((differ - height) / 2)
    mask[y_pos: y_pos + height, x_pos: x_pos + width] = image[0: height, 0: width]

    if differ / square_size > 1:
        mask = pyramid_reduce(mask, differ / square_size)
    else:
        mask = cv2.resize(mask, (square_size, square_size), interpolation=cv2.INTER_AREA)
    return mask

def keras_predict(model, image):
    data = np.asarray(image, dtype="int32")
    pred_probab = model.predict(data)[0]
    pred_class = list(pred_probab).index(max(pred_probab))
    return max(pred_probab), pred_class

def keras_process_image(img):
    image_x = 28
    image_y = 28
    #img = cv2.resize(img, (28,28), interpolation = cv2.INTER_AREA)
    img = get_square(img, 28)
    img = np.reshape(img, (image_x, image_y))
    return img

def crop_image(image, x, y, width, height):
    return image[y:y + height, x:x + width]

while True:
    cam_capture = cv2.VideoCapture(0)
    _, image_frame = cam_capture.read()

    # Select ROI
    im2 = crop_image(image_frame, 300, 300, 300, 300)
    image_grayscale = cv2.cvtColor(im2, cv2.COLOR_BGR2GRAY)
    image_grayscale_blurred = cv2.GaussianBlur(image_grayscale, (15, 15), 0)

    #resized_img = image_resize(image_grayscale_blurred, width = 28, height = 28, inter = cv2.INTER_AREA)
    #resized_img = keras_process_image(image_grayscale_blurred)
    resized_img = cv2.resize(image_grayscale_blurred, (28, 28))

    #ar = np.array(resized_img)
    ar = resized_img.reshape(1, 784)
    pred_probab, pred_class = keras_predict(model, ar)
    print(pred_class, pred_probab)

    # Display cropped image
    cv2.imshow("Image2", im2)
    cv2.imshow("Image4", resized_img)
    cv2.imshow("Image3", image_grayscale_blurred)

    if cv2.waitKey(25) & 0xFF == ord('q'):
        cv2.destroyAllWindows()
        break
cam_capture.release()
cv2.destroyAllWindows()
PS: I am using Ubuntu. Do you think, by any chance, it could be an issue with granting permission to access the webcam?
You are opening your camera in each iteration of the while loop. That is causing the error. The fix is just creating your cam_capture object outside the while loop.
Please refer below.
import cv2
import numpy as np
from keras.models import load_model
from skimage.transform import resize, pyramid_reduce
model = load_model('model.h5')
cam_capture = cv2.VideoCapture(0) # create camera object outside while-loop
while True:
    _, image_frame = cam_capture.read()

    # Select ROI
    im2 = crop_image(image_frame, 300, 300, 300, 300)
    image_grayscale = cv2.cvtColor(im2, cv2.COLOR_BGR2GRAY)
    image_grayscale_blurred = cv2.GaussianBlur(image_grayscale, (15, 15), 0)
    resized_img = cv2.resize(image_grayscale_blurred, (28, 28))
    ar = resized_img.reshape(1, 784)
    pred_probab, pred_class = keras_predict(model, ar)
    print(pred_class, pred_probab)

    # Display cropped image
    cv2.imshow("Image2", im2)
    cv2.imshow("Image4", resized_img)
    cv2.imshow("Image3", image_grayscale_blurred)

    if cv2.waitKey(25) & 0xFF == ord('q'):
        cv2.destroyAllWindows()
        break
cam_capture.release()
cv2.destroyAllWindows()
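If the crash persists, a quick check (my addition, not part of the answer) can confirm whether the camera was opened at all and whether frames are actually returned, which helps rule out the permission question raised in the post:

cam_capture = cv2.VideoCapture(0)  # created once, outside the while-loop, as above
if not cam_capture.isOpened():
    raise RuntimeError("Could not open the webcam - check the device index and access permissions")

ret, image_frame = cam_capture.read()
if not ret:
    raise RuntimeError("Webcam opened, but no frame was returned")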
I have to track a window in a video and need to paste an image on the window. I have used CamShift to track the window, but it does not track it correctly. My window is brown, so I have given the following color range:
np.array((110, 0, 0)) -- lower
np.array((130, 255, 255)) -- upper
I have read many OpenCV documents but am not able to figure out which method to follow. I am using OpenCV 2.4.9 with Python.
Below is the code I tried. Please help me figure out the exact location of the window.
#!/usr/bin/env python
import numpy as np
import cv2
cap = cv2.VideoCapture("myvideo.mp4")
# take first frame of the video
ret,frame = cap.read()
#print frame
#print ret
# setup initial location of window
r,h,c,w = 157,40,337,40
track_window = (c,r,w,h)
# set up the ROI for tracking
roi = frame[r:r+h, c:c+w]
hsv_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
mask = cv2.inRange(hsv_roi, np.array((110,0,0)), np.array((130,255,255)))
roi_hist = cv2.calcHist([hsv_roi],[0],mask,[255],[0,255])
cv2.imshow('img2',roi_hist)
#print roi_hist
cv2.normalize(roi_hist,roi_hist,0,255,cv2.NORM_MINMAX)
# Setup the termination criteria, either 10 iteration or move by at least 1 pt
term_crit = ( cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 1 )
i = 1
while(1):
    ret, frame = cap.read()

    if ret == True:
        i += 1
        hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
        dst = cv2.calcBackProject([hsv], [0], roi_hist, [0, 255], 1)

        # apply meanshift to get the new location
        ret, track_window = cv2.CamShift(dst, track_window, term_crit)
        #print track_window

        # Draw it on image
        x, y, w, h = track_window
        img2 = cv2.rectangle(frame, (x, y), (x+w, y+h), 255, 2)
        cv2.imshow('img2', frame)

        k = cv2.waitKey(200) & 0xff
        if k == 27:
            break
        else:
            # print "comes here2"
            cv2.imwrite(str(i) + "test.jpg", frame)
            #break
    else:
        break
cv2.destroyAllWindows()
cap.release()
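A note on the colour range (my own observation, not part of the post): in OpenCV's HSV representation the hue channel runs from 0 to 179, and hue values around 110-130 correspond to blue rather than brown. Brown sits closer to the orange range, roughly hue 10-20 with moderate saturation, and the hue histogram is usually built with 180 bins over [0, 180]. A rough sketch of how the ROI histogram could be set up (build_brown_hist is a hypothetical helper; the exact bounds are scene-dependent):

import numpy as np
import cv2

def build_brown_hist(roi_bgr):
    # Rough HSV range for brown/orange tones; tune these against the actual window colour
    lower_brown = np.array((10, 60, 20))
    upper_brown = np.array((20, 255, 200))

    hsv_roi = cv2.cvtColor(roi_bgr, cv2.COLOR_BGR2HSV)
    mask = cv2.inRange(hsv_roi, lower_brown, upper_brown)

    # Hue histograms are typically built with 180 bins over [0, 180]
    roi_hist = cv2.calcHist([hsv_roi], [0], mask, [180], [0, 180])
    cv2.normalize(roi_hist, roi_hist, 0, 255, cv2.NORM_MINMAX)
    return roi_hist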