I am trying to generate a CLEVR-style dataset with random images of rectangles and circles. When I debug the code, every plt.imshow() call shows a new image with the newly generated shape along with the shapes from the previous loop iterations. To my surprise, though, the fifth generated image never shows up while debugging.
Also, when I save the images locally I see only two objects. Please find the images below for reference.
import matplotlib.pyplot as plt
import cv2
import numpy as np
import random

colors = [
    (0, 0, 255),     # red (OpenCV uses BGR order)
    (0, 255, 0),     # green
    (255, 0, 0),     # blue
    (0, 156, 255),   # orange
    (128, 128, 128), # gray
    (0, 255, 255)    # yellow
]
img_size = 75
size = 5
def center_generate(objects):
    while True:
        pas = True
        center = np.random.randint(0 + size, img_size - size, 2)
        if len(objects) > 0:
            for name, c, shape in objects:
                """this condition checks if the new object generated is not
                completely covered by the already generated objects"""
                if ((center - c) ** 2).sum() < ((size * 2) ** 2):
                    pas = False
        if pas:
            return center
def build_dataset(dataset_type, loopnumber):
    objects = []
    img = np.ones((img_size, img_size, 3)) * 255
    for color_id, color in enumerate(colors):
        center = center_generate(objects)
        if random.random() < 0.5:
            start = (center[0] - size, center[1] - size)
            end = (center[0] + size, center[1] + size)
            img = cv2.rectangle(img, start, end, color, -1)
            plt.imshow(img)
            objects.append((color_id, center, 'r'))
        else:
            center_ = (center[0], center[1])
            img = cv2.circle(img, center_, size, color, -1)
            plt.imshow(img)
            objects.append((color_id, center, 'c'))
    if dataset_type == 'test':
        plt.imsave('..\\img\\test\\img_' + str(loopnumber) + '.jpeg', img)
    else:
        plt.imsave('..\\img\\train\\img_' + str(loopnumber) + '.jpeg', img)
The image generated during debugging.
Image stored locally.
Could anyone explain the issue here?
Thanks
To save the image use cv2.imwrite():
cv2.imwrite('..\\img\\train\\img_' + str(loopnumber) + '.jpeg', img)
Also, to save it correctly with plt.imsave you need to divide by 255, because matplotlib expects float images to be in the range [0, 1]:
plt.imsave('..\\img\\train\\img_' + str(loopnumber) + '.jpeg', img / 255.0)
or convert the image to uint8:
img = img.astype('uint8')
plt.imsave('..\\img\\train\\img_' + str(loopnumber) + '.jpeg', img)
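One more thing to watch for (not the cause of the missing objects, but it affects the saved output): OpenCV draws in BGR order while matplotlib assumes RGB, so colors saved via plt.imsave will come out swapped. A small sketch of the conversion, assuming the uint8 image from above:

# cv2 works in BGR, matplotlib expects RGB, so convert before plt.imsave
img_rgb = cv2.cvtColor(img.astype('uint8'), cv2.COLOR_BGR2RGB)
plt.imsave('..\\img\\train\\img_' + str(loopnumber) + '.jpeg', img_rgb)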
I am following a simple head pose estimation tutorial in Python. I am trying to make some modifications to the code, but I have been stuck for days now. I just want to know how long the user has been looking to the left or right. If the user is detected looking to the left or right for a long time, say 2-3 minutes, the program should give a warning or print a simple message saying how long he/she has been looking in that direction. How do I achieve this? Any ideas how to do this?
Sorry for my bad English.
Any help will be appreciated :)
Here's my code:
import cv2
import mediapipe as mp
import numpy as np
import time

mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(min_detection_confidence=0.5, min_tracking_confidence=0.5)
mp_drawing = mp.solutions.drawing_utils
drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)

cap = cv2.VideoCapture(0)

while cap.isOpened():
    success, image = cap.read()
    image = cv2.resize(image, (780, 350))
    image = cv2.flip(image, 1)
    # start = time.time()

    # Flip the image horizontally for a later selfie-view display
    # Also convert the color space from BGR to RGB
    image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)

    # To improve performance
    image.flags.writeable = False

    # Get the result
    results = face_mesh.process(image)

    # To improve performance
    image.flags.writeable = True

    # Convert the color space from RGB to BGR
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

    img_h, img_w, img_c = image.shape
    face_3d = []
    face_2d = []

    if results.multi_face_landmarks:
        for face_landmarks in results.multi_face_landmarks:
            for idx, lm in enumerate(face_landmarks.landmark):
                if idx == 33 or idx == 263 or idx == 1 or idx == 61 or idx == 291 or idx == 199:
                    if idx == 1:
                        nose_2d = (lm.x * img_w, lm.y * img_h)
                        nose_3d = (lm.x * img_w, lm.y * img_h, lm.z * 3000)

                    x, y = int(lm.x * img_w), int(lm.y * img_h)

                    # Get the 2D Coordinates
                    face_2d.append([x, y])

                    # Get the 3D Coordinates
                    face_3d.append([x, y, lm.z])

            # Convert it to the NumPy array
            face_2d = np.array(face_2d, dtype=np.float64)

            # Convert it to the NumPy array
            face_3d = np.array(face_3d, dtype=np.float64)

            # The camera matrix
            focal_length = 1 * img_w
            cam_matrix = np.array([[focal_length, 0, img_h / 2],
                                   [0, focal_length, img_w / 2],
                                   [0, 0, 1]])

            # The distortion parameters
            dist_matrix = np.zeros((4, 1), dtype=np.float64)

            # Solve PnP
            success, rot_vec, trans_vec = cv2.solvePnP(face_3d, face_2d, cam_matrix, dist_matrix)

            # Get rotational matrix
            rmat, jac = cv2.Rodrigues(rot_vec)

            # Get angles
            angles, mtxR, mtxQ, Qx, Qy, Qz = cv2.RQDecomp3x3(rmat)

            # Get the y rotation degree
            x = angles[0] * 360
            y = angles[1] * 360
            z = angles[2] * 360

            # See where the user's head is tilting
            if y < -10:
                # I want to know how long this guy has been looking to his left or right or up or down
                text = "Looking Left"
            elif y > 10:
                text = "Looking Right"
            elif x < -10:
                text = "Looking Down"
            elif x > 10:
                text = "Looking Up"
            else:
                text = "Forward"

            # Display the nose direction
            nose_3d_projection, jacobian = cv2.projectPoints(nose_3d, rot_vec, trans_vec, cam_matrix, dist_matrix)

            p1 = (int(nose_2d[0]), int(nose_2d[1]))
            p2 = (int(nose_2d[0] + y * 10), int(nose_2d[1] - x * 10))

            cv2.line(image, p1, p2, (255, 0, 0), 3)

            # Add the text on the image
            cv2.putText(image, text, (20, 50), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 0), 2)
            cv2.putText(image, "x: " + str(np.round(x, 2)), (500, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            cv2.putText(image, "y: " + str(np.round(y, 2)), (500, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            cv2.putText(image, "z: " + str(np.round(z, 2)), (500, 150), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

            # need some fix here
            # end = time.time()
            # totalTime = end - start
            # fps = 1 / totalTime
            # print("FPS: ", fps)
            # cv2.putText(image, f'FPS: {int(fps)}', (20, 450), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 255, 0), 2)

            mp_drawing.draw_landmarks(
                image=image,
                landmark_list=face_landmarks,
                connections=mp_face_mesh.FACEMESH_CONTOURS,
                landmark_drawing_spec=drawing_spec,
                connection_drawing_spec=drawing_spec)

    cv2.imshow('Head Pose Estimation', image)

    if cv2.waitKey(5) & 0xFF == 27:
        break

cap.release()
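A minimal sketch of one way to time this, assuming the text variable from the loop above (the names look_dir, look_since and check_duration are illustrative, not from the tutorial):

import time

look_dir = "Forward"      # direction currently being held
look_since = time.time()  # when that direction started

def check_duration(text, threshold=120):
    # Returns a warning string once `text` has been held for `threshold` seconds.
    global look_dir, look_since
    if text != look_dir:             # direction changed: restart the clock
        look_dir = text
        look_since = time.time()
        return None
    held = time.time() - look_since
    if text != "Forward" and held > threshold:  # 120 s = 2 minutes
        return text + " for " + str(int(held)) + " seconds!"
    return None

Inside the while loop, right after text is set, something like warning = check_duration(text) followed by a print or cv2.putText call when warning is not None would produce the 2-3 minute alert described above.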
I have code that draws a line over an image, but it only shows me the final result directly.
How can I change the code so I can watch the line being drawn?
import cv2
path = r'test.jpeg'
image = cv2.imread(path)
window_name = 'Image'
start_point = (0, 0)
end_point = (250, 250)
color = (0, 255, 0)
thickness = 9
image = cv2.line(image, start_point, end_point, color, thickness)
cv2.imshow(window_name, image)
cv2.waitKey()
Try this, for example. I did not add an image; you can adapt it to suit your needs.
import cv2
import numpy as np
import time

Frame_out = np.zeros((500, 640, 3), np.uint8)

a = 1
while a < 255:
    cv2.line(Frame_out, (0, 0), (a * 2, a * 2), (0, 0, 255 - a), 9)
    time.sleep(0.05)
    cv2.imshow('Animation', Frame_out)
    cv2.line(Frame_out, (0, 0), (a * 2, a * 2), (0, 0, 0), 9)
    a += 2
    if a > 254:
        a = 1
    k = cv2.waitKey(10)
    if k == 27:
        break

cv2.destroyAllWindows()
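If you want the same effect on top of the image from the question, one approach (a sketch, assuming test.jpeg exists next to the script) is to redraw a progressively longer line on a fresh copy of the image each frame, so nothing has to be erased:

import cv2

image = cv2.imread('test.jpeg')
start, end = (0, 0), (250, 250)
steps = 50
for i in range(1, steps + 1):
    frame = image.copy()  # fresh copy so the partial lines do not smear
    # interpolate the current end point along the segment
    p = (start[0] + (end[0] - start[0]) * i // steps,
         start[1] + (end[1] - start[1]) * i // steps)
    cv2.line(frame, start, p, (0, 255, 0), 9)
    cv2.imshow('Image', frame)
    if cv2.waitKey(50) == 27:  # ~20 fps; ESC stops the animation early
        break
cv2.waitKey(0)
cv2.destroyAllWindows()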
I am making an object detection project. I have written my code by following a tutorial, in which the author draws a rectangle in OpenCV for every detected object. But I want to change the rectangle to a triangle or an arrow.
Let me explain with code. In my function, I detect objects, and here I draw a rectangle for each detected object:
cv2.rectangle(img, (x, y), (x+w,y+h), (255, 0 , 255), 2)
But I want to replace this rectangle with a triangle, and I want to position the triangle above the object, just like in these images:
This is the object detection with a triangle.
This is the thing that I want to make instead of the rectangle.
How can I draw a triangle/arrow at the position of each detected object? All of my code is here:
from os.path import sep
import cv2
import numpy as np
import json

# Camera feed
cap_cam = cv2.VideoCapture(0)
ret, frame_cam = cap_cam.read()
hey = 0
print(cv2.__version__)

whT = 320
confThreshold = 0.5
nmsThreshold = 0.2

classesFile = "coco.names"
classNames = []
with open(classesFile, 'rt') as f:
    classNames = f.read().rstrip('\n').split('\n')
print(classNames)

## Model Files
modelConfiguration = "custom-yolov4-tiny-detector.cfg"
modelWeights = "custom-yolov4-tiny-detector_last.weights"
net = cv2.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

def findObjects(outputs, img):
    global hey
    global previousHey
    hT, wT, cT = img.shape
    bbox = []
    classIds = []
    confs = []
    for output in outputs:
        for det in output:
            scores = det[5:]
            classId = np.argmax(scores)
            confidence = scores[classId]
            if confidence > confThreshold:
                w, h = int(det[2] * wT), int(det[3] * hT)
                x, y = int((det[0] * wT) - w / 2), int((det[1] * hT) - h / 2)
                bbox.append([x, y, w, h])
                classIds.append(classId)
                confs.append(float(confidence))

    global indices
    indices = cv2.dnn.NMSBoxes(bbox, confs, confThreshold, nmsThreshold)
    hey = 0
    for i in indices:
        i = i[0]
        box = bbox[i]
        x, y, w, h = box[0], box[1], box[2], box[3]
        # print(x,y,w,h)
        cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 255), 2)
        #cv2.line(img, (350,400), (x, y), (255,0,0), 4)
        #cv2.line(img, (400,400), (x + 50 , y), (255,0,0), 4)
        #cv2.putText(img, f'{classNames[classIds[i]].upper()} {int(confs[i]*100)}%',
        #            (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 255), 2)
        print('success')
        hey = 1

video_frame_counter = 0
while cap_cam.isOpened():
    img = cv2.imread('photos' + sep + 'lutfen.jpg')
    # THE ARROW VIDEO WILL BE PLAYED HERE
    # try "if not detection" tomorrow
    blob = cv2.dnn.blobFromImage(img, 1 / 255, (whT, whT), [0, 0, 0], 1, crop=False)
    net.setInput(blob)
    layersNames = net.getLayerNames()
    outputNames = [(layersNames[i[0] - 1]) for i in net.getUnconnectedOutLayers()]
    outputs = net.forward(outputNames)
    findObjects(outputs, img)

    cv2.imshow('Image', img)

    # Video feed
    if hey == 1:
        filename = 'photos' + sep + 'Baslksz-3.mp4'
        cap_vid = cv2.VideoCapture(filename)
    if hey == 0:
        filename = 'photos' + sep + 'vid2.mp4'
        cap_vid = cv2.VideoCapture(filename)
    print(hey)
    ret, frame_vid = cap_vid.read()

    #cap_cam.set(cv2.CAP_PROP_FRAME_WIDTH, 1920)
    #cap_cam.set(cv2.CAP_PROP_FRAME_HEIGHT, 1080)

    # Resize the camera frame to the size of the video
    height = int(cap_vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    width = int(cap_vid.get(cv2.CAP_PROP_FRAME_WIDTH))

    # Capture the next frame from camera
    ret, frame_cam = cap_cam.read()
    video_frame_counter += 1
    if video_frame_counter == cap_vid.get(cv2.CAP_PROP_FRAME_COUNT):
        video_frame_counter = 0
        cap_vid.set(cv2.CAP_PROP_POS_FRAMES, 0)
    frame_cam = cv2.resize(frame_cam, (width, height), interpolation=cv2.INTER_AREA)

    #ret = cap_vid.set(cv2.CAP_PROP_POS_MSEC, time_passed)
    ret, frame_vid = cap_vid.read()
    if not ret:
        print('Cannot read from video stream')
        break

    # Blend the two images and show the result
    tr = 0.4  # transparency between 0-1, show camera if 0
    frame = ((1 - tr) * frame_cam.astype(np.float64) + tr * frame_vid.astype(np.float64)).astype(np.uint8)
    cv2.imshow('Transparent result', frame)

    if cv2.waitKey(1) == 27:  # ESC is pressed
        break

cap_cam.release()
cap_vid.release()
cv2.destroyAllWindows()
The easy way
You can use the cv2.arrowedLine() function, which draws something similar to what you want. For example, to draw a red arrow above your rectangle:
center_x = x + w//2
cv2.arrowedLine(img, (center_x, y-50), (center_x, y-5), (0,0,255), 2, 8, 0, 0.5)
This draws a downward-pointing arrow just above the bounding box. Take a look at the OpenCV documentation for the description of the parameters of the function; you can change its size, thickness, color, etc.
Custom arrow shape
If you want more control over the shape of your arrow, you can define a contour (vertex by vertex) and use cv2.drawContours() to render it. For example:
# define the arrow shape (contour points must be int32 for drawContours)
shape = np.array([[[0,0],[-25,-25],[-10,-25],[-10,-50],
                   [10,-50],[10,-25],[25,-25]]], dtype=np.int32)
# move it to the desired position
cx = x + w // 2
cy = y - 5
shape[:,:,0] += cx
shape[:,:,1] += cy
# draw it
cv2.drawContours(img, shape, -1, (0, 255, 0), -1)
This snippet draws a filled green arrow pointing down at the top of the bounding box. You can adjust the shape by altering the vertices in the shape array, or look at the documentation to change the way OpenCV draws it.
Is there any way I can make my own training set for face recognition in Python? To be more specific, I want to make a training set like the AT&T face database. I want my camera to take 20 images of each person (30 max) and store them in separate folders named after each person.
import cv2, sys, numpy, os

size = 4
fn_haar = 'haarcascade_frontalface_default.xml'
fn_dir = 'att_faces'
fn_name = sys.argv[1]
path = os.path.join(fn_dir, fn_name)
if not os.path.isdir(path):
    os.mkdir(path)
(im_width, im_height) = (112, 92)
haar_cascade = cv2.CascadeClassifier(fn_haar)
webcam = cv2.VideoCapture(0)

# The program loops until it has 20 images of the face.
count = 0
while count < 20:
    (rval, im) = webcam.read()
    im = cv2.flip(im, 1, 0)
    gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    mini = cv2.resize(gray, (int(gray.shape[1] / size), int(gray.shape[0] / size)))
    faces = haar_cascade.detectMultiScale(mini)
    faces = sorted(faces, key=lambda x: x[3])
    if faces:
        face_i = faces[0]
        (x, y, w, h) = [v * size for v in face_i]
        face = gray[y:y + h, x:x + w]
        face_resize = cv2.resize(face, (im_width, im_height))
        pin = sorted([int(n[:n.find('.')]) for n in os.listdir(path)
                      if n[0] != '.'] + [0])[-1] + 1
        cv2.imwrite('%s/%s.png' % (path, pin), face_resize)
        cv2.rectangle(im, (x, y), (x + w, y + h), (0, 255, 0), 3)
        cv2.putText(im, fn_name, (x - 10, y - 10), cv2.FONT_HERSHEY_PLAIN,
                    1, (0, 255, 0))
        count += 1
    cv2.imshow('OpenCV', im)
    key = cv2.waitKey(10)
    if key == 27:
        break
For this, you just need to provide a particular path and save all the images there with a .png, .bmp, or .jpg extension, numbered in a sorted manner.
# train.py
import cv2, sys, numpy, os

size = 4
fn_haar = 'haarcascade_frontalface_default.xml'
fn_dir = 'face_data'
fn_name = sys.argv[1]  # person's name passed on the command line
path = os.path.join(fn_dir, fn_name)
if not os.path.isdir(path):
    os.makedirs(path)
(im_width, im_height) = (112, 92)
haar_cascade = cv2.CascadeClassifier(fn_haar)
webcam = cv2.VideoCapture(0)

# Generate name for image file
pin = sorted([int(n[:n.find('.')]) for n in os.listdir(path)
              if n[0] != '.'] + [0])[-1] + 1

# Beginning message
print("\n\033[94mThe program will save 20 samples. \
Move your head around to increase while it runs.\033[0m\n")

# The program loops until it has 20 images of the face.
count = 0
pause = 0
count_max = 20
while count < count_max:
    # Loop until the camera is working
    rval = False
    while not rval:
        # Put the image from the webcam into 'frame'
        (rval, frame) = webcam.read()
        if not rval:
            print("Failed to open webcam. Trying again...")

    # Get image size
    height, width, channels = frame.shape

    # Flip frame
    frame = cv2.flip(frame, 1, 0)

    # Convert to grayscale
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Scale down for speed
    mini = cv2.resize(gray, (int(gray.shape[1] / size), int(gray.shape[0] / size)))

    # Detect faces
    faces = haar_cascade.detectMultiScale(mini)

    # We only consider the largest face
    faces = sorted(faces, key=lambda x: x[3])
    if faces:
        face_i = faces[0]
        (x, y, w, h) = [v * size for v in face_i]
        face = gray[y:y + h, x:x + w]
        face_resize = cv2.resize(face, (im_width, im_height))

        # Draw rectangle and write name
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 3)
        cv2.putText(frame, fn_name, (x - 10, y - 10), cv2.FONT_HERSHEY_PLAIN,
                    1, (0, 255, 0))

        # Remove false positives
        if w * 6 < width or h * 6 < height:
            print("Face too small")
        else:
            # To create diversity, only save every fifth detected image
            if pause == 0:
                print("Saving training sample " + str(count + 1) + "/" + str(count_max))

                # Save image file
                cv2.imwrite('%s/%s.png' % (path, pin), face_resize)
                pin += 1
                count += 1
                pause = 1

    if pause > 0:
        pause = (pause + 1) % 5
    cv2.imshow('OpenCV', frame)
    key = cv2.waitKey(10)
    if key == 27:
        break
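Run it once per person, e.g. python train.py alice (the name here is just an example); the cropped faces are then saved as face_data/alice/1.png, face_data/alice/2.png, and so on.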
This code will help you get cropped face images from the webcam and store them in a directory named face_data for training purposes.
In case you don't want to build your dataset from the webcam, you can simply create a directory with 5-6 sub-directories in it, such as Happy, Sad, Angry, Neutral, Calm, etc.
Download images, put them in the corresponding folders for training, and then use this code:
## This program first checks whether the face of a person exists in the given
## image; if it does, it crops the face and saves it to the given directory.

## Importing Modules
import cv2
import os

## directory containing the input images:
directory = "C:\\Users\\hp"
## directory where the cropped images are to be saved:
f_directory = "C:\\Users\\hp\\face_data\\"

def facecrop(image):
    ## Crops the face of a person from an image!

    ## OpenCV XML file for frontal face detection using Haar cascades.
    facedata = "C:\\opencv\\build\\etc\\haarcascades\\haarcascade_frontalface_default.xml"
    cascade = cv2.CascadeClassifier(facedata)

    ## Reading the given image with OpenCV
    img = cv2.imread(image)
    try:
        minisize = (img.shape[1], img.shape[0])
        miniframe = cv2.resize(img, minisize)
        faces = cascade.detectMultiScale(miniframe)
        for f in faces:
            x, y, w, h = [v for v in f]
            cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
            sub_face = img[y:y + h, x:x + w]
            f_name = os.path.basename(image)
            ## Change here the desired directory.
            cv2.imwrite(f_directory + f_name, sub_face)
            print("Writing: " + image)
    except:
        pass

if __name__ == '__main__':
    images = os.listdir(directory)
    i = 0
    for img in images:
        file = os.path.join(directory, img)
        print(i)
        facecrop(file)
        i += 1
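Note that the bare except: pass silently skips any file in the directory that cannot be read as an image; while debugging, it can help to catch the exception as e and print it instead.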
After manipulating some code found on the net (http://www.davidhampgonsalves.com/2011/05/OpenCV-Python-Color-Based-Object-Tracking), I am attempting to track coloured objects. While running the code, I have changed the input from a camera to an AVI file. When the code runs, there is no video, and at line 40,
moments = cv.Moments(thresholded_img, 0)
returns a TypeError on argument '&#1828;' (it's basically a string of symbols I don't understand). Any help would be appreciated, and I will post the source code below in full.
Many thanks,
John
import cv

color_tracker_window = "Color Tracker"

class ColorTracker:
    def __init__(self):
        cv.NamedWindow(color_tracker_window, 1)
        f = 'video.avi'
        self.capture = cv.CaptureFromFile(f)

    def run(self):
        while True:
            img = cv.QueryFrame(self.capture)

            # blur the source image to reduce color noise
            cv.Smooth(img, img, cv.CV_BLUR, 3)

            # convert the image to hsv (Hue, Saturation, Value) so it's
            # easier to determine the color to track (hue)
            hsv_img = cv.CreateImage(cv.GetSize(img), 8, 3)
            cv.CvtColor(img, hsv_img, cv.CV_BGR2HSV)

            # limit all pixels that don't match our criteria, in this case we are
            # looking for purple but if you want you can adjust the first value in
            # both tuples which is the hue range (120,140). OpenCV uses 0-180 as
            # a hue range for the HSV color model
            thresholded_img = cv.CreateImage(cv.GetSize(hsv_img), 8, 1)
            cv.InRangeS(hsv_img, (120, 80, 80), (140, 255, 255), thresholded_img)

            # determine the object's moments and check that the area is large
            # enough to be our object
            moments = cv.Moments(thresholded_img, 0)
            area = cv.GetCentralMoment(moments, 0, 0)
            data = []

            # there can be noise in the video so ignore objects with small areas
            if area > 100000:
                # determine the x and y coordinates of the center of the object
                # we are tracking by dividing the 1, 0 and 0, 1 moments by the area
                x = cv.GetSpatialMoment(moments, 1, 0) / area
                y = cv.GetSpatialMoment(moments, 0, 1) / area
                print 'x: ' + str(x) + ' y: ' + str(y) + ' area: ' + str(area)
                data.append((x, y, area))

                # create an overlay to mark the center of the tracked object
                overlay = cv.CreateImage(cv.GetSize(img), 8, 3)
                cv.Circle(overlay, (x, y), 2, (255, 255, 255), 20)
                cv.Add(img, overlay, img)

                # add the thresholded image back to the img so we can see what was
                # left after it was applied
                cv.Merge(thresholded_img, None, None, None, img)

            # display the image
            cv.ShowImage(color_tracker_window, img)

            if cv.WaitKey(10) == 27:
                break

if __name__ == "__main__":
    color_tracker = ColorTracker()
    color_tracker.run()
You have to change the following:
moments = cv.Moments(thresholded_img, 0)
to
moments = cv.Moments(cv.GetMat(thresholded_img, 1), 0)
and
cv.Circle(overlay, (x, y), 2, (255, 255, 255), 20)
to
cv.Circle(img, (int(x), int(y)), 2, (255, 255, 255), 20)
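The reason, as far as I can tell, is that in these old cv bindings cv.Moments expects a CvMat rather than the IplImage created by cv.CreateImage (hence the cv.GetMat conversion), and cv.Circle needs integer center coordinates, while the moment ratios x and y are floats.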
(Source: http://blog.goo.ne.jp/roboz80/e/16ea5be9a9eaf370046035be841b4bfd)