When I ran this sample code on Windows with OpenCV 3.0.0, sift = cv2.SIFT() works. When I ran the same code on a Raspberry Pi with OpenCV 3.2.0, changed to sift = cv2.xfeatures2d.SIFT_create(), it shows this error:
"global name 'SIFT' is not defined"
#!/usr/bin/env python
# Copyright 2014 Jarmo Puttonen <jarmo.puttonen#gmail.com>. All rights reserved.
# Use of this source code is governed by a MIT-style
# licence that can be found in the LICENCE file.
"""Detect speed limits from webcam feed"""
import re
import os
import sys
import cv2
import threading
import numpy as np
import argparse
#from gps import gps, WATCH_ENABLE
def onTrackbarChange(_):
"""Pass any trackbar changes"""
class GPSInfo(threading.Thread):
"""Thread that takes care of updating GPS Speed info."""
def __init__(self):
threading.Thread.__init__(self)
self.gpsd = gps(mode=WATCH_ENABLE)
self.running = True
self.speed = 0
def run(self):
while self.running:
self.gpsd.next()
self.speed = self.gpsd.fix.speed*3.6 #from m/s to km/h
def read_paths(path):
"""Returns a list of files in given path"""
images = [[] for _ in range(2)]
for dirname, dirnames, _ in os.walk(path):
for subdirname in dirnames:
filepath = os.path.join(dirname, subdirname)
for filename in os.listdir(filepath):
try:
imgpath = str(os.path.join(filepath, filename))
images[0].append(imgpath)
limit = re.findall('[0-9]+', filename)
images[1].append(limit[0])
except IOError, (errno, strerror):
print "I/O error({0}): {1}".format(errno, strerror)
except:
print "Unexpected error:", sys.exc_info()[0]
raise
return images
def load_images(imgpath):
"""Loads images in given path and returns
a list containing image and keypoints"""
images = read_paths(imgpath)
imglist = [[], [], [], []]
cur_img = 0
SIFT = cv2.xfeatures2d.SIFT_create()
for i in images[0]:
img = cv2.imread(i, 0)
imglist[0].append(img)
imglist[1].append(images[1][cur_img])
cur_img += 1
keypoints, des = SIFT.detectAndCompute(img, None)
imglist[2].append(keypoints)
imglist[3].append(des)
return imglist
def run_flann(img):
"""Run FLANN-detector for given image with given image list"""
# Find the keypoint descriptors with SIFT
_, des = SIFT.detectAndCompute(img, None)
if des is None:
return "Unknown", 0
if len(des) < ARGS.MINKP:
return "Unknown", 0
biggest_amnt = 0
biggest_speed = 0
cur_img = 0
try:
for _ in IMAGES[0]:
des2 = IMAGES[3][cur_img]
matches = FLANN.knnMatch(des2, des, k=2)
matchamnt = 0
# Find matches with Lowe's ratio test
for _, (moo, noo) in enumerate(matches):
if moo.distance < ARGS.FLANNTHRESHOLD*noo.distance:
matchamnt += 1
if matchamnt > biggest_amnt:
biggest_amnt = matchamnt
biggest_speed = IMAGES[1][cur_img]
cur_img += 1
if biggest_amnt > ARGS.MINKP:
return biggest_speed, biggest_amnt
else:
return "Unknown", 0
except Exception, exept:
print exept
return "Unknown", 0
IMAGES = load_images("data")
def run_logic():
"""Run TSR and ISA"""
lastlimit = "00"
lastdetect = "00"
downscale = ARGS.DOWNSCALE
matches = 0
possiblematch = "00"
try:
if CAP.isOpened():
rval, frame = CAP.read()
print("Camera opened and frame read")
else:
rval = False
print("Camera not opened")
while rval:
origframe = frame
if ARGS.MORPH:
frame = cv2.morphologyEx(
frame,
cv2.MORPH_OPEN,
cv2.getStructuringElement(cv2.MORPH_RECT,(2,2))
)
frame = cv2.morphologyEx(
frame,
cv2.MORPH_CLOSE,
cv2.getStructuringElement(cv2.MORPH_RECT,(2,2))
)
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
if ARGS.EQ:
cv2.equalizeHist(frame, frame)
if ARGS.TRACKBARS:
ARGS.MINKP = cv2.getTrackbarPos('MINKEYPOINTS','preview')
downscale = cv2.getTrackbarPos('DOWNSCALE','preview')
ARGS.FLANNTHRESHOLD = float(
cv2.getTrackbarPos('FLANNTHRESHOLD','preview')
)/10
ARGS.CHECKS = cv2.getTrackbarPos('FLANNCHECKS','preview')
ARGS.TREES = cv2.getTrackbarPos('FLANNTREES','preview')
scaledsize = (frame.shape[1]/downscale, frame.shape[0]/downscale)
scaledframe = cv2.resize(frame, scaledsize)
# Detect signs in downscaled frame
signs = CLASSIFIER.detectMultiScale(
scaledframe,
1.1,
5,
0,
(10, 10),
(200, 200))
for sign in signs:
xpos, ypos, width, height = [ i*downscale for i in sign ]
crop_img = frame[ypos:ypos+height, xpos:xpos+width]
sized = cv2.resize(crop_img, (128, 128))
comp = "Unknown"
comp, amnt = run_flann(sized)
if comp != "Unknown":
if comp != lastlimit:
# Require two consecutive hits for new limit.
if comp == lastdetect:
possiblematch = comp
matches = matches + 1
if matches >= ARGS.matches:
print "New speed limit: "+possiblematch
lastlimit = possiblematch
matches = 0
else:
possiblematch = "00"
matches = 0
cv2.rectangle(
origframe,
(xpos, ypos),
(xpos+width, ypos+height),
(0, 0, 255))
cv2.putText(
origframe,
"Speed limit: "+comp+" KP: "+str(amnt),
(xpos,ypos-5),
cv2.FONT_HERSHEY_SIMPLEX,
0.4,
(0,0,255),
1,)
else:
cv2.rectangle(
origframe,
(xpos,ypos),
(xpos+width,ypos+height),
(255,0,0))
cv2.putText(
origframe,
"UNKNOWN SPEED LIMIT",
(xpos,ypos-5),
cv2.FONT_HERSHEY_SIMPLEX,
0.4,
(255,0,0),
1,)
comp = lastdetect
lastdetect = comp
cv2.putText(
origframe,
"Current speed limit: "+str(lastlimit)+" km/h.",
(5,50),
cv2.FONT_HERSHEY_SIMPLEX,
1,
(0,0,0),
2
)
cv2.imshow("preview", origframe)
if ARGS.PREVIEW:
cv2.imshow("preview", origframe)
_ = cv2.waitKey(20)
rval, frame = CAP.read()
except (KeyboardInterrupt, Exception), exept:
print exept
## if ARGS.GPS:
## print "Killing GPS"
## GPSP.running = False
## GPSP.join()
## print "Shutting down!"
# Preload all classes used in detection
sift = cv2.xfeatures2d.SIFT_create()
INDEX_PARAMS = None
SEARCH_PARAMS = None
FLANN = None
## Webcam logic starts
CAP = None
ARGS = None
if __name__ == "__main__":
PARSER = argparse.ArgumentParser(
description="Traffic sign recognition and intelligent speed assist.",
)
PARSER.add_argument("-d", "--device", dest="SOURCE", default=0,
help="Index of used video device. Default: 0 (/dev/video0).")
PARSER.add_argument("-g", "--gps",
dest="GPS", action="store_true", default=False,
help="Enable over speeding detection.")
PARSER.add_argument("-o", "--overspeed",
dest="COMMAND", default="false",
help="Command used in overspeed warning." \
" Default: echo OVERSPEEDING!.")
PARSER.add_argument("-c", "--cascade",
dest="CASCADE", default="lbpCascade.xml",
help="Cascade used in speed sign detection." \
" Default: lbpCascade.xml.")
PARSER.add_argument("-k", "--keypoints",
dest="MINKP", default=5,
help="Min amount of keypoints required in" \
" limit recognition. Default: 5.")
PARSER.add_argument("-D", "--downscale",
dest="DOWNSCALE", default=1,
help="Multiplier for downscaling frame before" \
" detecting signs. Default: 1.")
PARSER.add_argument("-f", "--flann",
dest="FLANNTHRESHOLD", default=0.8,
help="Threshold multiplier for accepting FLANN matches." \
" Default: 0.8.")
PARSER.add_argument("-F", "--flannchecks",
dest="CHECKS", default=50,
help="How many checks will be done in FLANN matching." \
" Default: 50.")
PARSER.add_argument("-t", "--flanntrees",
dest="TREES", default=5,
help="How many trees will be used in FLANN matching." \
" Default: 5.")
PARSER.add_argument("-m", "--matches",
dest="matches", default=2,
help="How many consecutive keypoint matches are needed" \
" before setting new limit. Default: 2.")
PARSER.add_argument("-e", "--disable-eq",
dest="EQ", action="store_false", default=True,
help="Disable histogram equalization.")
PARSER.add_argument("-M", "--morphopenclose",
dest="MORPH", action="store_true", default=False,
help="Enable morphological open/close used in removing" \
" noise from image.")
PARSER.add_argument("-T", "--trackbars",
dest="TRACKBARS", action="store_true", default=False,
help="Enable debug trackbars.")
PARSER.add_argument("-s", "--showvid",
dest="PREVIEW", action="store_true", default=False,
help="Show output video with detections.")
ARGS = PARSER.parse_args()
CAP = cv2.VideoCapture(0)
CLASSIFIER = cv2.CascadeClassifier(ARGS.CASCADE)
INDEX_PARAMS = dict(algorithm = 0, trees = ARGS.TREES)
SEARCH_PARAMS = dict(checks=ARGS.CHECKS) # or pass empty dictionary
FLANN = cv2.FlannBasedMatcher(INDEX_PARAMS, SEARCH_PARAMS)
if ARGS.PREVIEW:
cv2.namedWindow("preview")
if ARGS.GPS:
GPSP = GPSInfo()
GPSP.start()
if ARGS.TRACKBARS:
cv2.createTrackbar(
'MINKEYPOINTS',
'preview',
ARGS.MINKP,
100,
onTrackbarChange)
cv2.createTrackbar(
'DOWNSCALE',
'preview',
int(ARGS.DOWNSCALE),
20,
onTrackbarChange)
cv2.createTrackbar(
'FLANNTHRESHOLD',
'preview',
8,
10,
onTrackbarChange)
cv2.createTrackbar(
'FLANNCHECKS',
'preview',
ARGS.CHECKS,
1000,
onTrackbarChange)
cv2.createTrackbar(
'FLANNTREES',
'preview',
ARGS.TREES,
50,
onTrackbarChange)
run_logic()
In OpenCV 3.2.0, the xfeatures2d module was moved to opencv_contrib. You can verify whether your RPi cv2 installation has it with help(cv2.xfeatures2d) and help(cv2.xfeatures2d.SIFT_create). If it is missing, you need to find an OpenCV binary that was built with the opencv_contrib modules, or build one yourself.
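As a quick check, something like this (a sketch, assuming a pip-managed Python install where the opencv-contrib-python wheel is available for your platform) tells you whether the contrib build is present:
import cv2

# hasattr avoids an AttributeError on builds without the contrib modules
if hasattr(cv2, "xfeatures2d"):
    print("xfeatures2d available:", cv2.xfeatures2d.SIFT_create())
else:
    # the plain opencv-python wheel omits contrib; opencv-contrib-python ships it
    print("xfeatures2d missing; try: pip install opencv-contrib-python")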
Related
So I have a folder with images. I am trying to cluster near-similar images in a Python dictionary, with the ImageHash as the key and a list of similar images as the value. How can I prevent an image from generating a new key if it is already present in one of the other keys' lists? Here is the code that I have done so far:
from PIL import Image
import imagehash
import cv2
import numpy as np
import dhash
import distance
norm_cache: dict = dict()
def _get_image(image_path: str) -> Image:
try:
img_arr = cv2.imread(image_path)
img_arr = cv2.resize(img_arr, (512, 512), interpolation=cv2.INTER_AREA)
# Convert image into 3 channels if it contains 4
if len(img_arr.shape) > 2 and img_arr.shape[2] == 4:
img_arr = cv2.cvtColor(img_arr, cv2.COLOR_BGRA2BGR)
# using W3C luminance calc to convert into gray-scale
data = np.inner(img_arr, [299, 587, 114]) / 1000.0
return Image.fromarray(np.uint8(data),"L")
except SyntaxError:
pass
def find_similar_images(userpath):
import os
global norm_cache
def is_image(filename):
f = filename.lower()
return f.endswith(".png") or f.endswith(".jpg") or \
f.endswith(".jpeg") or f.endswith(".bmp") or f.endswith(".gif")
image_filenames = [os.path.join(userpath, path) for path in os.listdir(userpath) if is_image(path)]
images = {}
buffer = []
for img in image_filenames:
if (len(buffer) == 0):
print("Original list is empty, Appending first image to buffer.")
buffer.append(img)
continue
gray1 = _get_image(img)
h1r,h1c = dhash.dhash_row_col(gray1)
hash1 = dhash.format_hex(h1r,h1c)
images[hash1] = images.get(hash1, []) + [img]
for each in buffer:
if each in norm_cache:
print(f"cached val found for {each}")
gray2 = norm_cache[each]
h2r,h2c = dhash.dhash_row_col(gray2)
hash2 = dhash.format_hex(h2r,h2c)
else:
print("No cached_val found, Computing and storing in norm_cache")
gray2 = _get_image(each)
h2r,h2c = dhash.dhash_row_col(gray2)
hash2 = dhash.format_hex(h2r,h2c)
norm_cache[each] = gray2 # Update cache...
print(f"Comparing ---> {img}:{hash1} with {each}:{hash2}")
if(distance.hamming(hash1,hash2) <= 22):
# what should I put in here?
unique = 0
for k, img_list in images.items():
if(len(img_list) >= 1):
print(''.join(img_list))
unique = unique + 1
print(unique)
if __name__ == '__main__':
import sys, os
userpath = <Image folder/>
find_similar_images(userpath=userpath)
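One way to keep an image from generating a new key when it is already filed under another hash is to maintain a reverse index from image path to the hash it was assigned to, and consult it before adding. A minimal sketch under that assumption (assigned and file_image are hypothetical names, not part of the question code):
assigned = {}  # image path -> hash key it was filed under

def file_image(images, hash1, img):
    # skip images that are already clustered under some earlier hash
    if img in assigned:
        return
    images.setdefault(hash1, []).append(img)
    assigned[img] = hash1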
I am trying to save images from the Raspi cameras connected to my Jetson Nano. My code is below. The code reports that it is saving the files, but no matter which method I try, I cannot find the images. Thanks for your help. I've included a smaller snippet of just the while loop so it is easier to refer to.
While loop:
while True:
_ , left_image=left_camera.read()
_ , right_image=right_camera.read()
camera_images = np.hstack((left_image, right_image))
cv2.imshow("CSI Cameras", camera_images)
t1 = datetime.now()
cntdwn_timer = countdown - int ((t1-t2).total_seconds())
# If countdown is zero - let's record the next image
if cntdwn_timer == -1:
counter += 1
filename = './scenes/scene_'+ str(counter) + 'x'+'_'+ '.png'
#img = cv2.imread(camera_images)
#cv2.imwrite(os.path.join(os.path.expanduser('~'),'CSI-Camera', filename), camera_images)
cv2.imwrite('/home/aryan/CSI-Camera/{}'.format(filename), camera_images)
print (' monkey'+filename)
t2 = datetime.now()
time.sleep(1)
cntdwn_timer = 0 # To avoid "-1" timer display
next
# This also acts as
keyCode = cv2.waitKey(30) & 0xFF
# Stop the program on the ESC key
if keyCode == 27:
break
left_camera.stop()
left_camera.release()
right_camera.stop()
right_camera.release()
cv2.destroyAllWindows()
import cv2
import threading
import numpy as np
import time
from datetime import datetime
# gstreamer_pipeline returns a GStreamer pipeline for capturing from the CSI camera
# Flip the image by setting the flip_method (most common values: 0 and 2)
# display_width and display_height determine the size of each camera pane in the window on the screen
left_camera = None
right_camera = None
#PiCam
# Photo session settings
total_photos = 30 # Number of images to take
countdown = 5 # Interval for count-down timer, seconds
font=cv2.FONT_HERSHEY_SIMPLEX # Countdown timer font
class CSI_Camera:
def __init__ (self) :
# Initialize instance variables
# OpenCV video capture element
self.video_capture = None
# The last captured image from the camera
self.frame = None
self.grabbed = False
# The thread where the video capture runs
self.read_thread = None
self.read_lock = threading.Lock()
self.running = False
def open(self, gstreamer_pipeline_string):
try:
self.video_capture = cv2.VideoCapture(
gstreamer_pipeline_string, cv2.CAP_GSTREAMER
)
except RuntimeError:
self.video_capture = None
print("Unable to open camera")
print("Pipeline: " + gstreamer_pipeline_string)
return
# Grab the first frame to start the video capturing
self.grabbed, self.frame = self.video_capture.read()
def start(self):
if self.running:
print('Video capturing is already running')
return None
# create a thread to read the camera image
if self.video_capture != None:
self.running=True
self.read_thread = threading.Thread(target=self.updateCamera)
self.read_thread.start()
return self
def stop(self):
self.running=False
self.read_thread.join()
def updateCamera(self):
# This is the thread to read images from the camera
while self.running:
try:
grabbed, frame = self.video_capture.read()
with self.read_lock:
self.grabbed=grabbed
self.frame=frame
except RuntimeError:
print("Could not read image from camera")
# FIX ME - stop and cleanup thread
# Something bad happened
def read(self):
with self.read_lock:
frame = self.frame.copy()
grabbed=self.grabbed
return grabbed, frame
def release(self):
if self.video_capture != None:
self.video_capture.release()
self.video_capture = None
# Now kill the thread
if self.read_thread != None:
self.read_thread.join()
# Currently we set the frame rate on the CSI camera on the Nano through GStreamer
# Here we directly select sensor_mode 3 (1280x720, 59.9999 fps)
def gstreamer_pipeline(
sensor_id=0,
sensor_mode=3,
capture_width=1280,
capture_height=720,
display_width=1280,
display_height=720,
framerate=30,
flip_method=0,
):
return (
"nvarguscamerasrc sensor-id=%d sensor-mode=%d ! "
"video/x-raw(memory:NVMM), "
"width=(int)%d, height=(int)%d, "
"format=(string)NV12, framerate=(fraction)%d/1 ! "
"nvvidconv flip-method=%d ! "
"video/x-raw, width=(int)%d, height=(int)%d, format=(string)BGRx ! "
"videoconvert ! "
"video/x-raw, format=(string)BGR ! appsink"
% (
sensor_id,
sensor_mode,
capture_width,
capture_height,
framerate,
flip_method,
display_width,
display_height,
)
)
def start_cameras():
left_camera = CSI_Camera()
left_camera.open(
gstreamer_pipeline(
sensor_id=0,
sensor_mode=3,
flip_method=0,
display_height=540,
display_width=960,
)
)
left_camera.start()
right_camera = CSI_Camera()
right_camera.open(
gstreamer_pipeline(
sensor_id=1,
sensor_mode=3,
flip_method=0,
display_height=540,
display_width=960,
)
)
right_camera.start()
cv2.namedWindow("CSI-AV Cameras", cv2.WINDOW_AUTOSIZE)
if (
not left_camera.video_capture.isOpened()
or not right_camera.video_capture.isOpened()
):
# Cameras did not open, or no camera attached
print("Unable to open any cameras")
# TODO: Proper Cleanup
SystemExit(0)
counter = 0
t2 = datetime.now()
#Main stuff here
while True:
_ , left_image=left_camera.read()
_ , right_image=right_camera.read()
camera_images = np.hstack((left_image, right_image))
cv2.imshow("CSI Cameras", camera_images)
t1 = datetime.now()
cntdwn_timer = countdown - int ((t1-t2).total_seconds())
# If countdown is zero - let's record the next image
if cntdwn_timer == -1:
counter += 1
filename = './scenes/scene_'+ str(counter) + 'x'+'_'+ '.png'
#img = cv2.imread(camera_images)
#cv2.imwrite(os.path.join(os.path.expanduser('~'),'CSI-Camera', filename), camera_images)
cv2.imwrite('/home/aryan/CSI-Camera/{}'.format(filename), camera_images)
print (' monkey'+filename)
t2 = datetime.now()
time.sleep(1)
cntdwn_timer = 0 # To avoid "-1" timer display
next
# This also acts as
keyCode = cv2.waitKey(30) & 0xFF
# Stop the program on the ESC key
if keyCode == 27:
break
left_camera.stop()
left_camera.release()
right_camera.stop()
right_camera.release()
cv2.destroyAllWindows()
if __name__ == "__main__":
start_cameras()
Put a breakpoint on the line where you want to save the image, and inspect the image you are about to save:
Does it have data inside?
Does it have a camera_images.shape?
What is the return value of the cv2.imwrite function?
Does the path you are trying to write to really exist?
Did you append .png or .jpg?
A short sketch of these checks is below.
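A minimal sketch of those checks (the directory path is an assumption based on the paths in the question; adapt it to your layout):
import os
import cv2

out_dir = '/home/aryan/CSI-Camera/scenes'  # assumed target directory
os.makedirs(out_dir, exist_ok=True)        # cv2.imwrite will not create missing directories

print('shape:', getattr(camera_images, 'shape', None))   # is there data to save?
ok = cv2.imwrite(os.path.join(out_dir, 'scene_test.png'), camera_images)
print('imwrite returned:', ok)             # False means nothing was written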
OpenCV Error: Bad argument (This Fisherfaces model is not computed yet. Did you
call Fisherfaces::train?) in cv::face::Fisherfaces::predict,
cv2.error: C:\projects\opencv-python\opencv_contrib\modules\face\src\fisher_faces.cpp:137: error: (-5) This Fisherfaces model is not computed yet. Did you call Fisherfaces::train? in function cv::face::Fisherfaces::predict
import cv2
import numpy as np
import argparse
import time
import glob
import os
import sys
import subprocess
import pandas
import random
import Update_Model
import math
#Define variables and load classifier
camnumber = 0
video_capture = cv2.VideoCapture()
facecascade = cv2.CascadeClassifier("haarcascade_frontalface_default.xml")
fishface = cv2.face.FisherFaceRecognizer_create()
try:
fishface.read("trained_emoclassifier.xml")
except:
print("no trained xml file found, please run program with --update flagfirst")
parser = argparse.ArgumentParser(description="Options for the emotion-based music player")
parser.add_argument("--update", help="Call to grab new images and update the model accordingly", action="store_true")
args = parser.parse_args()
facedict = {}
actions = {}
emotions = ["angry", "happy", "sad", "neutral"]
df = pandas.read_excel("EmotionLinks.xlsx") #open Excel file
actions["angry"] = [x for x in df.angry.dropna()] #We need de dropna() when columns are uneven in length, which creates NaN values at missing places. The OS won't know what to do with these if we try to open them.
actions["happy"] = [x for x in df.happy.dropna()]
actions["sad"] = [x for x in df.sad.dropna()]
actions["neutral"] = [x for x in df.neutral.dropna()]
def open_stuff(filename): #Open the file, credit to user4815162342 on the Stack Overflow link in the text above
if sys.platform == "win32":
os.startfile(filename)
else:
opener ="open" if sys.platform == "darwin" else "xdg-open"
subprocess.call([opener, filename])
def crop_face(clahe_image, face):
for (x, y, w, h) in face:
faceslice = clahe_image[y:y+h, x:x+w]
faceslice = cv2.resize(faceslice, (350, 350))
facedict["face%s" %(len(facedict)+1)] = faceslice
return faceslice
def update_model(emotions):
print("Model update mode active")
check_folders(emotions)
for i in range(0, len(emotions)):
save_face(emotions[i])
print("collected images, looking good! Now updating model...")
Update_Model.update(emotions)
print("Done!")
def check_folders(emotions):
for x in emotions:
if os.path.exists("dataset\\%s" %x):
pass
else:
os.makedirs("dataset\\%s" %x)
def save_face(emotion):
print("\n\nplease look " + emotion + ". Press enter when you're ready to have your pictures taken")
input() #Wait until enter is pressed
video_capture.open(0)
while len(facedict.keys()) < 16:
detect_face()
video_capture.release()
for x in facedict.keys():
cv2.imwrite("dataset\\%s\\%s.jpg" %(emotion, len(glob.glob("dataset\\%s\\*" %emotion))), facedict[x])
facedict.clear()
def recognize_emotion():
predictions = []
confidence = []
for x in facedict.keys():
pred, conf = fishface.predict(facedict[x])
cv2.imwrite("images\\%s.jpg" %x, facedict[x])
predictions.append(pred)
confidence.append(conf)
recognized_emotion = emotions[max(set(predictions), key=predictions.count)]
print("I think you're %s" %recognized_emotion)
actionlist = [x for x in actions[recognized_emotion]] #get list of actions/files for detected emotion
random.shuffle(actionlist) #Randomly shuffle the list
open_stuff(actionlist[0]) #Open the first entry in the list
def grab_webcamframe():
ret, frame = video_capture.read()
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
clahe_image = clahe.apply(gray)
return clahe_image
def detect_face():
clahe_image = grab_webcamframe()
face = facecascade.detectMultiScale(clahe_image, scaleFactor=1.1, minNeighbors=15, minSize=(10, 10), flags=cv2.CASCADE_SCALE_IMAGE)
if len(face) == 1:
faceslice = crop_face(clahe_image, face)
recognize_emotion()
return faceslice
else:
print("no/multiple faces detected, passing over frame")
def run_detection():
while len(facedict) != 10:
detect_face()
detected_emotion = recognize_emotion()
return detected_emotion
if args.update:
update_model(emotions)
else:
video_capture.open(camnumber)
run_detection()
How can I resolve it?
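The error means the model was never computed: when fishface.read() fails, the except branch above only prints a warning, so fishface.predict() later runs on an untrained recognizer. A minimal sketch of training it first, assuming you already have equally sized grayscale face images and integer labels (training_images and labels are hypothetical names, not from the question code):
import numpy as np

fishface = cv2.face.FisherFaceRecognizer_create()
fishface.train(training_images, np.asarray(labels))   # compute the Fisherfaces model
fishface.write("trained_emoclassifier.xml")           # so the later read() succeeds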
I am coding a program that can monitor the red dot of a laser and track its movement. I would like to collect the x and y movement of the laser (deviating from a stable point). Some searching found me this code on GitHub, which I plan to modify and use to track the red dot using OpenCV. I am, however, struggling to pull any data from it.
How would I go about storing the x and y coordinates of the laser in Excel (or another useful format)?
import sys
import argparse
import cv2
import numpy
class LaserTracker(object):
def __init__(self, cam_width=640, cam_height=480, hue_min=20, hue_max=160,
sat_min=100, sat_max=255, val_min=200, val_max=256,
display_thresholds=False):
"""
* ``cam_width`` x ``cam_height`` -- This should be the size of the
image coming from the camera. Default is 640x480.
HSV color space Threshold values for a RED laser pointer are determined
by:
* ``hue_min``, ``hue_max`` -- Min/Max allowed Hue values
* ``sat_min``, ``sat_max`` -- Min/Max allowed Saturation values
* ``val_min``, ``val_max`` -- Min/Max allowed pixel values
If the dot from the laser pointer doesn't fall within these values, it
will be ignored.
* ``display_thresholds`` -- if True, additional windows will display
values for threshold image channels.
"""
self.cam_width = cam_width
self.cam_height = cam_height
self.hue_min = hue_min
self.hue_max = hue_max
self.sat_min = sat_min
self.sat_max = sat_max
self.val_min = val_min
self.val_max = val_max
self.display_thresholds = display_thresholds
self.capture = None # camera capture device
self.channels = {
'hue': None,
'saturation': None,
'value': None,
'laser': None,
}
self.previous_position = None
self.trail = numpy.zeros((self.cam_height, self.cam_width, 3),
numpy.uint8)
def create_and_position_window(self, name, xpos, ypos):
"""Creates a named widow placing it on the screen at (xpos, ypos)."""
# Create a window
cv2.namedWindow(name)
# Resize it to the size of the camera image
cv2.resizeWindow(name, self.cam_width, self.cam_height)
# Move to (xpos,ypos) on the screen
cv2.moveWindow(name, xpos, ypos)
def setup_camera_capture(self, device_num=0):
"""Perform camera setup for the device number (default device = 0).
Returns a reference to the camera Capture object.
"""
try:
device = int(device_num)
sys.stdout.write("Using Camera Device: {0}\n".format(device))
except (IndexError, ValueError):
# assume we want the 1st device
device = 0
sys.stderr.write("Invalid Device. Using default device 0\n")
# Try to start capturing frames
self.capture = cv2.VideoCapture(device)
if not self.capture.isOpened():
sys.stderr.write("Faled to Open Capture device. Quitting.\n")
sys.exit(1)
# set the wanted image size from the camera
self.capture.set(
cv2.cv.CV_CAP_PROP_FRAME_WIDTH if cv2.__version__.startswith('2') else cv2.CAP_PROP_FRAME_WIDTH,
self.cam_width
)
self.capture.set(
cv2.cv.CV_CAP_PROP_FRAME_HEIGHT if cv2.__version__.startswith('2') else cv2.CAP_PROP_FRAME_HEIGHT,
self.cam_height
)
return self.capture
def handle_quit(self, delay=10):
"""Quit the program if the user presses "Esc" or "q"."""
key = cv2.waitKey(delay)
c = chr(key & 255)
if c in ['c', 'C']:
self.trail = numpy.zeros((self.cam_height, self.cam_width, 3),
numpy.uint8)
if c in ['q', 'Q', chr(27)]:
sys.exit(0)
def threshold_image(self, channel):
if channel == "hue":
minimum = self.hue_min
maximum = self.hue_max
elif channel == "saturation":
minimum = self.sat_min
maximum = self.sat_max
elif channel == "value":
minimum = self.val_min
maximum = self.val_max
(t, tmp) = cv2.threshold(
self.channels[channel], # src
maximum, # threshold value
0, # we dont care because of the selected type
cv2.THRESH_TOZERO_INV # t type
)
(t, self.channels[channel]) = cv2.threshold(
tmp, # src
minimum, # threshold value
255, # maxvalue
cv2.THRESH_BINARY # type
)
if channel == 'hue':
# only works for filtering red color because the range for the hue
# is split
self.channels['hue'] = cv2.bitwise_not(self.channels['hue'])
def track(self, frame, mask):
"""
Track the position of the laser pointer.
Code taken from
http://www.pyimagesearch.com/2015/09/14/ball-tracking-with-opencv/
"""
center = None
countours = cv2.findContours(mask, cv2.RETR_EXTERNAL,
cv2.CHAIN_APPROX_SIMPLE)[-2]
# only proceed if at least one contour was found
if len(countours) > 0:
# find the largest contour in the mask, then use
# it to compute the minimum enclosing circle and
# centroid
c = max(countours, key=cv2.contourArea)
((x, y), radius) = cv2.minEnclosingCircle(c)
moments = cv2.moments(c)
if moments["m00"] > 0:
center = int(moments["m10"] / moments["m00"]), \
int(moments["m01"] / moments["m00"])
else:
center = int(x), int(y)
# only proceed if the radius meets a minimum size
if radius > 10:
# draw the circle and centroid on the frame,
cv2.circle(frame, (int(x), int(y)), int(radius),
(0, 255, 255), 2)
cv2.circle(frame, center, 5, (0, 0, 255), -1)
# then update the pointer trail
if self.previous_position:
cv2.line(self.trail, self.previous_position, center,
(255, 255, 255), 2)
cv2.add(self.trail, frame, frame)
self.previous_position = center
def detect(self, frame):
hsv_img = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
# split the video frame into color channels
h, s, v = cv2.split(hsv_img)
self.channels['hue'] = h
self.channels['saturation'] = s
self.channels['value'] = v
# Threshold ranges of HSV components; storing the results in place
self.threshold_image("hue")
self.threshold_image("saturation")
self.threshold_image("value")
# Perform an AND on HSV components to identify the laser!
self.channels['laser'] = cv2.bitwise_and(
self.channels['hue'],
self.channels['value']
)
self.channels['laser'] = cv2.bitwise_and(
self.channels['saturation'],
self.channels['laser']
)
# Merge the HSV components back together.
hsv_image = cv2.merge([
self.channels['hue'],
self.channels['saturation'],
self.channels['value'],
])
self.track(frame, self.channels['laser'])
return hsv_image
def display(self, img, frame):
"""Display the combined image and (optionally) all other image channels
NOTE: default color space in OpenCV is BGR.
"""
cv2.imshow('RGB_VideoFrame', frame)
cv2.imshow('LaserPointer', self.channels['laser'])
if self.display_thresholds:
cv2.imshow('Thresholded_HSV_Image', img)
cv2.imshow('Hue', self.channels['hue'])
cv2.imshow('Saturation', self.channels['saturation'])
cv2.imshow('Value', self.channels['value'])
def setup_windows(self):
sys.stdout.write("Using OpenCV version: {0}\n".format(cv2.__version__))
# create output windows
self.create_and_position_window('LaserPointer', 0, 0)
self.create_and_position_window('RGB_VideoFrame',
10 + self.cam_width, 0)
if self.display_thresholds:
self.create_and_position_window('Thresholded_HSV_Image', 10, 10)
self.create_and_position_window('Hue', 20, 20)
self.create_and_position_window('Saturation', 30, 30)
self.create_and_position_window('Value', 40, 40)
def run(self):
# Set up window positions
self.setup_windows()
# Set up the camera capture
self.setup_camera_capture()
while True:
# 1. capture the current image
success, frame = self.capture.read()
if not success: # no image captured... end the processing
sys.stderr.write("Could not read camera frame. Quitting\n")
sys.exit(1)
hsv_image = self.detect(frame)
self.display(hsv_image, frame)
self.handle_quit()
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Run the Laser Tracker')
parser.add_argument('-W', '--width',
default=640,
type=int,
help='Camera Width')
parser.add_argument('-H', '--height',
default=480,
type=int,
help='Camera Height')
parser.add_argument('-u', '--huemin',
default=20,
type=int,
help='Hue Minimum Threshold')
parser.add_argument('-U', '--huemax',
default=160,
type=int,
help='Hue Maximum Threshold')
parser.add_argument('-s', '--satmin',
default=100,
type=int,
help='Saturation Minimum Threshold')
parser.add_argument('-S', '--satmax',
default=255,
type=int,
help='Saturation Maximum Threshold')
parser.add_argument('-v', '--valmin',
default=200,
type=int,
help='Value Minimum Threshold')
parser.add_argument('-V', '--valmax',
default=255,
type=int,
help='Value Maximum Threshold')
parser.add_argument('-d', '--display',
action='store_true',
help='Display Threshold Windows')
params = parser.parse_args()
tracker = LaserTracker(
cam_width=params.width,
cam_height=params.height,
hue_min=params.huemin,
hue_max=params.huemax,
sat_min=params.satmin,
sat_max=params.satmax,
val_min=params.valmin,
val_max=params.valmax,
display_thresholds=params.display
)
tracker.run()
I'm by no means an expert on this, but taking a quick look at the code, it seems that self.previous_position holds the (x, y) pair you want.
Not sure why you would want to save to Excel, but to save to a file, just add f = open(filename, 'w') at the start of the run function and then write self.previous_position (formatted as a string) at the end of each loop iteration.
Your file will then contain all the recorded x and y coordinates; if you're set on getting this into Excel, check out Python's out-of-the-box csv library, as sketched below.
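A minimal sketch of that idea with the standard-library csv module (the file name is an assumption), replacing the run method shown above:
import csv

def run(self):
    self.setup_windows()
    self.setup_camera_capture()
    with open('laser_positions.csv', 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['x', 'y'])
        while True:
            success, frame = self.capture.read()
            if not success:
                sys.stderr.write("Could not read camera frame. Quitting\n")
                sys.exit(1)
            hsv_image = self.detect(frame)
            if self.previous_position:        # set by track() when the dot is found
                writer.writerow(self.previous_position)
            self.display(hsv_image, frame)
            self.handle_quit()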
I am trying to put a gradient on an image, and that part works. The CPU and GPU programs should do the same thing, but I have a problem with the output images: the GPU code gives me a different image than the CPU code, and I don't know where the mistake is. I think the CPU code is fine but the GPU code is not. Output images: original, CPU, GPU. Please check my code. Thanks.
import pyopencl as cl
import sys
import Image
import numpy
from time import time
def gpu_gradient():
if len(sys.argv) != 3:
print "USAGE: " + sys.argv[0] + " <inputImageFile> <outputImageFile>"
return 1
# create context and command queue
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
# load image
im = Image.open(sys.argv[1])
if im.mode != "RGBA":
im = im.convert("RGBA")
imgSize = im.size
buffer = im.tostring() # len(buffer) = imgSize[0] * imgSize[1] * 4
# Create ouput image object
clImageFormat = cl.ImageFormat(cl.channel_order.RGBA,
cl.channel_type.UNSIGNED_INT8)
input_image = cl.Image(ctx,
cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR,
clImageFormat,
imgSize,
None,
buffer)
output_image = cl.Image(ctx,
cl.mem_flags.WRITE_ONLY,
clImageFormat,
imgSize)
# load the kernel source code
kernelFile = open("gradient.cl", "r")
kernelSrc = kernelFile.read()
# Create OpenCL program
program = cl.Program(ctx, kernelSrc).build()
# Call the kernel directly
globalWorkSize = ( imgSize[0],imgSize[1] )
gpu_start_time = time()
program.gradientcover(queue,
globalWorkSize,
None,
input_image,
output_image)
# Read the output buffer back to the Host
buffer = numpy.zeros(imgSize[0] * imgSize[1] * 4, numpy.uint8)
origin = ( 0, 0, 0 )
region = ( imgSize[0], imgSize[1], 1 )
cl.enqueue_read_image(queue, output_image,
origin, region, buffer).wait()
# Save the image to disk
gsim = Image.fromstring("RGBA", imgSize, buffer.tostring())
gsim.save("GPU_"+sys.argv[2])
gpu_end_time = time()
print("GPU Time: {0} s".format(gpu_end_time - gpu_start_time))
def cpu_gradient():
if len(sys.argv) != 3:
print "USAGE: " + sys.argv[0] + " <inputImageFile> <outputImageFile>"
return 1
gpu_start_time = time()
im = Image.open(sys.argv[1])
if im.mode != "RGBA":
im = im.convert("RGBA")
pixels = im.load()
for i in range(im.size[0]):
for j in range(im.size[1]):
RGBA= pixels[i,j]
RGBA2=RGBA[0],RGBA[1],0,0
pixel=RGBA[0]+RGBA2[0],RGBA[1]+RGBA2[1],RGBA[2],RGBA[3]
final_pixels=list(pixel)
if final_pixels[0]>255:
final_pixels[0]=255
elif final_pixels[1]>255:
final_pixels[1]=255
pixel=tuple(final_pixels)
pixels[i,j]=pixel
im.save("CPU_"+sys.argv[2])
gpu_end_time = time()
print("CPU Time: {0} s".format(gpu_end_time - gpu_start_time))
cpu_gradient()
gpu_gradient()
Kernel code:
const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |
CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST;
__kernel void gradientcover(read_only image2d_t srcImg,
write_only image2d_t dstImg)
{
int2 coord = (int2) (get_global_id(0), get_global_id(1));
uint4 pixel = read_imageui(srcImg, sampler, coord);
uint4 pixel2 = (uint4)(coord.x, coord.y,0,0);
pixel=pixel + pixel2;
if(pixel.x > 255) pixel.x=255;
if(pixel.y > 255) pixel.y=255;
// Write the output value to image
write_imageui(dstImg, coord, pixel);
}
Your CL and Python code do not do the same thing!
RGBA= pixels[i,j]
RGBA2=RGBA[0],RGBA[1],0,0
pixel=RGBA[0]+RGBA2[0],RGBA[1]+RGBA2[1],RGBA[2],RGBA[3]
adds the RG component to the pixel.
uint4 pixel = read_imageui(srcImg, sampler, coord);
uint4 pixel2 = (uint4)(coord.x, coord.y,0,0);
pixel=pixel + pixel2;
adds the X, Y from the coordinates to the pixel.
It is highly likely that this is the cause of difference between your results.
Assuming (from the description) that you want to darken/lighten the image by its coordinates, I'd suggest the Python code should be:
RGBA= pixels[i,j]
RGBA2=i,j,0,0
instead.