I've gotten OpenCV working with Python and I can even detect a face through my webcam. What I really want to do though, is see movement and find the point in the middle of the blob of movement. The camshift sample is close to what I want, but I don't want to have to select which portion of the video to track. Bonus points for being able to predict the next frame.
Here's the code I have currently:
#!/usr/bin/env python
import cv
def is_rect_nonzero(r):
(_,_,w,h) = r
return (w > 0) and (h > 0)
class CamShiftDemo:
def __init__(self):
self.capture = cv.CaptureFromCAM(0)
cv.NamedWindow( "CamShiftDemo", 1 )
self.storage = cv.CreateMemStorage(0)
self.cascade = cv.Load("/usr/local/share/opencv/haarcascades/haarcascade_mcs_upperbody.xml")
self.last_rect = ((0, 0), (0, 0))
def run(self):
hist = cv.CreateHist([180], cv.CV_HIST_ARRAY, [(0,180)], 1 )
backproject_mode = False
i = 0
while True:
i = (i + 1) % 12
frame = cv.QueryFrame( self.capture )
if i == 0:
found = cv.HaarDetectObjects(frame, self.cascade, self.storage, 1.2, 2, 0, (20, 20))
for p in found:
# print p
self.last_rect = (p[0][0], p[0][1]), (p[0][2], p[0][3])
print self.last_rect
cv.Rectangle( frame, self.last_rect[0], self.last_rect[1], cv.CV_RGB(255,0,0), 3, cv.CV_AA, 0 )
cv.ShowImage( "CamShiftDemo", frame )
c = cv.WaitKey(7) % 0x100
if c == 27:
break
if __name__=="__main__":
demo = CamShiftDemo()
demo.run()
Found a solution at How do I track motion using OpenCV in Python?
Related
I am trying to read all the touching pixels with the same color in a image.
For that I use reccursive functions. When I check one pixel, I look on the right, left, top and bottom if the pixel close to it is the same color. If it is I add it to an array otherwise I don't.
The code is as follow:
vimport tkinter as tk
from PIL import Image
import sys
sys.setrecursionlimit(200000)
## WINDOWS
# to launch in debug mode
imgToDraw = Image.open('assets-test\\smile-face.png')
# to launch normaly
# imgToDraw = Image.open('..\\assets-test\\smile-face.png')
## LINUX
# imgToDraw = Image.open('../assets-test/smile-face.png')
imgPixels = imgToDraw.load()
imgWidth = imgToDraw.size[0]
imgHeight = imgToDraw.size[1]
# an element is a part of the image, it's a bunch of pixels with approximately the same color
# and each pixel touch at least one other pixel of the same element
elements = [];
isPixelChecked = [[ False for y in range( imgWidth ) ] for x in range( imgHeight )]
# min tolerable difference between two colors to consider them the same
# the higher the value is the more colors will be considered the same
COLOR_TOLERANCE = 10
reccursionCount = 0
class Element:
def __init__(self, color):
self.pixels = [];
self.color = color;
def addPixel(self, pixel):
self.pixels.append(pixel);
class Pixel:
def __init__(self, x, y, color):
self.x = x # x position of the pixel
self.y = y # y position of the pixel
self.color = color # color is a tuple (r,g,b)
def cutImageInElements():
global element
completeElement(element.pixels)
def completeElement(elemPixels):
global reccursionCount
global isPixelChecked
reccursionCount += 1
nbPixels = len(elemPixels);
xIndex = elemPixels[nbPixels - 1].x
yIndex = elemPixels[nbPixels - 1].y
xRightIdx = elemPixels[nbPixels - 1].x + 1
xLeftIdx = elemPixels[nbPixels - 1].x - 1
yBottomIdx = elemPixels[nbPixels - 1].y + 1
yTopIdx = elemPixels[nbPixels - 1].y - 1
isPixelChecked[xIndex][yIndex] = True
if((xRightIdx < imgWidth) and isPixelChecked[xRightIdx][yIndex] == False):
if(isColorAlmostSame(imgPixels[elemPixels[0].x, elemPixels[0].y], imgPixels[xRightIdx, yIndex])):
pixelAppended = Pixel(xRightIdx, yIndex, imgPixels[xRightIdx, yIndex])
elemPixels.append(pixelAppended)
completeElement(elemPixels)
if((xLeftIdx >= 0) and isPixelChecked[xLeftIdx][yIndex] == False):
if(isColorAlmostSame(imgPixels[elemPixels[0].x, elemPixels[0].y], imgPixels[xLeftIdx, yIndex])):
pixelAppended = Pixel(xLeftIdx, yIndex, imgPixels[xLeftIdx, yIndex])
elemPixels.append(pixelAppended)
completeElement(elemPixels)
if((yBottomIdx < imgHeight) and isPixelChecked[xIndex][yBottomIdx] == False):
if(isColorAlmostSame(imgPixels[elemPixels[0].x, elemPixels[0].y], imgPixels[xIndex, yBottomIdx])):
pixelAppended = Pixel(xIndex, yBottomIdx, imgPixels[xIndex, yBottomIdx])
elemPixels.append(pixelAppended)
completeElement(elemPixels)
if((yTopIdx >= 0) and isPixelChecked[xIndex][yTopIdx] == False):
if(isColorAlmostSame(imgPixels[elemPixels[0].x, elemPixels[0].y], imgPixels[xIndex, yTopIdx])):
pixelAppended = Pixel(xIndex, yTopIdx, imgPixels[xIndex, yTopIdx])
elemPixels.append(pixelAppended)
completeElement(elemPixels)
def isColorAlmostSame(pixel1, pixel2):
redDiff = abs(pixel1[0] - pixel2[0])
greenDiff = abs(pixel1[1] - pixel2[1])
blueDiff = abs(pixel1[2] - pixel2[2])
if(redDiff < COLOR_TOLERANCE and greenDiff < COLOR_TOLERANCE and blueDiff < COLOR_TOLERANCE):
return True
else:
return False
def printPixelsArr(pixelsArr):
for x in range(0, len(pixelsArr)):
print(pixelsArr[x].x, pixelsArr[x].y, pixelsArr[x].color)
if __name__ == '__main__':
pixel = Pixel(0, 0, imgPixels[0, 0]);
element = Element(pixel.color);
element.addPixel(pixel);
cutImageInElements();
print("NbReccursive call: ", reccursionCount)
This code works for small images of size 100x100 but crashes with an image of 400x400 with the error "terminated by signal SIGSEGV (Address boundary error)" when I launch the program on wsl2. When I run the program on cmd or powershell it just crashes but with no error code/msg.
I cannot understand why it would work with some size of images and not others. I can only think that the memory runs out or something but in the task manager the program uses almost no memory.
Not sure why that's failing, but that much recursion in Python isn't a great idea. I'd suggest reading about tail recursion that other languages use to make some recursive algorithms consume constant stack space. Note that your algorithm is not tail recursive, so this optimisation wouldn't help even if Python supported it.
I hacked together the following flood fill implementation. It uses Numpy so that it's only 10x slower than Pillow's ImageDraw.floodfill.
import numpy as np
def floodfill(im, row, col, threshold):
similar = np.mean(np.abs(im - im[row, col]), 2) < threshold
mask = np.zeros_like(similar)
mask[row, col] = 1
m2 = mask.copy()
while True:
m2[:,:] = mask
m2[1:,:] |= mask[:-1]
m2[:-1,:] |= mask[1:]
m2[:,1:] |= mask[:,:-1]
m2[:,:-1] |= mask[:,1:]
m2 &= similar
if np.all(m2 == mask):
return mask
mask[:,:] = m2
As an example of using this, you could do;
import requests
from io import BytesIO
res = requests.get("https://picsum.photos/300")
res.raise_for_status()
src = Image.open(BytesIO(res.content))
mask = floodfill(np.array(src, int), 10, 10, 40)
where the random image I got and the output mask are:
The problem is, program is really lengthy and computationally expensive.
so is there any way to make this program faster or any other way to write this code?
I am beginner in python and would love to take all suggestions or different approach then this program
also i am new to the stack overflow so if anything is wrong in this post or any issue in program please point out in comment .
first section of code is
#TEST V2.1 multitracker
import cv2
import numpy as np
#path = (input("enter the video path: "))
cap = cv2.VideoCapture(" YOUR VIDEO PATH ")
# creating the dictionary to add all the wanted trackers in OpenCV that can be used for tracking in future
OBJECT_TRACKING_MACHINE = {
"csrt": cv2.legacy.TrackerCSRT_create,
"kcf": cv2.legacy.TrackerKCF_create,
"boosting": cv2.legacy.TrackerBoosting_create,
"mil": cv2.legacy.TrackerMIL_create,
"tld": cv2.legacy.TrackerTLD_create,
"medianflow": cv2.legacy.TrackerMedianFlow_create,
"mosse": cv2.legacy.TrackerMOSSE_create
}
# Creating the MultiTracker variable object to store the
trackers = cv2.legacy.MultiTracker_create()
here I started the loop
while True:
frame = cap.read()[1]
#print("freame start",frame)
if frame is None:
print("error getting the video,please check the input")
break
frame = cv2.resize(frame,(1080,720))
Thresh = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
#Thresh = cv2.adaptiveThreshold(gray, 185, cv2.ADAPTIVE_THRESH_MEAN_C,cv2.THRESH_BINARY_INV, 11, 6)
#print(trackers.update(Thresh))
(success, boxes) = trackers.update(Thresh)
# loop over the bounding boxes and draw them on the frame
if success == False:
bound_boxes = trackers.getObjects()
idx = np.where(bound_boxes.sum(axis= 1) != 0)[0]
bound_boxes = bound_boxes[idx]
trackers = cv2.legacy.MultiTracker_create()
for bound_box in bound_boxes:
trackers.add(tracker,Thresh,bound_box)
x,y,w,h = cv2.boundingRect(Thresh)
k = cv2.waitKey(50)
And I am guessing this is the section which is making the program slow
if is there any different way to represent this part or any idea different then this
for i,box in enumerate(boxes):
(x, y, w, h) = [int(v) for v in box]
#cv2.rectangle(Thresh, (x, y), (x + w, y + h), (255, 255, 255), 2)
#cv2.putText(Thresh,('TRACKING BOX NO-'+str(i)),(x+10,y-3),cv2.FONT_HERSHEY_PLAIN,1.0,(255,255,0),2)
arr = boxes.astype(int)
if i == 0 :
Roi = Thresh[(arr[i,1]):(arr[i,1]+arr[i,3]),(arr[i,0]):(arr[i,0]+arr[i,2])]
murg = cv2.resize(Roi,(300,200))
cv2.imshow("horizon", murg)
#print(murg)
if i == 1 :
Roi1 = Thresh[(arr[i,1]):(arr[i,1]+arr[i,3]),(arr[i,0]):(arr[i,0]+arr[i,2])]
Roi = Thresh[(arr[(i-1),1]):(arr[(i-1),1]+arr[(i-1),3]),(arr[(i-1),0]):(arr[(i-1),0]+arr[(i-1),2])]
murg = cv2.resize(Roi,(300,200))
murg1 = cv2.resize(Roi1,(300,200))
hori = np.concatenate((murg,murg1),axis=1)
cv2.imshow("horizon",hori)
#print(hori)
elif i == 2 :
Roi2 = Thresh[(arr[i,1]):(arr[i,1]+arr[i,3]),(arr[i,0]):(arr[i,0]+arr[i,2])]
Roi1 = Thresh[(arr[(i-1),1]):(arr[(i-1),1]+arr[(i-1),3]),(arr[(i-1),0]):(arr[(i-1),0]+arr[(i-1),2])]
Roi = Thresh[(arr[(i-2),1]):(arr[(i-2),1]+arr[(i-2),3]),(arr[(i-2),0]):(arr[(i-2),0]+arr[(i-2),2])]
murg = cv2.resize(Roi,(300,200))
murg1 = cv2.resize(Roi1,(300,200))
murg2 = cv2.resize(Roi2,(300,200))
hori = np.concatenate((murg,murg1,murg2),axis=1)
cv2.imshow("horizon",hori)
#print(hori)
elif i == 3 :
Roi3 = Thresh[(arr[i,1]):(arr[i,1]+arr[i,3]),(arr[i,0]):(arr[i,0]+arr[i,2])]
Roi2 = Thresh[(arr[(i-1),1]):(arr[(i-1),1]+arr[(i-1),3]),(arr[(i-1),0]):(arr[(i-1),0]+arr[(i-1),2])]
Roi1 = Thresh[(arr[(i-2),1]):(arr[(i-2),1]+arr[(i-2),3]),(arr[(i-2),0]):(arr[(i-2),0]+arr[(i-2),2])]
Roi = Thresh[(arr[(i-3),1]):(arr[(i-3),1]+arr[(i-3),3]),(arr[(i-3),0]):(arr[(i-3),0]+arr[(i-3),2])]
murg = cv2.resize(Roi,(300,200))
murg1 = cv2.resize(Roi1,(300,200))
murg2 = cv2.resize(Roi2,(300,200))
murg3 = cv2.resize(Roi3,(300,200))
hori = np.concatenate((murg,murg1,murg2,murg3),axis=1)
cv2.imshow("horizon",hori)
#print(hori)
this section is so that I can print the ROI matrix and to select the ROI
if k == ord("1"):
print(murg)
if k == ord("2"):
print(murg1)
if k == ord ("3"):
print(murg2)
if k == ord("4"):
print(murg3)
cv2.imshow('Frame', Thresh)
if k == ord("e"):
break
if k == ord("s"):
roi = cv2.selectROI("Frame", Thresh, fromCenter=False,showCrosshair=False)
tracker = OBJECT_TRACKING_MACHINE['mosse']()
trackers.add(tracker, Thresh, roi)
#print(boxes,success)
cap.release()
cv2.destroyAllWindows()
when you will run this code you can extract 4 ROI frames which will track your ROI's (I haven't added the precaution for empty matrix so it will give you error if you select more than 4 roi's)
my end goal is to extract those ROI videos for Image processing (this code is not done yet and there's more image processing is going to happen in letter part) **
I am trying to write and save the video of my object detection. I have referred and tried based on the posts posted here in Stackoverflow. However, none of them is working.
My specifications:
Python 3.7.9
OpenCV 4.5.1
The code is as below
import cv2
from main import *
import imutils
import math
darknetmain = darknet_main()
darknetmain.setGPU(is_GPU=True)
saveVid ="./data/test_sampah_tasik.mp4"
video = cv2.VideoCapture(0)
width = 270
height = 480
size = (width,height)
result = cv2.VideoWriter('21_output.mpeg',cv2.VideoWriter_fourcc(*'MPJG'),20,size)
# initialize the known distance from the camera to the object, which
# in this case is 24 inches
KNOWN_DISTANCE = 12.0
# initialize the known object width, which in this case, the piece of
# paper is 11.81 inches wide
KNOWN_WIDTH = 12.0
if video.isOpened():
while(True):
res, cv_img = video.read()
if res==True:
k = cv_img[90:360, 90:570]
imcaptions, boundingBoxs = darknetmain.performDetect(k)
if len(imcaptions)>0:
for i in range(len(imcaptions)):
k = cv2.rectangle(k, boundingBoxs[i][0], boundingBoxs[i][2], (0, 255, 0), 2)
k = cv2.putText(k, imcaptions[i], boundingBoxs[i][0], cv2.FONT_HERSHEY_SIMPLEX, 1,
(0, 0, 255))
startPoint = (180,570)
dividePoint = [x for x in boundingBoxs[i][2]]
dividePoint[0] = dividePoint[0] - 50
dividePoint[1] = dividePoint[1] - 50
if dividePoint[0] > 160:
print("Right")
elif dividePoint[0] <= 160:
print("Left")
centerPoint= (int(dividePoint[0]),int(dividePoint[1]))
k = cv2.line(k, startPoint, centerPoint, (0, 255, 0), 2)
result.write(k)
cv2.imshow("result", k)
else:
#print("no result")
result.write(k)
cv2.imshow("result", k)
key = cv2.waitKey(1)
if key==27 or 0xFF == ord('q'):
break
else:
break
else:
print("Cannot read the video file")
The error that I received is
OpenCV: FFMPEG: tag 0x4745504d/'MPEG' is not supported with codec id 2 and format 'mpeg / MPEG-1 Systems / MPEG program stream'
The Code:
I am attempting to construct a python script that takes the RGB values of an image, along with the contours width and height into a neural network to determine if the little rc car I have constructed should go forward or turn right.
Code:
import cv2
cam = cv2.VideoCapture(2)
class Pic_Capture:
def __init__(self):
# print("Fuck you")
self.i = 0
def Img_Mods(self, cam):
self.ret, self.frame = cam.read()
self.buf1 = cv2.flip(self.frame, 1)
# self.buf2 = cv2.cvtColor(self.buf1, cv2.COLOR_BGR2RGB)
self.buf3 = cv2.GaussianBlur(self.buf1,(5,5),0)
cv2.imshow('buf2', self.buf3)
def Measurement_Bullshit(self):
self.buf4 = cv2.cvtColor(self.buf3, cv2.COLOR_RGB2GRAY)
self.buf5 = cv2.adaptiveThreshold(self.buf4,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY,11,2)
_, contours, hierarchy = cv2.findContours(self.buf5, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
for cnt in contours:
M = cv2.moments(cnt)
self.cx = int(M['m10']/M['m00'])
self.cy = int(M['m01']/M['m00'])
print(self.cx, self.cy)
cv2.imshow('buf4', self.buf4)
running = True
i = 0
Camera = Pic_Capture()
while running:
Camera.Img_Mods(cam)
if i % 30 == 0 and i != 0:
Camera.Off_and_On_Again()
Camera.Measurement_Bullshit()
i += 1
print(i)
if cv2.waitKey(1) == 27:
Camera.Stop_Code()
Question 1:
I consistently receive this within my console and I would like to get to rid of it for debugging later down the line.
Corrupt JPEG data: 4 extraneous bytes before marker 0xd5
Every time this pops up it is at a different marker, so I do not know how to not print that to the console.
Question 2:
After the code within the if statement: if i % 10 == 0 and i != 0: runs the code spits back this error.
File "Nueral_Network_Creation.py", line 23, in Measurement_Bullshit self.cx = int(M['m10']/M['m00']) ZeroDivisionError: float division by zero
I understand that it is most likely that the code is not finding enough contours to be run properly or maybe it is something completely different. As always any help is much appreciated.
I've been struggling to come up with a script that allows me to take screenshots of my desktop more than once per every second. I'm using Win10.
PIL:
from PIL import ImageGrab
import time
while True:
im = ImageGrab.grab()
fname = "dropfolder/%s.png" %int(time.time())
im.save(fname,'PNG')
Results 1.01 seconds per image.
PyScreeze (https://github.com/asweigart/pyscreeze):
import pyscreeze
import time
while True:
fname = "dropfolder/%s.png" %int(time.time())
x = pyscreeze.screenshot(fname)
Results 1.00 seconds per image.
Win32:
import win32gui
import win32ui
import win32con
import time
w=1920 #res
h=1080 #res
while True:
wDC = win32gui.GetWindowDC(0)
dcObj=win32ui.CreateDCFromHandle(wDC)
cDC=dcObj.CreateCompatibleDC()
dataBitMap = win32ui.CreateBitmap()
dataBitMap.CreateCompatibleBitmap(dcObj, w, h)
cDC.SelectObject(dataBitMap)
cDC.BitBlt((0,0),(w, h) , dcObj, (0,0), win32con.SRCCOPY)
fname = "dropfolder/%s.png" %int(time.time())
dataBitMap.SaveBitmapFile(cDC, fname)
dcObj.DeleteDC()
cDC.DeleteDC()
win32gui.ReleaseDC(0, wDC)
win32gui.DeleteObject(dataBitMap.GetHandle())
Results 1.01 seconds per image.
Then I stumbled into thread (Fastest way to take a screenshot with python on windows) where it was suggested that gtk would yield phenomenal results.
However using gtk:
import gtk
import time
img_width = gtk.gdk.screen_width()
img_height = gtk.gdk.screen_height()
while True:
screengrab = gtk.gdk.Pixbuf(
gtk.gdk.COLORSPACE_RGB,
False,
8,
img_width,
img_height
)
fname = "dropfolder/%s.png" %int(time.time())
screengrab.get_from_drawable(
gtk.gdk.get_default_root_window(),
gtk.gdk.colormap_get_system(),
0, 0, 0, 0,
img_width,
img_height
).save(fname, 'png')
Results 2.34 seconds per image.
It seems to me like I'm doing something wrong, because people have been getting great results with gtk.
Any advices how to speed up the process?
Thanks!
Your first solution should be giving you more than one picture per second. The problem though is that you will be overwriting any pictures that occur within the same second, i.e. they will all have the same filename. To get around this you could create filenames that include 10ths of a second as follows:
from PIL import ImageGrab
from datetime import datetime
while True:
im = ImageGrab.grab()
dt = datetime.now()
fname = "pic_{}.{}.png".format(dt.strftime("%H%M_%S"), dt.microsecond // 100000)
im.save(fname, 'png')
On my machine, this gave the following output:
pic_1143_24.5.png
pic_1143_24.9.png
pic_1143_25.3.png
pic_1143_25.7.png
pic_1143_26.0.png
pic_1143_26.4.png
pic_1143_26.8.png
pic_1143_27.2.png
In case anyone cares in 2022: You can try my newly created project DXcam: I think for raw speed it's the fastest out there (in python, and without going too deep into the rabbit hole). It's originally created for a deep learning pipeline for FPS games where the higher FPS you get the better. Plus I (am trying to) design it to be user-friendly:
For a screenshot just do
import dxcam
camera = dxcam.create()
frame = camera.grab() # full screen
frame = camera.grab(region=(left, top, right, bottom)) # region
For screen capturing:
camera.start(target_fps=60) # threaded
for i in range(1000):
image = camera.get_latest_frame() # Will block until new frame available
camera.stop()
I copied the part of the benchmarks section from the readme:
DXcam
python-mss
D3DShot
Average FPS
238.79
75.87
118.36
Std Dev
1.25
0.5447
0.3224
The benchmarks is conducted through 5 trials on my 240hz monitor with a constant 240hz rendering rate synced w/the monitor (using blurbuster ufo test).
You can read more about the details here: https://github.com/ra1nty/DXcam
This solution uses d3dshot.
def d3dgrab(rect=(0, 0, 0, 0), spath=r".\\pictures\\cache\\", sname="", title=""):
""" take a screenshot by rect. """
sname = sname if sname else time.strftime("%Y%m%d%H%M%S000.jpg", time.localtime())
while os.path.isfile("%s%s" % (spath, sname)):
sname = "%s%03d%s" % (sname[:-7], int(sname[-7:-4]) + 1, sname[-4:])
xlen = win32api.GetSystemMetrics(win32con.SM_CXSCREEN)
ylen = win32api.GetSystemMetrics(win32con.SM_CYSCREEN)
assert 0 <= rect[0] <= xlen and 0 <= rect[2] <= xlen, ValueError("Illegal value of X coordination in rect: %s" % rect)
assert 0 <= rect[1] <= ylen and 0 <= rect[3] <= ylen, ValueError("Illegal value of Y coordinatoin in rect: %s" % rect)
if title:
hdl = win32gui.FindWindow(None, title)
if hdl != win32gui.GetForegroundWindow():
win32gui.SetForegroundWindow(hdl)
rect = win32gui.GetWindowRect(hdl)
elif not sum(rect):
rect = (0, 0, xlen, ylen)
d = d3dshot.create(capture_output="numpy")
return d.screenshot_to_disk(directory=spath, file_name=sname, region=rect)
I think it can be helped
sname = sname if sname else time.strftime("%Y%m%d%H%M%S000.jpg", time.localtime())
while os.path.isfile("%s%s" % (spath, sname)):
sname = "%s%03d%s" % (sname[:-7], int(sname[-7:-4]) + 1, sname[-4:])
And it's fastest way to take screenshot I found.