Lucas Kanade Optical Flow Implementation not working properly - python

I was working on my own optical flow script using the Lucas-Kanade method in Python and NumPy, but I get very different flow results from my implementation than from OpenCV's implementation of the same algorithm (this is the test video).
This is my full code:
import cv2
import numpy as np

def drawlines(image, pts0, pts1, color):
    for pt0, pt1 in zip(pts0, pts1):
        cv2.line(image, (pt0[0], pt0[1]), (pt1[0], pt1[1]), color, thickness=2)

def calcFlowLib(prev, curr, points):
    # OPTICAL FLOW USING OPENCV IMPLEMENTATION
    currpts = np.reshape(points, (-1, 1, 2))
    lk_params = dict(winSize=(41, 41), maxLevel=0,
                     criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))
    nextpts, stat, err = cv2.calcOpticalFlowPyrLK(prev, curr, currpts, None, **lk_params)
    nextpts = np.reshape(nextpts, (-1, 2))
    return nextpts

def calcFlow(im1, im2, points, halfwin=(20, 20)):
    # MY OWN IMPLEMENTATION
    pts = np.copy(points)
    npts = points.shape[0]
    i_x = np.zeros(im1.shape)
    i_y = np.zeros(im1.shape)
    i_t = np.zeros(im1.shape)
    i_x[1:-1, 1:-1] = (im1[1:-1, 2:] - im1[1:-1, :-2]) / 2
    i_y[1:-1, 1:-1] = (im1[2:, 1:-1] - im1[:-2, 1:-1]) / 2
    i_t[1:-1, 1:-1] = im2[1:-1, 1:-1] - im1[1:-1, 1:-1]
    for i in xrange(0, npts, 1):
        p = np.reshape(pts[i], (2, 1))
        ix = i_x[p[1]-halfwin[1]:p[1]+halfwin[1]+1, p[0]-halfwin[0]:p[0]+halfwin[0]+1]
        iy = i_y[p[1]-halfwin[1]:p[1]+halfwin[1]+1, p[0]-halfwin[0]:p[0]+halfwin[0]+1]
        it = i_t[p[1]-halfwin[1]:p[1]+halfwin[1]+1, p[0]-halfwin[0]:p[0]+halfwin[0]+1]
        ixx = ix * ix
        iyy = iy * iy
        ixy = ix * iy
        ixt = ix * it
        iyt = iy * it
        G = np.array([[np.sum(ixx), np.sum(ixy)], [np.sum(ixy), np.sum(iyy)]], dtype=np.float32)
        B = np.array([[np.sum(ixy)], [np.sum(iyt)]], dtype=np.float32)
        vel = np.linalg.lstsq(G, B)[0]
        pts[i] = pts[i] + np.ravel(vel)
    return pts

cap = cv2.VideoCapture("OF.mp4")
prevgray = None
currgray = None
prevpts0 = np.array([[320, 180], [100, 100]], dtype=np.float32)
prevpts1 = np.array([[320, 180], [100, 100]], dtype=np.float32)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    frame = cv2.resize(frame, (640, 360))
    currgray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    if prevgray is not None:
        currpts0 = calcFlow(prevgray, currgray, prevpts0)
        drawlines(frame, prevpts0, currpts0, (0, 255, 0))
        currpts1 = calcFlowLib(prevgray, currgray, prevpts1)
        drawlines(frame, prevpts1, currpts1, (0, 0, 255))
        prevpts0 = np.copy(currpts0)
        prevpts1 = np.copy(currpts1)
    prevgray = np.copy(currgray)
    cv2.imshow("Frame", frame)
    if cv2.waitKey(1) & 0xFF == ord('1'):
        break

cap.release()
cv2.destroyAllWindows()
Which part of my implementation is wrong?
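For reference, the standard single-window Lucas-Kanade step solves G·v = b, where b is built from the temporal gradient: b = -[sum(Ix*It), sum(Iy*It)]^T. Below is a minimal NumPy sketch of that textbook step (lk_step is an illustrative helper of mine, not OpenCV's code), assuming ix, iy, it are the gradient patches already cut out around the tracked point:
import numpy as np

def lk_step(ix, iy, it):
    # Solve G * v = b for the flow vector v of one window.
    # Note that b uses -sum(Ix*It) and -sum(Iy*It), not sum(Ix*Iy).
    G = np.array([[np.sum(ix * ix), np.sum(ix * iy)],
                  [np.sum(ix * iy), np.sum(iy * iy)]], dtype=np.float64)
    b = -np.array([[np.sum(ix * it)],
                   [np.sum(iy * it)]], dtype=np.float64)
    return np.linalg.lstsq(G, b, rcond=None)[0]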

Related

I have been facing a traceback error while running a cvzone module in Python (cvzone hand detection project, ASL).

Given below is the code that is available on the website as well as in the tutorial video: https://www.youtube.com/watch?v=wa2ARoUUdU8&t=118s
import cv2
from cvzone.HandTrackingModule import HandDetector
import numpy as np
import math
import time

cap = cv2.VideoCapture(0)
detector = HandDetector(maxHands=1)
offset = 20
imgSize = 300
folder = "Data/C"
counter = 0

while True:
    success, img = cap.read()
    hands, img = detector.findHands(img)
    if hands:
        hand = hands[0]
        x, y, w, h = hand['bbox']
        imgWhite = np.ones((imgSize, imgSize, 3), np.uint8) * 255
        imgCrop = img[y - offset:y + h + offset, x - offset:x + w + offset]
        imgCropShape = imgCrop.shape
        aspectRatio = h / w
        if aspectRatio > 1:
            k = imgSize / h
            wCal = math.ceil(k * w)
            imgResize = cv2.resize(imgCrop, (wCal, imgSize))
            imgResizeShape = imgResize.shape
            wGap = math.ceil((imgSize - wCal) / 2)
            imgWhite[:, wGap:wCal + wGap] = imgResize
        else:
            k = imgSize / w
            hCal = math.ceil(k * h)
            imgResize = cv2.resize(imgCrop, (imgSize, hCal))
            imgResizeShape = imgResize.shape
            hGap = math.ceil((imgSize - hCal) / 2)
            imgWhite[hGap:hCal + hGap, :] = imgResize
        cv2.imshow("ImageCrop", imgCrop)
        cv2.imshow("ImageWhite", imgWhite)
    cv2.imshow("Image", img)
    key = cv2.waitKey(1)
    if key == ord("s"):
        counter += 1
        cv2.imwrite(f'{folder}/Image_{time.time()}.jpg', imgWhite)
        print(counter)
There is a traceback error while running this code (probably a problem importing the cvzone module); screenshots attached: screenshot of error.
I tried reinstalling the packages with the same versions used in the video.
How do I deal with this problem?
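If the traceback really points at the cvzone import, the usual cause is a cvzone/mediapipe version mismatch, which cannot be confirmed from the text alone. If it instead happens inside the loop, a very common crash with this exact script is cv2.resize being called on an empty imgCrop when the hand is near the frame border (the y - offset / x - offset slice goes out of bounds). A sketch of a bounds-clamped crop, assuming that is the failure (safe_crop is a made-up helper name):
import numpy as np

def safe_crop(img, x, y, w, h, offset):
    # Clamp the crop window to the frame so the slice is never empty.
    h_img, w_img = img.shape[:2]
    y1, y2 = max(0, y - offset), min(h_img, y + h + offset)
    x1, x2 = max(0, x - offset), min(w_img, x + w + offset)
    return img[y1:y2, x1:x2]

Then check imgCrop.size before calling cv2.resize and skip the frame if the crop is empty.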

Stereo camera structured light disparity problem

I want to build a 3D scanner with a stereo camera and an LED projector. I calibrated the stereo camera and built the mechanical system. I project Gray code patterns and capture them with the two cameras, then decode the images and obtain a correspondence image. But I can't triangulate the decoded points. This is my reference project for generating the patterns and decoding them.
Reference Github project
And this is my code
import numpy as np
import cv2
import structuredlight as sl

def generate_rectify_data(calib_data, size):
    XML_File = cv2.FileStorage(calib_data, cv2.FILE_STORAGE_READ)
    M1 = XML_File.getNode('Camera_Matrix_Left').mat()
    M2 = XML_File.getNode('Camera_Matrix_Right').mat()
    d1 = XML_File.getNode('Camera_Distortion_Left').mat()
    d2 = XML_File.getNode('Camera_Distortion_Right').mat()
    R = XML_File.getNode('Rotation_Matrix').mat()
    t = XML_File.getNode('Translation_Matrix').mat()
    flag = cv2.CALIB_ZERO_DISPARITY
    R1, R2, P1, P2, Q = cv2.stereoRectify(cameraMatrix1=M1, cameraMatrix2=M2,
        distCoeffs1=d1, distCoeffs2=d2, R=R, T=t, flags=flag, alpha=-1, imageSize=size,
        newImageSize=size)[0:5]
    map_x_l, map_y_l = cv2.initUndistortRectifyMap(M1, d1, R1, P1, size, cv2.CV_32FC1)
    map_x_r, map_y_r = cv2.initUndistortRectifyMap(M2, d2, R2, P2, size, cv2.CV_32FC1)
    return map_x_l, map_y_l, map_x_r, map_y_r, P1, P2, Q

def rectify(img, map_x, map_y):
    res = cv2.remap(img, map_x, map_y, cv2.INTER_LINEAR, cv2.BORDER_CONSTANT)
    return res

"""
***I generate pattern like this and capture them in another python script***
W = 240
H = 240
gray = sl.Gray()
imlist_posi_x_pat = gray.generate((W, H))
imlist_posi_y_pat = sl.transpose(gray.generate((H, W)))
"""

if __name__ == '__main__':
    img_size = (1648, 1232)
    map_x_l, map_y_l, map_x_r, map_y_r, P1, P2, Q = generate_rectify_data(
        "C:/Users/XXX/PycharmProjects/Stereo_Structured_Light/Calibration_Data"
        "/Calibration_Parameters_1.xml", size=(1648, 1232))
    rect_list_l, rect_list_r = [], []
    imlist_posi_x_cap_R = []
    imlist_posi_y_cap_R = []
    imlist_posi_x_cap_L = []
    imlist_posi_y_cap_L = []
    for i in range(0, 16):
        img_l = cv2.imread("C:/OxO_Scan/Images_1/Left_cam3/L_" + str(i) + ".png", 0)
        img_r = cv2.imread("C:/OxO_Scan/Images_1/Right_cam3/R_" + str(i) + ".png", 0)
        l_rect = rectify(img_l, map_x_l, map_y_l)
        r_rect = rectify(img_r, map_x_r, map_y_r)
        if i < 8:  # 8 for the horizontal, 8 for the vertical pattern images
            imlist_posi_x_cap_R.append(r_rect)
            imlist_posi_x_cap_L.append(l_rect)
        else:
            imlist_posi_y_cap_R.append(r_rect)
            imlist_posi_y_cap_L.append(l_rect)
    W = 240
    H = 240
    gray = sl.Gray()
    img_index_x_R = gray.decode(imlist_posi_x_cap_R, thresh=40)
    img_index_x_L = gray.decode(imlist_posi_x_cap_L, thresh=40)
    img_index_y_R = gray.decode(imlist_posi_y_cap_R, thresh=40)
    img_index_y_L = gray.decode(imlist_posi_y_cap_L, thresh=40)
    img_correspondence_x_r = cv2.merge([0.0 * np.zeros_like(img_index_x_R),
                                        img_index_x_R / W, img_index_y_R / H])
    img_correspondence_r = np.clip(img_correspondence_x_r * 255.0, 0,
                                   255).astype(np.uint8)
    img_correspondence_y_l = cv2.merge([0.0 * np.zeros_like(img_index_x_L),
                                        img_index_x_L / W, img_index_y_L / H])
    img_correspondence_l = np.clip(img_correspondence_y_l * 255.0, 0,
                                   255).astype(np.uint8)
    ####################################
    cv2.imshow("a", cv2.resize(img_correspondence_l, (640, 480)))
    cv2.imshow("b", cv2.resize(img_correspondence_r, (640, 480)))
    cv2.waitKey()
    cv2.destroyAllWindows()
    img_correspondence_L_2 = np.copy(img_correspondence_l)
    img_correspondence_R_2 = np.copy(img_correspondence_r)
    cam_pts_l, cam_pts_r = [], []
    cam_pts_l2, cam_pts_r2 = [], []
    for i in range(img_correspondence_l.shape[0]):
        for j in range(img_correspondence_l.shape[1]):
            if (img_correspondence_l[i][j] != 0).any():
                qwert = img_index_x_L[i][j]
                cam_pts_l.append([j, i])
                cam_pts_r.append([j + qwert, i])
    cam_pts_l = np.array(cam_pts_l, dtype=np.float32)
    cam_pts_r = np.array(cam_pts_r, dtype=np.float32)
    cam_pts_l = np.array(cam_pts_l)[:, np.newaxis, :]
    cam_pts_r = np.array(cam_pts_r)[:, np.newaxis, :]
    pts4D = cv2.triangulatePoints(P1, P2, np.float32(cam_pts_l), np.float32(cam_pts_r)).T
    pts3D = (pts4D[:, :3] / pts4D[:, -1:])
I don't know what to do inside the loop "for i in range(img_correspondence_l.shape[0]):". For example, I cannot find where a point found in the left correspondence image corresponds to in the right camera image; j + img_index_x_L[i][j] does not give correct results. What should I do here?
Thanks for your answers.
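One thing worth noting: the decoded value is a projector column index, not a disparity, so adding it to j does not land on the matching right-camera pixel. Below is a sketch of one way to find the match (match_row is a made-up helper), assuming the images are rectified so that corresponding points lie on the same row and that the decoded index images hold the projector column per pixel:
import numpy as np

def match_row(img_index_x_L, img_index_x_R, row, col):
    # Find the right-image column whose decoded projector column equals
    # the one decoded at (row, col) in the left image.
    target = img_index_x_L[row, col]
    candidates = np.where(img_index_x_R[row] == target)[0]
    if candidates.size == 0:
        return None  # no pixel in this row saw that projector column
    # If several pixels decode to the same projector column, take the closest one.
    return int(candidates[np.argmin(np.abs(candidates - col))])

The right-camera point for triangulation would then be [matched_col, row] rather than [j + qwert, i]; the vertical index images can additionally be used to reject ambiguous matches.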

Image threshold algorithms to use on an x-ray image and detect bones

I have a little project with OpenCV (python) where one of my steps is to take an x-ray image from the human body and convert it to a binary image where white pixels represent where some bone is present and black means there is no bone there.
Since sometimes "bone parts" can be darker than "non-bone parts" from another region, simple thresholding won't work. I also tried adaptive threshold and I couldn't see much difference.
I came up with a simple algorithm that applies a separate threshold to each row.
Here is the code:
def threshhold(image, val):
    image = image.copy()
    for row_idx in range(image.shape[0]):
        max_row = image[row_idx].max()
        min_row = image[row_idx].min()
        tresh = np.median(image[row_idx]) + (val * (max_row - min_row))
        # Or use np.mean instead of np.median
        _, tresh = cv2.threshold(image[row_idx], tresh, 255, cv2.THRESH_BINARY)
        image[row_idx] = tresh.ravel()
    return image
And here is the code that does the same work but column-by-column instead of row-by-row:
def threshhold2(image, val):
    image = image.copy()
    for row_idx in range(image.shape[1]):
        max_row = image[:, row_idx].max()
        min_row = image[:, row_idx].min()
        tresh = np.median(image[:, row_idx]) + (val * (max_row - min_row))
        # Or use np.mean instead of np.median
        _, tresh = cv2.threshold(image[:, row_idx], tresh, 255, cv2.THRESH_BINARY)
        image[:, row_idx] = tresh.ravel()
    return image
This method works pretty well with images like this:
Not quite as good for this one, but it is not that bad:
Very bad:
Only the left half looks good:
...
As you can see, this algorithm works well only for some images.
I would be glad to hear ideas from more experienced people.
The images are not of me, by the way.
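One direction that might be worth trying, sketched below under the assumption that the uneven brightness varies smoothly across the image: equalise contrast locally with CLAHE and then apply Otsu's threshold per tile rather than per row or per column (the tile size of 64 is an arbitrary choice):
import cv2
import numpy as np

def local_otsu(image, tile=64):
    # Expects an 8-bit grayscale image; CLAHE first, then per-tile Otsu thresholding.
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    image = clahe.apply(image)
    out = np.zeros_like(image)
    for y in range(0, image.shape[0], tile):
        for x in range(0, image.shape[1], tile):
            block = image[y:y + tile, x:x + tile]
            _, out[y:y + tile, x:x + tile] = cv2.threshold(
                block, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return out

A morphological opening/closing pass on the result usually cleans up small speckles.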
Entire source code:
import os
import cv2
import numpy as np
files_to_see = os.listdir("data_set")
files_to_see.sort()
current_file = 0
print(files_to_see)
def slice(image, size):
out = []
x_count = image.shape[1] // size
y_count = image.shape[0] // size
for y_idx in range(y_count):
for x_idx in range(x_count):
out.append(
(
(y_idx, x_idx),
image[y_idx * size: (y_idx + 1) * size,
x_idx * size: (x_idx + 1) * size]
)
)
return y_count, x_count, out
def normalize(image):
image = image.copy()
min_pix = image.min()
max_pix = image.max()
for y in range(image.shape[0]):
for x in range(image.shape[1]):
val = image[y, x]
val -= min_pix
val *= 255 / (max_pix - min_pix)
image[y, x] = round(val)
# image -= min_pix
# image *= round(255 / (max_pix - min_pix))
return image
def threshhold(image, val, method):
image = image.copy()
for row_idx in range(image.shape[0]):
max_row = image[row_idx].max()
min_row = image[row_idx].min()
# tresh = np.median(image[row_idx]) + (val * (max_row - min_row))
tresh = method(image[row_idx]) + (val * (max_row - min_row))
_, tresh = cv2.threshold(image[row_idx], tresh, 255, cv2.THRESH_BINARY)
image[row_idx] = tresh.ravel()
return image
def threshhold2(image, val, method):
image = image.copy()
for row_idx in range(image.shape[1]):
max_row = image[:, row_idx].max()
min_row = image[:, row_idx].min()
tresh = method(image[:, row_idx]) + (val * (max_row - min_row))
_, tresh = cv2.threshold(image[:, row_idx], tresh, 255, cv2.THRESH_BINARY)
image[:, row_idx] = tresh.ravel()
return image
def recalculate_threshhold(v):
global original_current_image, thresh_current_image, y_c, x_c, slices
method = np.mean
if cv2.getTrackbarPos("method", "xb labeler") == 0:
method = np.median
thresh_current_image = threshhold2(original_current_image, cv2.getTrackbarPos("threshhold_value", "xb labeler") / 1000, method)
y_c, x_c, slices = slice(thresh_current_image, 128)
def thresh_current_image_mouse_event(event, x, y, flags, param):
if event == 1:
print(x // 128, y // 128)
cv2.imshow("slice", slices[(x // 128) + (y // 128) * x_c][1])
cv2.namedWindow("xb labeler")
cv2.createTrackbar("threshhold_value", "xb labeler", 0, 1000, recalculate_threshhold)
cv2.createTrackbar("method", "xb labeler", 0, 1, recalculate_threshhold)
cv2.namedWindow("thresh_current_image")
cv2.setMouseCallback("thresh_current_image", thresh_current_image_mouse_event)
def init():
global original_current_image, thresh_current_image, x_c, y_c, slices, files_to_see, current_file
original_current_image = cv2.imread("data_set/" + files_to_see[current_file], cv2.CV_8UC1)
original_current_image = cv2.resize(original_current_image, (512, 512))
original_current_image = normalize(original_current_image)
original_current_image = cv2.GaussianBlur(original_current_image, (5, 5), 10)
recalculate_threshhold(1)
y_c, x_c, slices = slice(thresh_current_image, 128)
init()
while True:
cv2.imshow("thresh_current_image", thresh_current_image)
cv2.imshow("xb labeler", original_current_image)
k = cv2.waitKey(1)
if k == ord('p'):
cv2.imwrite("ssq.png", thresh_current_image)
current_file += 1
init()
cv2.destroyAllWindows()
EDIT: Added original images:

Is it possible to run two YOLO (yolov4) object detection models in a single application?

I understand that it would be much simpler to just train YOLOv4 accordingly but:
I have limited computational resources and was hoping I could combine pre-trained models I found online to save time, or train a model and combine it with other models available online.
Say I have one '.weights' file of a custom object detector that detects traffic signs and another '.weights' file of a detector that detects pedestrians. Is there a way to combine these models so that, when run on a video/image (or a real-time capture), they detect pedestrians and traffic signs simultaneously?
By combining I mean either editing the '.weights' files somehow to achieve this, or editing the Python code that runs the detector (or any other way).
If that is not possible, is there any way to make them run in sequence, efficiently?
Yes, it is possible to do that. Create two different Python files and load one model in each, then create a new file and initialise both of them there. Take the continuous video/image feed as input; the first file will output the detected traffic signs. Use this output as the input to your pedestrian detector.
import numpy as np
import pandas as pd
import cv2
import time
margin = 20
import detection_cls
run = detection_cls.Ppe_Detection_1()
class Ppe_Detection():
def __init__(self):
self.weightfile = 'yolov4_pretrained.weights'
self.cfgfile = 'cfg/yolov4_pretrained.cfg'
self.PpeNet = cv2.dnn.readNet(self.weightfile,self.cfgfile)
self.classes = self.get_classes()
layer_names = self.PpeNet.getLayerNames()
self.output_layers = [layer_names[i - 1] for i in self.PpeNet.getUnconnectedOutLayers()]
# = [self.PpeNet.getLayerNames()[(i[0] - 1)] for i in self.PpeNet.getUnconnectedOutLayers()]
def get_classes(self):
# self.classes = []
with open("coco.names","r") as f:
self.classes_val = [line.strip() for line in f.readlines()]
return self.classes_val
def detection(self,img):
start = time.perf_counter()
height,width,channels = img.shape
# Detecting objects
blob = cv2.dnn.blobFromImage(img,0.00392,(416,416),(0,0,0),True,crop=False)
self.PpeNet.setInput(blob)
outs = self.PpeNet.forward(self.output_layers)
time_took = time.perf_counter() - start
fps = str(int(1/time_took))
# getting the list
class_ids = []
confidences = []
boxes = []
for out in outs:
for detection in out:
# print(detection)
scores = detection[5:]
class_id = np.argmax(scores)
confidence = scores[class_id]
if confidence > 0.4:
# object detected
center_x = int(detection[0]*width)
center_y = int(detection[1]*height)
w = int(detection[2]*width)
h = int(detection[3]*height)
# Rectangle Coordinates
x = int(center_x-w/2)
y = int(center_y-h/2)
boxes.append([x,y,w,h])
confidences.append(float(confidence))
class_ids.append(class_id)
indexes = cv2.dnn.NMSBoxes(boxes,confidences,0.4,0.4)
info = []
if len(indexes) > 0:
for i in indexes.flatten():
x,y = boxes[i][0], boxes[i][1]
w,h = boxes[i][2], boxes[i][3]
conf = confidences[i]
if x<0:
x = 0
if y < 0:
y = 0
type = '{}'.format(self.classes[class_ids[i]])
info.append([x,y,w,h,type,conf])
new_frame_time = time.time()
font = cv2.FONT_HERSHEY_PLAIN
for i in range(len(boxes)):
for i in indexes:
x,y,w,h = boxes[i]
label = str(self.classes[class_ids[i]])
color = (0,0,145)
# cv2.rectangle(img,(x,y),(x+w,y+h),color,2)
cv2.putText(img,fps,(10,30),font,3,color,3)
cv2.putText(img,label,(x,y+30),font,3,color,3)
# print(boxes,confidences,class_ids)
# print("opened")
# print(info)
return info
def cropping_detection(self):
count = 0
cropped_frame = []
value_img = []
cap = cv2.VideoCapture(0)
while cap.isOpened():
r,f = cap.read()
# counts +=1
detection = ppe.detection(f)
try:
x,y,w,h,cls,conf = detection[0]
except Exception as e:
pass
# for i in range(len(detection)):
# print(detection[i])
try:
if cls=='person':
print("Person detected")
cropped_img = f[y-margin:y+h+margin,x-margin:x+w+margin]
# print(cropped_img)
# value = ppe.detection(cropped_img)
final = run.detection(cropped_img)
print('final',final)
# print(value)
# print(cropped_img)
# value_img.append(value)
# cropped_frame.append(cropped_img)
# cv2.imwrite("data/frame%d.jpg" % count, cropped_img)
cv2.imshow("Hi",cropped_img)
if cv2.waitKey(1) & 0xff==ord('q'):
break
# count += 1
else:
pass
except Exception as e:
print("In except part")
# cap.release()
# cv2.destroyAllWindows()
# print(cropped_img)
return value,cropped_img
ppe = Ppe_Detection()
ppe.cropping_detection()
# ppe.get_classes()
# ppe.run_video()
# take_photo()
# inference_image()
The function that I've imported here can be found below
import numpy as np
import pandas as pd
import cv2
import time
class Ppe_Detection():
def __init__(self):
self.weightfile = 'backup/yolov3_best.weights'
self.cfgfile = 'yolov3.cfg'
self.PpeNet = cv2.dnn.readNet(self.weightfile,self.cfgfile)
self.classes = self.get_classes()
layer_names = self.PpeNet.getLayerNames()
self.output_layers = [layer_names[i - 1] for i in self.PpeNet.getUnconnectedOutLayers()]
# = [self.PpeNet.getLayerNames()[(i[0] - 1)] for i in self.PpeNet.getUnconnectedOutLayers()]
def get_classes(self):
# self.classes = []
with open("obj.names","r") as f:
self.classes_val = [line.strip() for line in f.readlines()]
return self.classes_val
def print_all(self):
print(self.classes)
def detection(self,img):
start = time.perf_counter()
height,width,channels = img.shape
# Detecting objects
blob = cv2.dnn.blobFromImage(img,0.00392,(416,416),(0,0,0),True,crop=False)
self.PpeNet.setInput(blob)
outs = self.PpeNet.forward(self.output_layers)
time_took = time.perf_counter() - start
fps = str(int(1/time_took))
# getting the list
class_ids = []
confidences = []
boxes = []
for out in outs:
for detection in out:
scores = detection[5:]
class_id = np.argmax(scores)
confidence = scores[class_id]
if confidence > 0.4:
# object detected
center_x = int(detection[0]*width)
center_y = int(detection[1]*height)
w = int(detection[2]*width)
h = int(detection[3]*height)
# Rectangle Coordinates
x = int(center_x-w/2)
y = int(center_y-h/2)
boxes.append([x,y,w,h])
confidences.append(float(confidence))
class_ids.append(class_id)
indexes = cv2.dnn.NMSBoxes(boxes,confidences,0.4,0.4)
info = []
if len(indexes) > 0:
for i in indexes.flatten():
x,y = boxes[i][0], boxes[i][1]
w,h = boxes[i][2], boxes[i][3]
conf = confidences[i]
if x<0:
x = 0
if y < 0:
y = 0
type = '{}'.format(self.classes[class_ids[i]])
info.append([x,y,w,h,type,conf])
new_frame_time = time.time()
font = cv2.FONT_HERSHEY_PLAIN
for i in range(len(boxes)):
for i in indexes:
x,y,w,h = boxes[i]
label = str(self.classes[class_ids[i]])
color = (0,255,145)
cv2.rectangle(img,(x,y),(x+w,y+h),color,2)
cv2.putText(img,fps,(10,30),font,3,color,3)
cv2.putText(img,label,(x,y+30),font,3,color,3)
# print(boxes,confidences,class_ids)
# print("opened")
print(info)
return info
ppe = Ppe_Detection()
ppe.get_classes()
ppe.print_all()
#############
def inference_image():
img = cv2.imread("sumit_off.jpg")
ppe.detection(img)
cv2.imshow("Image",img)
cv2.waitKey(0)
cv2.destroyAllWindows()
def run_video():
cap = cv2.VideoCapture(0)
while cap.isOpened():
r,f = cap.read()
try:
info = ppe.detection(f)
except Exception as e:
print("______",e)
cv2.imshow("image",f)
if cv2.waitKey(1) & 0xFF == ord("q") :
break
cap.release()
cv2.destroyAllWindows()
run_video()
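If the two detectors are independent (as in the traffic-sign plus pedestrian case from the question), they can also simply both be run on every frame. A compact sketch, assuming both models are ordinary Darknet .cfg/.weights pairs loadable by cv2.dnn; the file names below are placeholders:
import cv2

def load_detector(cfg, weights):
    net = cv2.dnn.readNet(weights, cfg)
    names = net.getLayerNames()
    out_layers = [names[i - 1] for i in net.getUnconnectedOutLayers()]
    return net, out_layers

def run(net, out_layers, frame):
    blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    return net.forward(out_layers)

signs_net, signs_out = load_detector("signs.cfg", "signs.weights")            # placeholder names
peds_net, peds_out = load_detector("pedestrians.cfg", "pedestrians.weights")  # placeholder names

cap = cv2.VideoCapture(0)
while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    sign_outputs = run(signs_net, signs_out, frame)       # raw outputs of model 1
    pedestrian_outputs = run(peds_net, peds_out, frame)   # raw outputs of model 2
    # ...decode boxes from both output sets (as in the detection() method above) and draw them...
    cv2.imshow("frame", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()

Running the two forward passes back to back roughly doubles the per-frame cost, so on limited hardware it may be worth running the second model only every few frames.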

How to use a for loop with many variables?

I am using YOLOv5. I want to change my webcam to a LAN camera.
class LoadStreams:  # multiple IP or RTSP cameras
    def __init__(self, sources='streams.txt', img_size=640):
        self.mode = 'images'
        self.img_size = img_size

        if os.path.isfile(sources):
            with open(sources, 'r') as f:
                sources = [x.strip() for x in f.read().splitlines() if len(x.strip())]
        else:
            sources = [sources]

        n = len(sources)
        self.imgs = [None] * n
        self.sources = sources
        for i, s in enumerate(sources):
            # Start the thread to read frames from the video stream
            print('%g/%g: %s... ' % (i + 1, n, s), end='')
            cap = cv2.VideoCapture(eval(s) if s.isnumeric() else s)
            assert cap.isOpened(), 'Failed to open %s' % s
            w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = cap.get(cv2.CAP_PROP_FPS) % 100
            _, self.imgs[i] = cap.read()  # guarantee first frame
            thread = Thread(target=self.update, args=([i, cap]), daemon=True)
            print(' success (%gx%g at %.2f FPS).' % (w, h, fps))
            thread.start()
        print('')  # newline

        # check for common shapes
        s = np.stack([letterbox(x, new_shape=self.img_size)[0].shape for x in self.imgs], 0)  # inference shapes
        self.rect = np.unique(s, axis=0).shape[0] == 1  # rect inference if all shapes equal
        if not self.rect:
            print('WARNING: Different stream shapes detected. For optimal performance supply similarly-shaped streams.')

    def update(self, index, cap):
        # Read next stream frame in a daemon thread
        n = 0
        while cap.isOpened():
            n += 1
            # _, self.imgs[index] = cap.read()
            cap.grab()
            if n == 4:  # read every 4th frame
                _, self.imgs[index] = cap.retrieve()
                n = 0
            time.sleep(0.01)  # wait time

    def __iter__(self):
        self.count = -1
        return self

    def __next__(self):
        self.count += 1
        img0 = self.imgs.copy()
        if cv2.waitKey(1) == ord('q'):  # q to quit
            cv2.destroyAllWindows()
            raise StopIteration

        # Letterbox
        img = [letterbox(x, new_shape=self.img_size, auto=self.rect)[0] for x in img0]

        # Stack
        img = np.stack(img, 0)

        # Convert
        img = img[:, :, :, ::-1].transpose(0, 3, 1, 2)  # BGR to RGB, to bsx3x416x416
        img = np.ascontiguousarray(img)

        return self.sources, img, img0, None

    def __len__(self):
        return 0  # 1E12 frames = 32 streams at 30 FPS for 30 years
This code returns 'self.sources, img, img0, None'.
if webcam:
    view_img = True
    cudnn.benchmark = True  # set True to speed up constant image size inference
    dataset = LoadStreams(source, img_size=imgsz)
    print((dataset))
I use 'dataset' like this:
for path, img, im0s, vid_cap in dataset:
    img = torch.from_numpy(img).to(device)
    img = img.half() if half else img.float()  # uint8 to fp16/32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
How can I use "for path, img, im0s, vid_cap in dataset:" with my own camera?
My LAN camera code:
def livecame():
    vimba = Vimba()
    vimba.startup()
    system = vimba.system()
    system.run_feature_command("GeVDiscoveryAllOnce")
    time.sleep(0.1)
    camera_ids = vimba.camera_ids()
    # for cam_id in camera_ids:
    #     print("Camera found: ", cam_id)
    print(camera_ids[0])
    c0 = vimba.camera(camera_ids[0])
    c0.open()
    pixel_format = c0.feature("PixelFormat")
    pixel_format.value = "BayerBG8"
    try:
        c0.StreamBytesPerSecond = 100000000
    except:
        pass
    frame = c0.new_frame()
    frame.announce()
    c0.start_capture()
    try:
        frame.queue_for_capture()
        success = True
    except:
        success = False
    c0.run_feature_command("AcquisitionStart")
    c0.run_feature_command("AcquisitionStop")
    frame.wait_for_capture(1000)
    frame_data = frame.buffer_data()
    k = cv2.waitKey(1)
    if k == 0x1b:
        cv2.destroyAllWindows()
    if success:
        img = np.ndarray(buffer=frame_data,
                         dtype=np.uint8,
                         shape=(frame.data.height, frame.data.width, 1))
        img = cv2.cvtColor(img, cv2.COLOR_BAYER_BG2RGB)
        img0 = img.copy()
        img = img.tolist()
        img = [letterbox(x, new_shape=(800, 400), auto=True)[0] for x in img0]
        # img = np.ascontiguousarray(img)
        img = np.stack(img, 0)
        # img = img[:, :, :, ::-1].transpose(0, 3, 1, 2)  # BGR to RGB, to bsx3x416x416
        img = np.ascontiguousarray(img)
    return ['0'], img, img0
But when I use dataset = new_file.livecame(), I get the error ValueError: not enough values to unpack (expected 3, got 1) in "for path, img, im0s, vid_cap in dataset:".
How do I use many variables in a for loop?
In Python OpenCV, one way is simply to use zip.
for component in zip(contours, hierarchy):
    cntr = component[0]
    hier = component[1]
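zip works when you already have several parallel sequences of equal length. In the question above, though, the deeper problem is that livecame() returns one 3-tuple while the loop expects an iterable that yields four values per iteration (LoadStreams yields self.sources, img, img0, None every time). A sketch of the shape the camera code would need, with the Vimba capture reduced to a placeholder grab_frame() function and the letterbox resize omitted:
import numpy as np

def grab_frame():
    # Placeholder for the Vimba capture code from the question; returns one BGR frame.
    return np.zeros((400, 800, 3), dtype=np.uint8)

def livecame():
    # Generator that mimics LoadStreams: yields four values on every iteration.
    while True:
        img0 = grab_frame()
        img = img0[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
        img = np.ascontiguousarray(img)[None]      # add a batch dimension
        yield ['0'], img, [img0], None             # path, img, im0s, vid_cap

With this, "for path, img, im0s, vid_cap in livecame():" unpacks cleanly, because each iteration produces exactly four values.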
