Do parallel processing for files in same folder

Do parallel processing for files in same folder - python

I Have doubts about parallel processing, I extracted frames from the video it will store into the H5 file now I want to do this in parallel processing,
what I am tried :
I tried to generate a file for all videos and its features like features change point and all other things
expected :
Generate h5 file via parallel processing
import os
from networks.CNN import ResNet
from utils.KTS.cpd_auto import cpd_auto
from tqdm import tqdm
import math
import cv2
import numpy as np
import h5py
import numpy as np
class Generate_Dataset:
def __init__(self, video_path, save_path):
self.resnet = ResNet()
self.dataset = {}
self.video_list = []
self.video_path = ''
self.h5_file = h5py.File(save_path, 'w')
self._set_video_list(video_path)
def _set_video_list(self, video_path):
# import pdb;pdb.set_trace()
if os.path.isdir(video_path):
self.video_path = video_path
fileExt = r".mp4",".avi"
self.video_list = [_ for _ in os.listdir(video_path) if _.endswith(fileExt)]
self.video_list.sort()
else:
self.video_path = ''
self.video_list.append(video_path)
for idx, file_name in enumerate(self.video_list):
self.dataset['video_{}'.format(idx+1)] = {}
self.h5_file.create_group('video_{}'.format(idx+1))
def _extract_feature(self, frame):
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
frame = cv2.resize(frame, (224, 224))
res_pool5 = self.resnet(frame)
frame_feat = res_pool5.cpu().data.numpy().flatten()
return frame_feat
def _get_change_points(self, video_feat, n_frame, fps):
n = n_frame / fps
m = int(math.ceil(n/2.0))
K = np.dot(video_feat, video_feat.T)
change_points, _ = cpd_auto(K, m, 1)
change_points = np.concatenate(([0], change_points, [n_frame-1]))
temp_change_points = []
for idx in range(len(change_points)-1):
segment = [change_points[idx], change_points[idx+1]-1]
if idx == len(change_points)-2:
segment = [change_points[idx], change_points[idx+1]]
temp_change_points.append(segment)
change_points = np.array(list(temp_change_points))
# temp_n_frame_per_seg = []
# for change_points_idx in range(len(change_points)):
# n_frame = change_points[change_points_idx][1] - change_points[change_points_idx][0]
# temp_n_frame_per_seg.append(n_frame)
# n_frame_per_seg = np.array(list(temp_n_frame_per_seg))
# print(change_points)
arr = change_points
list1 = arr.tolist()
list2 = list1[-1].pop(1) #pop [-1]value
print(list2)
print(list1)
print("****************") # [-1][-1] value find and divided by 15
cps_m = math.floor(arr[-1][1]/15)
list1[-1].append(cps_m) #append to list
print(list1)
print("****************") #list to nd array convertion
arr = np.asarray(list1)
print(arr)
arrmul = arr * 15
print(arrmul)
print("****************")
# print(type(change_points))
# print(n_frame_per_seg)
# print(type(n_frame_per_seg))
median_frame = []
for x in arrmul:
print(x)
med = np.mean(x)
print(med)
int_array = med.astype(int)
median_frame.append(int_array)
print(median_frame)
# print(type(int_array))
return arrmul
# TODO : save dataset
def _save_dataset(self):
pass
def generate_dataset(self):
print('[INFO] CNN processing')
for video_idx, video_filename in enumerate(self.video_list):
video_path = video_filename
if os.path.isdir(self.video_path):
video_path = os.path.join(self.video_path, video_filename)
video_basename = os.path.basename(video_path).split('.')[0]
video_capture = cv2.VideoCapture(video_path)
fps = video_capture.get(cv2.CAP_PROP_FPS)
n_frames = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
frame_list = []
picks = []
video_feat = None
video_feat_for_train = None
for frame_idx in tqdm(range(n_frames-1)):
success, frame = video_capture.read()
if frame_idx % 15 == 0:
if success:
frame_feat = self._extract_feature(frame)
picks.append(frame_idx)
if video_feat_for_train is None:
video_feat_for_train = frame_feat
else:
video_feat_for_train = np.vstack((video_feat_for_train, frame_feat))
if video_feat is None:
video_feat = frame_feat
else:
video_feat = np.vstack((video_feat, frame_feat))
else:
break
video_capture.release()
arrmul = self._get_change_points(video_feat, n_frames, fps)
self.h5_file['video_{}'.format(video_idx+1)]['features'] = list(video_feat_for_train)
self.h5_file['video_{}'.format(video_idx+1)]['picks'] = np.array(list(picks))
self.h5_file['video_{}'.format(video_idx+1)]['n_frames'] = n_frames
self.h5_file['video_{}'.format(video_idx+1)]['fps'] = fps
self.h5_file['video_{}'.format(video_idx + 1)]['video_name'] = video_filename.split('.')[0]
self.h5_file['video_{}'.format(video_idx+1)]['change_points'] = arrmul

You can do it like this
'''
first import the following, here Parallel will parallelize the processing and
delayed is the wraper.
'''
from joblib import Parallel, delayed
'''
Now we create a new function and copy paste everything that was previously
inside the for loop and pass `video_idx and video_filename` as arguments.
'''
def _generator(self, video_idx, video_filename):
video_path = video_filename
if os.path.isdir(self.video_path):
video_path = os.path.join(self.video_path, video_filename)
video_basename = os.path.basename(video_path).split('.')[0]
video_capture = cv2.VideoCapture(video_path)
fps = video_capture.get(cv2.CAP_PROP_FPS)
n_frames = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
frame_list = []
picks = []
video_feat = None
video_feat_for_train = None
for frame_idx in tqdm(range(n_frames-1)):
success, frame = video_capture.read()
if frame_idx % 15 == 0:
if success:
frame_feat = self._extract_feature(frame)
picks.append(frame_idx)
if video_feat_for_train is None:
video_feat_for_train = frame_feat
else:
video_feat_for_train = np.vstack((video_feat_for_train, frame_feat))
if video_feat is None:
video_feat = frame_feat
else:
video_feat = np.vstack((video_feat, frame_feat))
else:
break
video_capture.release()
arrmul = self._get_change_points(video_feat, n_frames, fps)
self.h5_file['video_{}'.format(video_idx+1)]['features'] = list(video_feat_for_train)
self.h5_file['video_{}'.format(video_idx+1)]['picks'] = np.array(list(picks))
self.h5_file['video_{}'.format(video_idx+1)]['n_frames'] = n_frames
self.h5_file['video_{}'.format(video_idx+1)]['fps'] = fps
self.h5_file['video_{}'.format(video_idx + 1)]['video_name'] = video_filename.split('.')[0]
self.h5_file['video_{}'.format(video_idx+1)]['change_points'] = arrmul
'''
Finally we update our current function using Parallel and delayed.
'''
def generate_dataset(self):
print('[INFO] CNN processing')
Parallel(n_jobs=-1)(delayed(self._generator)(video_idx, video_filename) for video_idx, video_filename in enumerate(self.video_list))

Related

Is it possible to run two YOLO (yolov4) object detection models in a single application?

I understand that it would be much simpler to just train YOLOv4 accordingly but:
I have limited computational resources and was hoping I could combine pre-trained models I found online, saving my time, or train a model and combine it with other models available online.
If I have one '.weights' file of a custom object detector that detects traffic signs and another '.weights' file of detector that detect pedestrians. Is there a way to combine these models, so that when run on a video/image (or in real-time capture), it detects pedestrians and traffic signs simultaneously.
By combining I mean, either to edit the 'weights' file somehow to achieve this, or editing the python code (while running the detector) that gets this done. (or any other way)
If not possible - is there any way to make them run in a sequence, efficiently?

Yes, It is possible to do that. Create two different python files and load the model in them. Then create a new file and initialise both of them in the new file. Then take the continous video / image feed as an input, the file will give output of detected traffic signs. Use this output as an input to your pedestrian problem.
import numpy as np
import pandas as pd
import cv2
import time
margin = 20
import detection_cls
run = detection_cls.Ppe_Detection_1()
class Ppe_Detection():
def __init__(self):
self.weightfile = 'yolov4_pretrained.weights'
self.cfgfile = 'cfg/yolov4_pretrained.cfg'
self.PpeNet = cv2.dnn.readNet(self.weightfile,self.cfgfile)
self.classes = self.get_classes()
layer_names = self.PpeNet.getLayerNames()
self.output_layers = [layer_names[i - 1] for i in self.PpeNet.getUnconnectedOutLayers()]
# = [self.PpeNet.getLayerNames()[(i[0] - 1)] for i in self.PpeNet.getUnconnectedOutLayers()]
def get_classes(self):
# self.classes = []
with open("coco.names","r") as f:
self.classes_val = [line.strip() for line in f.readlines()]
return self.classes_val
def detection(self,img):
start = time.perf_counter()
height,width,channels = img.shape
# Detecting objects
blob = cv2.dnn.blobFromImage(img,0.00392,(416,416),(0,0,0),True,crop=False)
self.PpeNet.setInput(blob)
outs = self.PpeNet.forward(self.output_layers)
time_took = time.perf_counter() - start
fps = str(int(1/time_took))
# getting the list
class_ids = []
confidences = []
boxes = []
for out in outs:
for detection in out:
# print(detection)
scores = detection[5:]
class_id = np.argmax(scores)
confidence = scores[class_id]
if confidence > 0.4:
# object detected
center_x = int(detection[0]*width)
center_y = int(detection[1]*height)
w = int(detection[2]*width)
h = int(detection[3]*height)
# Rectangle Coordinates
x = int(center_x-w/2)
y = int(center_y-h/2)
boxes.append([x,y,w,h])
confidences.append(float(confidence))
class_ids.append(class_id)
indexes = cv2.dnn.NMSBoxes(boxes,confidences,0.4,0.4)
info = []
if len(indexes) > 0:
for i in indexes.flatten():
x,y = boxes[i][0], boxes[i][1]
w,h = boxes[i][2], boxes[i][3]
conf = confidences[i]
if x<0:
x = 0
if y < 0:
y = 0
type = '{}'.format(self.classes[class_ids[i]])
info.append([x,y,w,h,type,conf])
new_frame_time = time.time()
font = cv2.FONT_HERSHEY_PLAIN
for i in range(len(boxes)):
for i in indexes:
x,y,w,h = boxes[i]
label = str(self.classes[class_ids[i]])
color = (0,0,145)
# cv2.rectangle(img,(x,y),(x+w,y+h),color,2)
cv2.putText(img,fps,(10,30),font,3,color,3)
cv2.putText(img,label,(x,y+30),font,3,color,3)
# print(boxes,confidences,class_ids)
# print("opened")
# print(info)
return info
def cropping_detection(self):
count = 0
cropped_frame = []
value_img = []
cap = cv2.VideoCapture(0)
while cap.isOpened():
r,f = cap.read()
# counts +=1
detection = ppe.detection(f)
try:
x,y,w,h,cls,conf = detection[0]
except Exception as e:
pass
# for i in range(len(detection)):
# print(detection[i])
try:
if cls=='person':
print("Person detected")
cropped_img = f[y-margin:y+h+margin,x-margin:x+w+margin]
# print(cropped_img)
# value = ppe.detection(cropped_img)
final = run.detection(cropped_img)
print('final',final)
# print(value)
# print(cropped_img)
# value_img.append(value)
# cropped_frame.append(cropped_img)
# cv2.imwrite("data/frame%d.jpg" % count, cropped_img)
cv2.imshow("Hi",cropped_img)
if cv2.waitKey(1) & 0xff==ord('q'):
break
# count += 1
else:
pass
except Exception as e:
print("In except part")
# cap.release()
# cv2.destroyAllWindows()
# print(cropped_img)
return value,cropped_img
ppe = Ppe_Detection()
ppe.cropping_detection()
# ppe.get_classes()
# ppe.run_video()
# take_photo()
# inference_image()
The function that I've imported here can be found below
import numpy as np
import pandas as pd
import cv2
import time
class Ppe_Detection():
def __init__(self):
self.weightfile = 'backup/yolov3_best.weights'
self.cfgfile = 'yolov3.cfg'
self.PpeNet = cv2.dnn.readNet(self.weightfile,self.cfgfile)
self.classes = self.get_classes()
layer_names = self.PpeNet.getLayerNames()
self.output_layers = [layer_names[i - 1] for i in self.PpeNet.getUnconnectedOutLayers()]
# = [self.PpeNet.getLayerNames()[(i[0] - 1)] for i in self.PpeNet.getUnconnectedOutLayers()]
def get_classes(self):
# self.classes = []
with open("obj.names","r") as f:
self.classes_val = [line.strip() for line in f.readlines()]
return self.classes_val
def print_all(self):
print(self.classes)
def detection(self,img):
start = time.perf_counter()
height,width,channels = img.shape
# Detecting objects
blob = cv2.dnn.blobFromImage(img,0.00392,(416,416),(0,0,0),True,crop=False)
self.PpeNet.setInput(blob)
outs = self.PpeNet.forward(self.output_layers)
time_took = time.perf_counter() - start
fps = str(int(1/time_took))
# getting the list
class_ids = []
confidences = []
boxes = []
for out in outs:
for detection in out:
scores = detection[5:]
class_id = np.argmax(scores)
confidence = scores[class_id]
if confidence > 0.4:
# object detected
center_x = int(detection[0]*width)
center_y = int(detection[1]*width)
w = int(detection[2]*width)
h = int(detection[3]*height)
# Rectangle Coordinates
x = int(center_x-w/2)
y = int(center_y-h/2)
boxes.append([x,y,w,h])
confidences.append(float(confidence))
class_ids.append(class_id)
indexes = cv2.dnn.NMSBoxes(boxes,confidences,0.4,0.4)
info = []
if len(indexes) > 0:
for i in indexes.flatten():
x,y = boxes[i][0], boxes[i][1]
w,h = boxes[i][2], boxes[i][3]
conf = confidences[i]
if x<0:
x = 0
if y < 0:
y = 0
type = '{}'.format(self.classes[class_ids[i]])
info.append([x,y,w,h,type,conf])
new_frame_time = time.time()
font = cv2.FONT_HERSHEY_PLAIN
for i in range(len(boxes)):
for i in indexes:
x,y,w,h = boxes[i]
label = str(self.classes[class_ids[i]])
color = (0,255,145)
cv2.rectangle(img,(x,y),(x+w,y+h),color,2)
cv2.putText(img,fps,(10,30),font,3,color,3)
cv2.putText(img,label,(x,y+30),font,3,color,3)
# print(boxes,confidences,class_ids)
# print("opened")
print(info)
return info
ppe = Ppe_Detection()
ppe.get_classes()
ppe.print_all()
#############
def inference_image():
img = cv2.imread("sumit_off.jpg")
ppe.detection(img)
cv2.imshow("Image",img)
cv2.waitKey(0)
cv2.destroyAllWindows()
def run_video():
cap = cv2.VideoCapture(0)
while cap.isOpened():
r,f = cap.read()
try:
info = ppe.detection(f)
except Exception as e:
print("______",e)
cv2.imshow("image",f)
if cv2.waitKey(1) & 0xFF == ord("q") :
break
cap.release()
cv2.destroyAllWindows()
run_video()

How use for loop many variable?

I am using Yolov5. I want change my webcam -> lancamera
class LoadStreams: # multiple IP or RTSP cameras
def __init__(self, sources='streams.txt', img_size=640):
self.mode = 'images'
self.img_size = img_size
if os.path.isfile(sources):
with open(sources, 'r') as f:
sources = [x.strip() for x in f.read().splitlines() if len(x.strip())]
else:
sources = [sources]
n = len(sources)
self.imgs = [None] * n
self.sources = sources
for i, s in enumerate(sources):
# Start the thread to read frames from the video stream
print('%g/%g: %s... ' % (i + 1, n, s), end='')
cap = cv2.VideoCapture(eval(s) if s.isnumeric() else s)
assert cap.isOpened(), 'Failed to open %s' % s
w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS) % 100
_, self.imgs[i] = cap.read() # guarantee first frame
thread = Thread(target=self.update, args=([i, cap]), daemon=True)
print(' success (%gx%g at %.2f FPS).' % (w, h, fps))
thread.start()
print('') # newline
# check for common shapes
s = np.stack([letterbox(x, new_shape=self.img_size)[0].shape for x in self.imgs], 0) # inference shapes
self.rect = np.unique(s, axis=0).shape[0] == 1 # rect inference if all shapes equal
if not self.rect:
print('WARNING: Different stream shapes detected. For optimal performance supply similarly-shaped streams.')
def update(self, index, cap):
# Read next stream frame in a daemon thread
n = 0
while cap.isOpened():
n += 1
# _, self.imgs[index] = cap.read()
cap.grab()
if n == 4: # read every 4th frame
_, self.imgs[index] = cap.retrieve()
n = 0
time.sleep(0.01) # wait time
def __iter__(self):
self.count = -1
return self
def __next__(self):
self.count += 1
img0 = self.imgs.copy()
if cv2.waitKey(1) == ord('q'): # q to quit
cv2.destroyAllWindows()
raise StopIteration
# Letterbox
img = [letterbox(x, new_shape=self.img_size, auto=self.rect)[0] for x in img0]
# Stack
img = np.stack(img, 0)
# Convert
img = img[:, :, :, ::-1].transpose(0, 3, 1, 2) # BGR to RGB, to bsx3x416x416
img = np.ascontiguousarray(img)
return self.sources, img, img0, None
def __len__(self):
return 0 # 1E12 frames = 32 streams at 30 FPS for 30 years
this code return 'self.sources, img, img0, None'
if webcam:
view_img = True
cudnn.benchmark = True # set True to speed up constant image size inference
dataset = LoadStreams(source, img_size=imgsz)
print((dataset))
I use 'dataset'
for path, img, im0s, vid_cap in dataset:
img = torch.from_numpy(img).to(device)
img = img.half() if half else img.float() # uint8 to fp16/32
img /= 255.0 # 0 - 255 to 0.0 - 1.0
How to use for path, img, im0s, vid_cap in dataset: ??
my lancam code
def livecame():
vimba = Vimba()
vimba.startup()
system = vimba.system()
system.run_feature_command("GeVDiscoveryAllOnce")
time.sleep(0.1)
camera_ids = vimba.camera_ids()
# for cam_id in camera_ids:
# print("Camera found: ", cam_id)
print(camera_ids[0])
c0 = vimba.camera(camera_ids[0])
c0.open()
pixel_format = c0.feature("PixelFormat")
pixel_format.value = "BayerBG8"
try:
c0.StreamBytesPerSecond = 100000000
except:
pass
frame = c0.new_frame()
frame.announce()
c0.start_capture()
try:
frame.queue_for_capture()
success = True
except:
success = False
c0.run_feature_command("AcquisitionStart")
c0.run_feature_command("AcquisitionStop")
frame.wait_for_capture(1000)
frame_data = frame.buffer_data()
k = cv2.waitKey(1)
if k == 0x1b:
cv2.destroyAllWindows()
if success:
img = np.ndarray(buffer=frame_data,
dtype=np.uint8,
shape=(frame.data.height, frame.data.width, 1))
img = cv2.cvtColor(img, cv2.COLOR_BAYER_BG2RGB)
img0 = img.copy()
img = img.tolist()
img = [letterbox(x, new_shape=(800,400), auto= True)[0] for x in img0]
#img = np.ascontiguousarray(img)
img = np.stack(img, 0)
#img = img[:, :, :, ::-1].transpose(0, 3, 1, 2) # BGR to RGB, to bsx3x416x416
img = np.ascontiguousarray(img)
return ['0'], img, img0
but I use dataset = new_file.livecame()
I can see error ValueError: not enough values to unpack (expected 3, got 1)
in for path, img, im0s, vid_cap in dataset:
how to use many variable? in for loop?

In Python OpenCV, one way is simply to use zip.
for component in zip(contours, hierarchy):
cntr = component[0]
hier = component[1]

How can I convert pytorch cpu-based transformation to cuda-based?

The original issue for the code is availablehere.
I am using this repository for a line segmentation project and I developed this code to get an input (whether image or video) and draw road lines on it and give it in output:
import argparse
import sys
from time import time, clock
from os.path import splitext, basename, exists
from model import SCNN
from utils.check_extension import is_video, is_image
from utils.transforms import *
# I will put all the necessary code for utils.transforms after this
# ------------------------------------------------ SCNN parameters
time1 = time()
net = SCNN(input_size=(800, 288), pretrained=False)
mean = (0.3598, 0.3653, 0.3662) # CULane mean, std
std = (0.2573, 0.2663, 0.2756)
transform_img = Resize((800, 288))
transform_to_net = Compose(ToTensor(), Normalize(mean=mean, std=std))
# ------------------------------------------------ Arguments
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument('--weights', type=str,
default='models/vgg_SCNN_DULR_w9.pth',
help='path to vgg models')
parser.add_argument('--input', type=str, default='demo/line_3.mp4',
help='path to image file')
parser.add_argument('--output', type=str, default='public/',
help='path to the output directory')
args = parser.parse_args()
return args
def main():
args = parse_args()
filename, extension = splitext(basename(args.input))
print("Loading file [{}] ....".format(filename))
if not exists(args.input):
print("file [{}] is not recognized".format(args.input))
sys.exit()
if is_video(extension):
video_capture = cv2.VideoCapture()
fourcc = cv2.VideoWriter_fourcc(*'XVID')
output = args.output + filename + '.avi'
if video_capture.open(args.input):
property_id = int(cv2.CAP_PROP_FRAME_COUNT)
total_frames = int(cv2.VideoCapture.get(video_capture, property_id))
frame_no = 1
width, height = int(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH)), \
int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = video_capture.get(cv2.CAP_PROP_FPS)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
save_dict = torch.load(args.weights, map_location=device)
net.load_state_dict(save_dict['net'])
net.eval()
# can't write out mp4, so try to write into an AVI file
video_writer = cv2.VideoWriter(output, fourcc, fps, (width, height))
while video_capture.isOpened():
start = time()
ret, frame = video_capture.read()
if not ret:
break
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
frame = transform_img({'img': frame})['img']
x = transform_to_net({'img': frame})['img']
x.unsqueeze_(0)
stop1 = time()
print('stop1: ', stop1 - start)
seg_pred, exist_pred = net(x)[:2]
seg_pred = seg_pred.detach().cpu().numpy()
exist_pred = exist_pred.detach().cpu().numpy()
seg_pred = seg_pred[0]
stop2 = time()
print('stop2: ', stop2 - stop1)
frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
lane_img = np.zeros_like(frame)
color = np.array([[255, 125, 0], [0, 255, 0], [0, 0, 255], [0, 255, 255]], dtype='uint8')
coord_mask = np.argmax(seg_pred, axis=0)
for i in range(0, 4):
if exist_pred[0, i] > 0.5:
lane_img[coord_mask == (i + 1)] = color[i]
img = cv2.addWeighted(src1=lane_img, alpha=0.8, src2=frame, beta=1., gamma=0.)
img = cv2.resize(img, (width, height))
stop3 = time()
print('stop3: ', stop3 - stop2)
# if frame_no % 20 == 0:
# print('# {}/{} frames processed!'.format(frame_no, total_frames))
frame_no += 1
video_writer.write(img)
end = time()
print('Whole loop: {} seconds'.format(end - start))
print('------------')
print('------------')
print('# All frames processed ')
video_capture.release()
video_writer.release()
elif is_image(extension):
img = cv2.imread(args.input)
height, width, _ = img.shape
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = transform_img({'img': img})['img']
x = transform_to_net({'img': img})['img']
x.unsqueeze_(0)
save_dict = torch.load(args.weights, map_location='cpu')
net.load_state_dict(save_dict['net'])
net.eval()
seg_pred, exist_pred = net(x)[:2]
seg_pred = seg_pred.detach().cpu().numpy()
exist_pred = exist_pred.detach().cpu().numpy()
seg_pred = seg_pred[0]
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
lane_img = np.zeros_like(img)
color = np.array([[255, 125, 0], [0, 255, 0], [0, 0, 255], [0, 255, 255]], dtype='uint8')
coord_mask = np.argmax(seg_pred, axis=0)
for i in range(0, 4):
if exist_pred[0, i] > 0.5:
lane_img[coord_mask == (i + 1)] = color[i]
img = cv2.addWeighted(src1=lane_img, alpha=0.8, src2=img, beta=1., gamma=0.)
img = cv2.resize(img, (width, height))
output = args.output + filename + '.jpg'
cv2.imwrite(output, img)
else:
print("file format [{}] is not supported".format(args.input))
sys.exit()
if __name__ == '__main__':
main()
The code which belong to Resize, ToTensor, Normalize, Compose are here:
class Compose(CustomTransform):
"""
All transform in Compose should be able to accept two non None variable, img and boxes
"""
def __init__(self, *transforms):
self.transforms = [*transforms]
def __call__(self, sample):
for t in self.transforms:
sample = t(sample)
return sample
def __iter__(self):
return iter(self.transforms)
def modules(self):
yield self
for t in self.transforms:
if isinstance(t, Compose):
for _t in t.modules():
yield _t
else:
yield t
class Normalize(CustomTransform):
def __init__(self, mean, std):
self.transform = Normalize_th(mean, std)
def __call__(self, sample):
img = sample.get('img')
img = self.transform(img)
_sample = sample.copy()
_sample['img'] = img
return _sample
class ToTensor(CustomTransform):
def __init__(self, dtype=torch.float):
self.dtype=dtype
def __call__(self, sample):
img = sample.get('img')
segLabel = sample.get('segLabel', None)
exist = sample.get('exist', None)
img = img.transpose(2, 0, 1)
img = torch.from_numpy(img).type(self.dtype) / 255.
if segLabel is not None:
segLabel = torch.from_numpy(segLabel).type(torch.long)
if exist is not None:
exist = torch.from_numpy(exist).type(torch.float32) # BCEloss requires float tensor
_sample = sample.copy()
_sample['img'] = img
_sample['segLabel'] = segLabel
_sample['exist'] = exist
return _sample
class Resize(CustomTransform):
def __init__(self, size):
if isinstance(size, int):
size = (size, size)
self.size = size #(W, H)
def __call__(self, sample):
img = sample.get('img')
segLabel = sample.get('segLabel', None)
img = cv2.resize(img, self.size, interpolation=cv2.INTER_CUBIC)
if segLabel is not None:
segLabel = cv2.resize(segLabel, self.size, interpolation=cv2.INTER_NEAREST)
_sample = sample.copy()
_sample['img'] = img
_sample['segLabel'] = segLabel
return _sample
def reset_size(self, size):
if isinstance(size, int):
size = (size, size)
self.size = size
The code works fine but I found out that its too slow for testing in real-time application. I added some time measurement to see if I can find out the bottlenecks and this is the output for one loop:
------------
stop1: 0.002989053726196289
stop2: 1.4032211303710938
stop3: 0.004946708679199219
Whole loop: 1.41636061668396 seconds
These lines happened to be the most computationally expensive lines:
seg_pred, exist_pred = net(x)[:2]
seg_pred = seg_pred.detach().cpu().numpy()
exist_pred = exist_pred.detach().cpu().numpy()
seg_pred = seg_pred[0]
Now I am stuck with this issue that how I can modify the code to improve the computation speed.
Initially I thought of modifying the code to allow cuda computation. I asked the main author how I can modify the code for cuda version in here and he pointed out to these lines:
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
frame = transform_img({'img': frame})['img']
x = transform_to_net({'img': frame})['img']
x.unsqueeze_(0)
Unfortunately my experience with pytorch is not much, so I am asking for help now.
I hope the information I shared suffices for the readers. Any help would be appreciated
Thanks

Set device:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
what he means his putting the data on device:
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
frame = transform_img({'img': frame})['img']
x = transform_to_net({'img': frame})['img']
x.unsqueeze_(0).to(device)

TypeError: can't pickle _thread.lock objects in multithreading

I'm trying to accelerate an OPENCV video processing with multiprocessing, but I get an exception in the queue.
The class pipeline, it takes and image and some Kalman filter trackers, and returns an image with the tracked cars, peds,..etc.
It has all the calculations done there.
Error:
TypeError: can't pickle _thread.lock objects
Code:
class DummyTask:
def __init__(self, data):
self.data = data
def ready(self):
return True
def get(self):
return self.data
if __name__ == "__main__":
video_name = '2016-11-18_07-30-01.h264'
cap = cv2.VideoCapture(video_name)
det = detector.CarDetector()
car_tracker = Sort_Algorithm.Sort()
ped_tracker = Sort_Algorithm.Sort()
df_region, df_line = load_filter()
threadn = cv2.getNumberOfCPUs()
pool = Pool(processes = 2)
pending = Queue()
threaded_mode = True
while True:
while pending.qsize() > 0:
res = pending.get()
cv2.imshow('video ', res)
if pending.qsize() < 2:
ret, frame = cap.read()
if threaded_mode:
t1 = time.time()
H = [-2.01134074616, -16.6502442427, -1314.05715739, -3.35391526592, -22.3546973012, 2683.63584335,
-0.00130731963137, -0.0396207582264, 1]
matrix = np.reshape(H, (3, 3))
dst = cv2.warpPerspective(frame.copy(), matrix, (frame.shape[1], frame.shape[0]))
task = pool.apply_async(pipeline, (frame.copy(),car_tracker, ped_tracker,df_region, df_line, det, dst, matrix))
cv2.imshow('dst', dst)
else:
task = DummyTask(pipeline,(frame.copy(),car_tracker, ped_tracker,df_region, df_line, det, dst, matrix))
pending.put(task)
ch = cv2.waitKey(1)
if ch == ord(' '):
threaded_mode = not threaded_mode
if ch == 27:
break

Convolution preprocess consuming more RAM Python

I am trying to implement this CNN model for stock prize prediction: https://github.com/ZezhouLi/Convolutional-Networks-for-Stock-Predicting
But I am facing the issue while preprocessing the data for the implementation. The preprocess step is consuming a lot of RAM. (My system has 32 GB RAM and 256 GB SSD Hard disk)
Here is the file that is consuming the RAM and at last gives a Memory Error:
import numpy as np
import matplotlib.pyplot as plt
import glob
import math
from PIL import Image
import statsmodels.api as sm
def r_squared(y_true, y_hat):
ssr = 0
sst = 0
e = np.subtract(y_true, y_hat)
y_mean = np.mean(y_true)
for item in e:
ssr += item**2
for item in y_true:
sst += (item - y_mean)**2
r2 = 1 - ssr / sst
return r2
def data_process(data):
processed_data = []
for item in data:
m = np.mean(item)
s = np.std(item)
normal_item = [(float(i)-m)/s for i in item]
normal_item.insert(0, 1)
processed_data.append(normal_item)
return processed_data
def get_pixel_values():
file_name = r'\figures'
pixels = []
for filename in glob.glob(file_name + '\*.png'):
im = Image.open(filename)
temp_pixels = list(im.getdata())
pixels.append(temp_pixels)
return pixels
def find_returns(data):
returns = []
for group in data:
count = 30
while count <= (len(group)-5):
current_data = group[count-1]
future_data = group[count+4]
p1 = np.mean(current_data)
p2 = np.mean(future_data)
returns.append(math.log(p2/p1))
count += 1
return returns
def convert_image():
size = 54, 32
file_name = r'\figures'
for filename in glob.glob(file_name + '\*.png'):
img = Image.open(filename)
img.thumbnail(size)
img = img.convert('L')
img.save(filename)
def plot_data(data):
t = np.arange(0, 29, 1)
file_name_number = 0
fig = plt.figure(frameon=False)
for group in data:
count = 30
while count <= (len(group)-5):
high = []
low = []
for item in group[count-30:count]:
high.append(item[0])
low.append(item[1])
file_name = r'\fig_' + str(file_name_number)
ax = plt.Axes(fig, [0., 0., 1., 1.])
ax.set_axis_off()
fig.add_axes(ax)
ax.plot(t, high[0:-1], 'b', t, low[0:-1], 'g')
fig.savefig(r'\figures' + file_name)
fig.clf()
file_name_number += 1
count += 1
print('Created %d files!' % file_name_number)
def extract_useful_data(data):
groups = []
for group in data:
temp_buffer = []
for item in group:
temp = [item[2], item[3]]
temp = [float(i) for i in temp]
temp_buffer.append(temp)
groups.append(temp_buffer)
return groups
def split_data(data):
groups = []
for item in data:
temp_buffer = []
for string in item:
number = string.split(',')
temp_buffer.append(number)
groups.append(temp_buffer)
return groups
def extract_data():
file_name = r'\data.txt'
infile = open(file_name, 'r')
temp_buffer = []
for line in infile:
temp_buffer.append(line.strip('\n'))
temp_buffer = temp_buffer[8:]
i = 0
groups = []
temp = []
for item in temp_buffer:
if i != 390:
temp.append(item)
i += 1
else:
groups.append(temp)
temp = []
i = 0
groups.append(temp)
infile.close()
return groups
def main():
original_data = extract_data()
splitted_data = split_data(original_data)
useful_data = extract_useful_data(splitted_data)
plot_data(useful_data)
convert_image()
returns = np.asarray(find_returns(useful_data))
training_data = np.asarray(get_pixel_values())
training_data = sm.add_constant(training_data, has_constant='add')
results = sm.OLS(returns[0:4340], training_data[0:4340]).fit()
y_in_sample = results.predict(training_data[0:4340])
r2 = r_squared(returns[0:4340], y_in_sample)
print r2
if __name__ == "__main__":
main()
I have got Memory Error, which occurs when the program consumes all of the RAM memory of the system. Please improve on the program.

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Do parallel processing for files in same folder - python

Related

Is it possible to run two YOLO (yolov4) object detection models in a single application?

How use for loop many variable?

How can I convert pytorch cpu-based transformation to cuda-based?

TypeError: can't pickle _thread.lock objects in multithreading

Convolution preprocess consuming more RAM Python

Categories

Resources