I am very new to Python, but I need it for my project, so I have been learning on the go. There is a point I don't understand in the piece of code below:
result['data'] = pool.map(image_processor, im_info)
So, I figured out that pool.map(somefunc, [list of params]) is akin to MATLAB's parfor. What I don't understand is the part relating to image_processor. I am assuming it should be a function, yet it is not defined as one using def. The code works perfectly fine, and when I traced it with pdb it seemed to process the images in batches for Caffe input as intended. This might be a very trivial question; I am just new to Python. So what is the order of execution that makes image_processor just work, and how can I make changes in this function? (I will be passing multiple parameters; I just need to know where to make those changes, since the function is not defined using def.) Thanks.
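To illustrate the parfor analogy I mean, here is a toy example with a plain function (a minimal sketch, not from the project's code):

from multiprocessing import Pool

def square(x):
    # each element of the input list is handled by a worker process
    return x * x

if __name__ == '__main__':
    pool = Pool(processes=4)
    print(pool.map(square, [1, 2, 3, 4]))  # -> [1, 4, 9, 16]
    pool.close()
    pool.join()

The full data layer code is below: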
#Data layer for video. Change flow_frames and RGB_frames to be the path to the flow and RGB frames.
import sys
sys.path.append('../../python')
import caffe
import io
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import scipy.misc
import time
import pdb
import glob
import pickle as pkl
import random
import h5py
from multiprocessing import Pool
from threading import Thread
import skimage.io
import copy
flow_frames = 'flow_images/'
RGB_frames = 'frames/'
test_frames = 16
train_frames = 16
test_buffer = 3
train_buffer = 24
def processImageCrop(im_info, transformer, flow):
im_path = im_info[0]
im_crop = im_info[1]
im_reshape = im_info[2]
im_flip = im_info[3]
data_in = caffe.io.load_image(im_path)
if (data_in.shape[0] < im_reshape[0]) | (data_in.shape[1] < im_reshape[1]):
data_in = caffe.io.resize_image(data_in, im_reshape)
if im_flip:
data_in = caffe.io.flip_image(data_in, 1, flow)
data_in = data_in[im_crop[0]:im_crop[2], im_crop[1]:im_crop[3], :]
processed_image = transformer.preprocess('data_in',data_in)
return processed_image
class ImageProcessorCrop(object):
def __init__(self, transformer, flow):
self.transformer = transformer
self.flow = flow
def __call__(self, im_info):
return processImageCrop(im_info, self.transformer, self.flow)
class sequenceGeneratorVideo(object):
def __init__(self, buffer_size, clip_length, num_videos, video_dict, video_order):
self.buffer_size = buffer_size
self.clip_length = clip_length
self.N = self.buffer_size*self.clip_length
self.num_videos = num_videos
self.video_dict = video_dict
self.video_order = video_order
self.idx = 0
def __call__(self):
label_r = []
im_paths = []
im_crop = []
im_reshape = []
im_flip = []
if self.idx + self.buffer_size >= self.num_videos:
idx_list = range(self.idx, self.num_videos)
idx_list.extend(range(0, self.buffer_size-(self.num_videos-self.idx)))
else:
idx_list = range(self.idx, self.idx+self.buffer_size)
for i in idx_list:
key = self.video_order[i]
label = self.video_dict[key]['label']
video_reshape = self.video_dict[key]['reshape']
video_crop = self.video_dict[key]['crop']
label_r.extend([label]*self.clip_length)
im_reshape.extend([(video_reshape)]*self.clip_length)
r0 = int(random.random()*(video_reshape[0] - video_crop[0]))
r1 = int(random.random()*(video_reshape[1] - video_crop[1]))
im_crop.extend([(r0, r1, r0+video_crop[0], r1+video_crop[1])]*self.clip_length)
f = random.randint(0,1)
im_flip.extend([f]*self.clip_length)
rand_frame = int(random.random()*(self.video_dict[key]['num_frames']-self.clip_length)+1+1)
frames = []
for i in range(rand_frame,rand_frame+self.clip_length):
frames.append(self.video_dict[key]['frames'] %i)
im_paths.extend(frames)
im_info = zip(im_paths,im_crop, im_reshape, im_flip)
self.idx += self.buffer_size
if self.idx >= self.num_videos:
self.idx = self.idx - self.num_videos
return label_r, im_info
def advance_batch(result, sequence_generator, image_processor, pool):
label_r, im_info = sequence_generator()
#tmp = image_processor(im_info[0])
result['data'] = pool.map(image_processor, im_info)
result['label'] = label_r
cm = np.ones(len(label_r))
cm[0::16] = 0
result['clip_markers'] = cm
class BatchAdvancer():
def __init__(self, result, sequence_generator, image_processor, pool):
self.result = result
self.sequence_generator = sequence_generator
self.image_processor = image_processor
self.pool = pool
def __call__(self):
return advance_batch(self.result, self.sequence_generator, self.image_processor, self.pool)
class videoRead(caffe.Layer):
def initialize(self):
self.train_or_test = 'test'
self.flow = False
self.buffer_size = test_buffer #num videos processed per batch
self.frames = test_frames #length of processed clip
self.N = self.buffer_size*self.frames
self.idx = 0
self.channels = 3
self.height = 227
self.width = 227
self.path_to_images = RGB_frames
self.video_list = 'ucf101_split1_testVideos.txt'
def setup(self, bottom, top):
random.seed(10)
self.initialize()
f = open(self.video_list, 'r')
f_lines = f.readlines()
f.close()
video_dict = {}
current_line = 0
self.video_order = []
for ix, line in enumerate(f_lines):
video = line.split(' ')[0].split('/')[1]
l = int(line.split(' ')[1])
frames = glob.glob('%s%s/*.jpg' %(self.path_to_images, video))
num_frames = len(frames)
video_dict[video] = {}
video_dict[video]['frames'] = frames[0].split('.')[0] + '.%04d.jpg'
video_dict[video]['reshape'] = (240,320)
video_dict[video]['crop'] = (227, 227)
video_dict[video]['num_frames'] = num_frames
video_dict[video]['label'] = l
self.video_order.append(video)
self.video_dict = video_dict
self.num_videos = len(video_dict.keys())
#set up data transformer
shape = (self.N, self.channels, self.height, self.width)
self.transformer = caffe.io.Transformer({'data_in': shape})
self.transformer.set_raw_scale('data_in', 255)
if self.flow:
image_mean = [128, 128, 128]
self.transformer.set_is_flow('data_in', True)
else:
image_mean = [103.939, 116.779, 128.68]
self.transformer.set_is_flow('data_in', False)
channel_mean = np.zeros((3,227,227))
for channel_index, mean_val in enumerate(image_mean):
channel_mean[channel_index, ...] = mean_val
self.transformer.set_mean('data_in', channel_mean)
self.transformer.set_channel_swap('data_in', (2, 1, 0))
self.transformer.set_transpose('data_in', (2, 0, 1))
self.thread_result = {}
self.thread = None
pool_size = 24
self.image_processor = ImageProcessorCrop(self.transformer, self.flow)
self.sequence_generator = sequenceGeneratorVideo(self.buffer_size, self.frames, self.num_videos, self.video_dict, self.video_order)
self.pool = Pool(processes=pool_size)
self.batch_advancer = BatchAdvancer(self.thread_result, self.sequence_generator, self.image_processor, self.pool)
self.dispatch_worker()
self.top_names = ['data', 'label','clip_markers']
print 'Outputs:', self.top_names
if len(top) != len(self.top_names):
raise Exception('Incorrect number of outputs (expected %d, got %d)' %
(len(self.top_names), len(top)))
self.join_worker()
for top_index, name in enumerate(self.top_names):
if name == 'data':
shape = (self.N, self.channels, self.height, self.width)
elif name == 'label':
shape = (self.N,)
elif name == 'clip_markers':
shape = (self.N,)
top[top_index].reshape(*shape)
def reshape(self, bottom, top):
pass
def forward(self, bottom, top):
if self.thread is not None:
self.join_worker()
#rearrange the data: The LSTM takes inputs as [video0_frame0, video1_frame0,...] but the data is currently arranged as [video0_frame0, video0_frame1, ...]
new_result_data = [None]*len(self.thread_result['data'])
new_result_label = [None]*len(self.thread_result['label'])
new_result_cm = [None]*len(self.thread_result['clip_markers'])
for i in range(self.frames):
for ii in range(self.buffer_size):
old_idx = ii*self.frames + i
new_idx = i*self.buffer_size + ii
new_result_data[new_idx] = self.thread_result['data'][old_idx]
new_result_label[new_idx] = self.thread_result['label'][old_idx]
new_result_cm[new_idx] = self.thread_result['clip_markers'][old_idx]
for top_index, name in zip(range(len(top)), self.top_names):
if name == 'data':
for i in range(self.N):
top[top_index].data[i, ...] = new_result_data[i]
elif name == 'label':
top[top_index].data[...] = new_result_label
elif name == 'clip_markers':
top[top_index].data[...] = new_result_cm
self.dispatch_worker()
def dispatch_worker(self):
assert self.thread is None
self.thread = Thread(target=self.batch_advancer)
self.thread.start()
def join_worker(self):
assert self.thread is not None
self.thread.join()
self.thread = None
def backward(self, top, propagate_down, bottom):
pass
class videoReadTrain_flow(videoRead):
def initialize(self):
self.train_or_test = 'train'
self.flow = True
self.buffer_size = train_buffer #num videos processed per batch
self.frames = train_frames #length of processed clip
self.N = self.buffer_size*self.frames
self.idx = 0
self.channels = 3
self.height = 227
self.width = 227
self.path_to_images = flow_frames
self.video_list = 'ucf101_split1_trainVideos.txt'
class videoReadTest_flow(videoRead):
def initialize(self):
self.train_or_test = 'test'
self.flow = True
self.buffer_size = test_buffer #num videos processed per batch
self.frames = test_frames #length of processed clip
self.N = self.buffer_size*self.frames
self.idx = 0
self.channels = 3
self.height = 227
self.width = 227
self.path_to_images = flow_frames
self.video_list = 'ucf101_split1_testVideos.txt'
class videoReadTrain_RGB(videoRead):
def initialize(self):
self.train_or_test = 'train'
self.flow = False
self.buffer_size = train_buffer #num videos processed per batch
self.frames = train_frames #length of processed clip
self.N = self.buffer_size*self.frames
self.idx = 0
self.channels = 3
self.height = 227
self.width = 227
self.path_to_images = RGB_frames
self.video_list = 'ucf101_split1_trainVideos.txt'
class videoReadTest_RGB(videoRead):
def initialize(self):
self.train_or_test = 'test'
self.flow = False
self.buffer_size = test_buffer #num videos processed per batch
self.frames = test_frames #length of processed clip
self.N = self.buffer_size*self.frames
self.idx = 0
self.channels = 3
self.height = 227
self.width = 227
self.path_to_images = RGB_frames
self.video_list = 'ucf101_split1_testVideos.txt'
The code is from https://github.com/LisaAnne/lisa-caffe-public/blob/lstm_video_deploy/examples/LRCN_activity_recognition/sequence_input_layer.py
See this line:
self.image_processor = ImageProcessorCrop(self.transformer, self.flow)
The ImageProcessorCrop class defines a __call__ method, which allows its instances to be used as if they were functions: when you call an instance, Python invokes that method. So pool.map calls image_processor(element) for each element of im_info, which in turn runs processImageCrop with the transformer and flow that were stored on the object in __init__.
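If you need to pass extra parameters, the usual place is the object's __init__, since pool.map only hands one element of im_info to __call__ at a time. A minimal sketch with made-up names (not the original code):

from multiprocessing import Pool

def process(item, scale, offset):
    # plain helper function, analogous to processImageCrop
    return item * scale + offset

class ItemProcessor(object):
    def __init__(self, scale, offset):
        # extra parameters are stored on the instance in __init__ ...
        self.scale = scale
        self.offset = offset
    def __call__(self, item):
        # ... and used here; pool.map only supplies `item`
        return process(item, self.scale, self.offset)

if __name__ == '__main__':
    pool = Pool(processes=4)
    # behaves like map(ItemProcessor(2, 1), [0, 1, 2, 3]), but across worker processes
    print(pool.map(ItemProcessor(scale=2, offset=1), [0, 1, 2, 3]))  # -> [1, 3, 5, 7]
    pool.close()
    pool.join()

So in the layer above, changing what gets passed means storing the new values on ImageProcessorCrop in __init__ (or adding them to each im_info tuple) and then using them inside __call__ / processImageCrop.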
Related
I have written a simple digit-recognition neural network and it does not seem to be learning. It has two hidden layers and uses the softmax activation function, and whenever it runs it seems to converge on always picking 0. I would just like to check whether my code for updating the weight matrices is correct.
from cmath import exp
import numpy as np
from tensorflow.keras.datasets import mnist
class Run:
def __init__(self, num_inputs, num_hidden1, num_hidden2, num_outputs):
self.num_inputs = num_inputs
self.num_hidden1 = num_hidden1
self.num_hidden2 = num_hidden2
self.num_outputs = num_outputs
self.learningrate = 0.001
self.get = GetInput()
self.count = 0
self.countTrue = 0
self.count1 = 0
self.sum = 0
self.past = 0
self.inputLayer = Layer(num_inputs, num_hidden1)
self.hiddenLayer1 = Layer(num_hidden1, num_hidden2)
self.hiddenLayer2 = Layer(num_hidden2, num_outputs)
def getinput(self):
input, expected = self.get.get(self.count)
self.count +=1
self.count1 += 1
return input, expected
def runNN(self, input):
self.inputLayer.calc_output_1(input)
self.hiddenLayer1.calc_output_1(self.inputLayer.fin_outputs)
self.hiddenLayer2.calc_output_1(self.hiddenLayer1.fin_outputs)
self.NN_Output = self.hiddenLayer2.fin_outputs
def calculate_cost(self, expected):
error = 0
for i in range(self.num_outputs):
error += (self.NN_Output[i][0] - expected[i][0])**2 / self.num_outputs
list = []
list1 = []
for each in self.NN_Output:
list.append(float(each[0]))
self.sum += list.index(max(list))
for each in expected:
list1.append(float(each[0]))
if list1.index(max(list1)) == list.index(max(list)):
self.countTrue += 1
print(round(self.countTrue/self.count1, 3))
if self.count1 % 1000 == 0:
print(self.sum / 1000)
print('')
self.past = 0
self.sum = 0
self.count1 = 0
self.countTrue = 0
return error
def calc_new_hidden1(self, expected):
delta = self.NN_Output - expected
change = np.multiply(delta, self.hiddenLayer2.fin_outputs)
change_weights = np.matmul(change, np.transpose(self.hiddenLayer2.inputs)) * self.learningrate
change_bias = change * self.learningrate
self.hiddenLayer2.amend(change_weights, change_bias)
def calc_new_hidden2(self, expected):
delta = self.NN_Output - expected
change = np.multiply(np.matmul(np.transpose(self.hiddenLayer2.getter()[0]), delta), self.hiddenLayer1.fin_outputs)
change_weights = np.matmul(change, np.transpose(self.hiddenLayer1.inputs)) * self.learningrate
change_bias = change * self.learningrate
self.hiddenLayer1.amend(change_weights, change_bias)
def calc_new_input(self, expected):
delta = (self.NN_Output - expected)
change = np.multiply(np.matmul(np.transpose(self.hiddenLayer1.getter()[0]), np.matmul(np.transpose(self.hiddenLayer2.getter()[0]), delta)), self.inputLayer.fin_outputs)
change_weights = np.matmul(change, np.transpose(self.inputLayer.inputs)) * self.learningrate
change_bias = change * self.learningrate
self.inputLayer.amend(change_weights, change_bias)
class Layer:
def __init__(self, num_inputs, num_outputs):
self.__weights = np.random.uniform(-0.5, 0.5, (num_outputs, num_inputs))
self.__bias = np.matrix([[float(0)] for x in range(num_outputs)])
def calc_output_1(self, inputs):
self.inputs = inputs
self.__output_1 = np.matmul(self.__weights, inputs) + self.__bias
self.softmax()
def softmax(self):
sum = 0
for each in self.__output_1:
sum += np.exp(float(each[0]))
list1 = []
for each in self.__output_1:
list1.append([float(np.exp(each[0])/sum)])
self.fin_outputs = np.matrix(list1)
def amend(self, change_weights, change_bias):
self.__weights -= change_weights
self.__bias -= change_bias
def getter(self):
return self.__weights, self.__bias
class GetInput:
def __init__(self):
(self.X_train, self.Y_train), (X_test, Y_test) = mnist.load_data()
self.X_train = self.X_train.reshape(self.X_train.shape[0], 28, 28, 1)
x_test = X_test.reshape(X_test.shape[0], 28, 28, 1)
def get(self, i):
list = []
newPhoto = self.X_train[i].astype('float32')/255
for each in newPhoto:
for n in each:
list.append([float(n)])
input = np.matrix(list)
list = []
expect = self.Y_train[i]
for each in range(10):
if each == expect:
list.append([1])
else:
list.append([0])
expected = np.matrix(list)
return input, expected
if __name__ == "__main__":
initiate = Run(784, 600, 400, 10)
while True:
input, expected = initiate.getinput()
initiate.runNN(input)
initiate.calculate_cost(expected)
initiate.calc_new_hidden1(expected)
initiate.calc_new_hidden2(expected)
initiate.calc_new_input(expected)
Here is the code I have created. The maths for updating the weight matrices is in the Run class: calc_new_hidden1(), calc_new_hidden2(), and calc_new_input().
I think the error is most likely in the calc_new_input() function.
I have a question about parallel processing. I extract frames from a video and store them in an H5 file; now I want to do this extraction with parallel processing.
What I tried:
I generate one file for all videos and their features, such as the change points and everything else.
Expected:
Generate the H5 file via parallel processing.
import os
from networks.CNN import ResNet
from utils.KTS.cpd_auto import cpd_auto
from tqdm import tqdm
import math
import cv2
import numpy as np
import h5py
class Generate_Dataset:
def __init__(self, video_path, save_path):
self.resnet = ResNet()
self.dataset = {}
self.video_list = []
self.video_path = ''
self.h5_file = h5py.File(save_path, 'w')
self._set_video_list(video_path)
def _set_video_list(self, video_path):
# import pdb;pdb.set_trace()
if os.path.isdir(video_path):
self.video_path = video_path
fileExt = r".mp4",".avi"
self.video_list = [_ for _ in os.listdir(video_path) if _.endswith(fileExt)]
self.video_list.sort()
else:
self.video_path = ''
self.video_list.append(video_path)
for idx, file_name in enumerate(self.video_list):
self.dataset['video_{}'.format(idx+1)] = {}
self.h5_file.create_group('video_{}'.format(idx+1))
def _extract_feature(self, frame):
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
frame = cv2.resize(frame, (224, 224))
res_pool5 = self.resnet(frame)
frame_feat = res_pool5.cpu().data.numpy().flatten()
return frame_feat
def _get_change_points(self, video_feat, n_frame, fps):
n = n_frame / fps
m = int(math.ceil(n/2.0))
K = np.dot(video_feat, video_feat.T)
change_points, _ = cpd_auto(K, m, 1)
change_points = np.concatenate(([0], change_points, [n_frame-1]))
temp_change_points = []
for idx in range(len(change_points)-1):
segment = [change_points[idx], change_points[idx+1]-1]
if idx == len(change_points)-2:
segment = [change_points[idx], change_points[idx+1]]
temp_change_points.append(segment)
change_points = np.array(list(temp_change_points))
# temp_n_frame_per_seg = []
# for change_points_idx in range(len(change_points)):
# n_frame = change_points[change_points_idx][1] - change_points[change_points_idx][0]
# temp_n_frame_per_seg.append(n_frame)
# n_frame_per_seg = np.array(list(temp_n_frame_per_seg))
# print(change_points)
arr = change_points
list1 = arr.tolist()
list2 = list1[-1].pop(1) #pop [-1]value
print(list2)
print(list1)
print("****************") # [-1][-1] value find and divided by 15
cps_m = math.floor(arr[-1][1]/15)
list1[-1].append(cps_m) #append to list
print(list1)
print("****************") #list to nd array convertion
arr = np.asarray(list1)
print(arr)
arrmul = arr * 15
print(arrmul)
print("****************")
# print(type(change_points))
# print(n_frame_per_seg)
# print(type(n_frame_per_seg))
median_frame = []
for x in arrmul:
print(x)
med = np.mean(x)
print(med)
int_array = med.astype(int)
median_frame.append(int_array)
print(median_frame)
# print(type(int_array))
return arrmul
# TODO : save dataset
def _save_dataset(self):
pass
def generate_dataset(self):
print('[INFO] CNN processing')
for video_idx, video_filename in enumerate(self.video_list):
video_path = video_filename
if os.path.isdir(self.video_path):
video_path = os.path.join(self.video_path, video_filename)
video_basename = os.path.basename(video_path).split('.')[0]
video_capture = cv2.VideoCapture(video_path)
fps = video_capture.get(cv2.CAP_PROP_FPS)
n_frames = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
frame_list = []
picks = []
video_feat = None
video_feat_for_train = None
for frame_idx in tqdm(range(n_frames-1)):
success, frame = video_capture.read()
if frame_idx % 15 == 0:
if success:
frame_feat = self._extract_feature(frame)
picks.append(frame_idx)
if video_feat_for_train is None:
video_feat_for_train = frame_feat
else:
video_feat_for_train = np.vstack((video_feat_for_train, frame_feat))
if video_feat is None:
video_feat = frame_feat
else:
video_feat = np.vstack((video_feat, frame_feat))
else:
break
video_capture.release()
arrmul = self._get_change_points(video_feat, n_frames, fps)
self.h5_file['video_{}'.format(video_idx+1)]['features'] = list(video_feat_for_train)
self.h5_file['video_{}'.format(video_idx+1)]['picks'] = np.array(list(picks))
self.h5_file['video_{}'.format(video_idx+1)]['n_frames'] = n_frames
self.h5_file['video_{}'.format(video_idx+1)]['fps'] = fps
self.h5_file['video_{}'.format(video_idx + 1)]['video_name'] = video_filename.split('.')[0]
self.h5_file['video_{}'.format(video_idx+1)]['change_points'] = arrmul
You can do it like this:
'''
First import the following. Here Parallel will parallelize the processing and
delayed is the wrapper.
'''
from joblib import Parallel, delayed
'''
Now we create a new function, copy-paste everything that was previously
inside the for loop, and pass video_idx and video_filename as arguments.
'''
def _generator(self, video_idx, video_filename):
video_path = video_filename
if os.path.isdir(self.video_path):
video_path = os.path.join(self.video_path, video_filename)
video_basename = os.path.basename(video_path).split('.')[0]
video_capture = cv2.VideoCapture(video_path)
fps = video_capture.get(cv2.CAP_PROP_FPS)
n_frames = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
frame_list = []
picks = []
video_feat = None
video_feat_for_train = None
for frame_idx in tqdm(range(n_frames-1)):
success, frame = video_capture.read()
if frame_idx % 15 == 0:
if success:
frame_feat = self._extract_feature(frame)
picks.append(frame_idx)
if video_feat_for_train is None:
video_feat_for_train = frame_feat
else:
video_feat_for_train = np.vstack((video_feat_for_train, frame_feat))
if video_feat is None:
video_feat = frame_feat
else:
video_feat = np.vstack((video_feat, frame_feat))
else:
break
video_capture.release()
arrmul = self._get_change_points(video_feat, n_frames, fps)
self.h5_file['video_{}'.format(video_idx+1)]['features'] = list(video_feat_for_train)
self.h5_file['video_{}'.format(video_idx+1)]['picks'] = np.array(list(picks))
self.h5_file['video_{}'.format(video_idx+1)]['n_frames'] = n_frames
self.h5_file['video_{}'.format(video_idx+1)]['fps'] = fps
self.h5_file['video_{}'.format(video_idx + 1)]['video_name'] = video_filename.split('.')[0]
self.h5_file['video_{}'.format(video_idx+1)]['change_points'] = arrmul
'''
Finally, we update the original generate_dataset function to use Parallel and delayed.
'''
def generate_dataset(self):
print('[INFO] CNN processing')
Parallel(n_jobs=-1)(delayed(self._generator)(video_idx, video_filename) for video_idx, video_filename in enumerate(self.video_list))
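To see the pattern in isolation, here is a minimal, self-contained sketch of Parallel and delayed (toy names, not part of the dataset code):

from joblib import Parallel, delayed

def work(idx, name):
    # stands in for self._generator(video_idx, video_filename)
    return '%d:%s' % (idx, name)

# n_jobs=-1 would use all available cores; results come back as a list,
# in the same order as the inputs
results = Parallel(n_jobs=2)(delayed(work)(i, n) for i, n in enumerate(['a.mp4', 'b.mp4']))
print(results)  # ['0:a.mp4', '1:b.mp4']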
I am not good with PyTorch, so I would appreciate some help converting this code to TensorFlow. I have tried going through some articles, but they were a bit intensive, so a little explanation would also be worthwhile so that the whole community can benefit from this.
"""
import torch
import copy
class PESG(torch.optim.Optimizer):
def __init__(self, model, a=None, b=None, alpha=None, imratio=0.1, margin=1.0, lr=0.1, gamma=500, clip_value=1.0, weight_decay=1e-5, **kwargs):
assert a is not None, 'Found no variable a!'
assert b is not None, 'Found no variable b!'
assert alpha is not None, 'Found no variable alpha!'
self.p = imratio
self.margin = margin
self.model = model
self.lr = lr
self.gamma = gamma
self.clip_value = clip_value
self.weight_decay = weight_decay
self.a = a
self.b = b
self.alpha = alpha
# TODO!
self.model_ref = []
for var in list(self.model.parameters())+[self.a, self.b]:
self.model_ref.append(torch.empty(var.shape).normal_(mean=0, std=0.01).cuda())
self.model_acc = []
for var in list(self.model.parameters())+[self.a, self.b]:
self.model_acc.append(torch.zeros(var.shape, dtype=torch.float32, device="cuda", requires_grad=False).cuda())
self.T = 0
self.step_counts = 0
def get_parameters(params):
for p in params:
yield p
self.params = get_parameters(list(model.parameters())+[a,b])
self.defaults = dict(lr=self.lr,
margin=margin,
gamma=gamma,
p=imratio,
a=self.a,
b=self.b,
alpha=self.alpha,
clip_value=clip_value,
weight_decay=weight_decay,
model_ref = self.model_ref,
model_acc = self.model_acc
)
super(PESG, self).__init__(self.params, self.defaults)
@property
def optim_steps(self):
return self.step_counts
def update_lr(self, lr):
self.param_groups[0]['lr']=lr
@torch.no_grad()
def step(self):
"""Performs a single optimization step.
"""
for group in self.param_groups:
weight_decay = group['weight_decay']
clip_value = group['clip_value']
self.lr = group['lr']
p = group['p']
gamma = group['gamma']
m = group['margin']
model_ref = group['model_ref']
model_acc = group['model_acc']
a = group['a']
b = group['b']
alpha = group['alpha']
# updates
for i, p in enumerate(group['params']):
if p.grad is None:
continue
p.data = p.data - group['lr']*( torch.clamp(p.grad.data , -clip_value, clip_value) + 1/gamma*(p.data - model_ref[i].data) ) - group['lr']*weight_decay*p.data
model_acc[i].data = model_acc[i].data + p.data
alpha.data = alpha.data + group['lr']*(2*(m + b.data - a.data)-2*alpha.data)
alpha.data = torch.clamp(alpha.data, 0, 999)
self.T += 1
self.step_counts += 1
def zero_grad(self):
self.model.zero_grad()
self.a.grad = None
self.b.grad = None
self.alpha.grad =None
def update_regularizer(self, decay_factor=None):
if decay_factor != None:
self.param_groups[0]['lr'] = self.param_groups[0]['lr']/decay_factor
print ('Reducing learning rate to %.5f # T=%s!'%(self.param_groups[0]['lr'], self.T))
print ('Updating regularizer # T=%s!'%(self.T))
for i, param in enumerate(self.model_ref):
self.model_ref[i].data = self.model_acc[i].data/self.T
for i, param in enumerate(self.model_acc):
self.model_acc[i].data = torch.zeros(param.shape, dtype=torch.float32, device="cuda", requires_grad=False).cuda()
self.T = 0
"""
For the last 3 days, I have been trying to build my first neural network to no avail. I'm asking this question here because I can't think of anywhere else to get quality feedback and I haven't found any solutions by searching so far.
The network takes in 784 inputs (pixels) and has 9 outputs (the digits 0-9); I am trying to train it on the MNIST handwritten digit dataset.
The problem is that after several iterations the output explodes into a list of random +1s and -1s, e.g.:
[-1., 1., 1., 1., -1., -1., 1., 1., 1.]
I will include code snippets of the ANN's __init__(), f_pass() and backpropagate(), as I think that if there is an error it will most likely be in the code handling the matrices. I can post more code on request if needed.
class Feedforward:
def __init__(self, size_vector):
self.nLayers = len(size_vector)
self.size_vector = size_vector
weight_matrix = lambda x: np.random.random((x[0],x[1]))
self.weights = {}
self.b = {}
self.z = {}
self.a = {}
for i in range(0,self.nLayers,1):
z = size_vector[i:i+2]
try:
self.b[i] = np.random.random((1,size_vector[i+1]))
except IndexError:
pass
if len(z) == 2:
self.weights[i] = weight_matrix(z)
f_pass():
def f_pass(self, data):
for layer in range(self.nLayers-1):
if layer == 0:
self.z[layer] = data.dot(self.weights[0])+self.b[layer].reshape((1,self.size_vector[layer+1]))
self.a[layer] = self.activation(self.z[layer]).reshape((1,self.size_vector[layer+1]))
else:
self.z[layer] = self.a[layer-1].dot(self.weights[layer])+self.b[layer]
self.a[layer] = self.activation(self.z[layer]).reshape((1,self.size_vector[layer+1]))
Backprop():
def backporpagate(self, data):
lr = .01
expected, inputs = data
cost = np.square(expected-self.a[self.nLayers-2])
partial_layer_error = {}
partial_weight_error = {}
partial_bias_error = {}
for i in range(self.nLayers-1):
layer = self.nLayers-2-i
if i == 0:
partial_layer_error[layer] = 2*(expected-self.a[self.nLayers-2])*self.activation_prime(self.z[layer]).reshape(1,self.size_vector[layer+1])
partial_weight_error[layer] = self.a[layer-1].transpose().dot(partial_layer_error[layer])
partial_bias_error[layer] = partial_layer_error[layer].reshape(1,self.size_vector[layer+1])
elif layer == 0:
partial_layer_error[layer] = partial_layer_error[layer+1].dot(self.weights[layer+1].transpose())
partial_weight_error[layer] = inputs.transpose().dot(partial_layer_error[layer])
partial_bias_error[layer] = partial_layer_error[layer].reshape(1,self.size_vector[layer+1])
else:
partial_layer_error[layer] = self.weights[layer+1].transpose().dot(partial_layer_error[layer+1])*self.activation_prime(self.z[layer])
partial_weight_error[layer] = self.a[layer-1].transpose().dot(partial_layer_error[layer])
partial_bias_error[layer] = partial_layer_error[layer].reshape(1,self.size_vector[layer+1])
for i in range(len(self.size_vector)-1):
self.weights[i] -= lr*partial_weight_error[i]
self.b[i] -= lr*partial_bias_error[i]
print(self.a[len(self.size_vector)-2])
print(expected)
The full code if anyone would like to view it is:
import numpy as np
import random
import pandas as pd
import scipy
class Feedforward:
def __init__(self, size_vector):
self.nLayers = len(size_vector)
self.size_vector = size_vector
weight_matrix = lambda x: np.random.random((x[0],x[1]))
self.weights = {}
self.b = {}
self.z = {}
self.a = {}
for i in range(0,self.nLayers,1):
z = size_vector[i:i+2]
try:
self.b[i] = np.random.random((1,size_vector[i+1]))
except IndexError:
pass
if len(z) == 2:
self.weights[i] = weight_matrix(z)
def activation(self, matrix):
#print(matrix)
matrix = np.clip( matrix, -300, 300 )
return (np.exp(2*matrix)-1)/(np.exp(2*matrix)+1)
def activation_prime(self, matrix):
return 1/(1-np.square(matrix))
def f_pass(self, data):
for layer in range(self.nLayers-1):
if layer == 0:
self.z[layer] = data.dot(self.weights[0])+self.b[layer].reshape((1,self.size_vector[layer+1]))
self.a[layer] = self.activation(self.z[layer]).reshape((1,self.size_vector[layer+1]))
else:
self.z[layer] = self.a[layer-1].dot(self.weights[layer])+self.b[layer]
self.a[layer] = self.activation(self.z[layer]).reshape((1,self.size_vector[layer+1]))
def backporpagate(self, data):
lr = .01
expected, inputs = data
cost = np.square(expected-self.a[self.nLayers-2])
partial_layer_error = {}
partial_weight_error = {}
partial_bias_error = {}
for i in range(self.nLayers-1):
layer = self.nLayers-2-i
if i == 0:
partial_layer_error[layer] = 2*(expected-self.a[self.nLayers-2])*self.activation_prime(self.z[layer]).reshape(1,self.size_vector[layer+1])
partial_weight_error[layer] = self.a[layer-1].transpose().dot(partial_layer_error[layer])
partial_bias_error[layer] = partial_layer_error[layer].reshape(1,self.size_vector[layer+1])
elif layer == 0:
partial_layer_error[layer] = partial_layer_error[layer+1].dot(self.weights[layer+1].transpose())
partial_weight_error[layer] = inputs.transpose().dot(partial_layer_error[layer])
partial_bias_error[layer] = partial_layer_error[layer].reshape(1,self.size_vector[layer+1])
else:
partial_layer_error[layer] = self.weights[layer+1].transpose().dot(partial_layer_error[layer+1])*self.activation_prime(self.z[layer])
partial_weight_error[layer] = self.a[layer-1].transpose().dot(partial_layer_error[layer])
partial_bias_error[layer] = partial_layer_error[layer].reshape(1,self.size_vector[layer+1])
for i in range(len(self.size_vector)-1):
self.weights[i] -= lr*partial_weight_error[i]
self.b[i] -= lr*partial_bias_error[i]
print(self.a[len(self.size_vector)-2])
print(expected)
def train(self, data):
batch = data.sample(2000)
pairs = [batch.iloc[:,0],batch.iloc[:,1]]
avg_I = np.zeros(9)
avg_O = np.zeros(784)
for index, (label, img) in batch.iterrows():
label = np.array(label)
img = np.array(img)[:,np.newaxis].transpose()/255
self.f_pass(img)
self.backporpagate((label, img))
def prepare_mnist():
print('preparing MNIST: please wait' + '\n')
with open('mnist_test.csv') as f:
data = f.readlines()
mnist = []
print('Reading Data: \n')
for i in data:
key = []
for j in range(9):
if j == int(i[0])-1:
key.append(1)
else:
key.append(0)
value = i[1:]
value = value.replace('\n','')
value = value.split(',')
value.pop(0)
value = [int(x) for x in value]
mnist.append((key,value))
print("Converting to DataFrame \n")
df = pd.DataFrame(mnist)
return df
N = Feedforward([784,50,9])
N.train(prepare_mnist())
If anyone could help me out, I would really appreciate it. This has bothered me for days, and I really want to get to grips with building these things practically.
I have two working streams.
1 - receives data from the COM port and sends a signal.
def __packet_parser(self, *args):
while self.__thred_is_runing:
data = self.__connection.read_all()
if data != b'':
self.change_data.emit(self.__readline())
self.__callback(self.__readline())
2 - draws the graph.
def set_value_by_plot_name(self, value, name='default'):
self.__plots[name].setData(np.append(self.__plots[name].getData()[1][1:], value))
def __test(self, value):
tmp = value.decode('windows-1252')
data = tmp.split('\t')
if len(data) == 10:
self.__gr.set_value_by_plot_name(int(data[1]))
def main(self):
self.__window = Window()
self.__gr = Graphics()
w = self.__gr.get_widget()
self.__window.add_widget(w)
connect = COMPortConnection('COM7', 38400)
connect.change_data.connect(self.__test)
connect.open()
self.__window.show()
For a few seconds everything works fine, and then the plot stops updating.
What is the problem?
The data is updated, but the picture is not.
I had a similar problem; I solved it by going through an intermediate method, though I'm not sure why it works:
class GraphWindowController:
# init ...
def start_graph(self):
# ... process
self.view.run_graph(self.graph_nodes)
stream_signals_and_slots = []
my_steam_signal.connect(self.add_value_to_graph)
def add_value_to_graph(self, values):
self.view.update(values)
class GraphWindow(QtGui.QFrame, Ui_GraphWindow):
def __init__(self, parent=None):
"""Initializer."""
super(GraphWindow, self).__init__(parent)
self.setupUi(self)
self.connect_signals()
self.plot_widget = None
self.y_data = []
self.x_data = []
self.curve = []
self.display_range = None
self.velocity = None
self.values_to_display = None
self.count_value_to_display = 0
self.legend = None
self.build_gui()
# Doing stuff ...
def build_gui(self):
self.plot_widget = pg.PlotWidget()
layout = QtGui.QGridLayout()
self.PlotLayout.setLayout(layout)
layout.addWidget(self.plot_widget, 0,1)
def configure_graph(self, display_range, velocity):
self.display_range = display_range
self.velocity = velocity
self.plot_widget.setLabels(left='axis 1')
self.legend = pg.LegendItem((100, 60), offset=(70, 30))
self.legend.setParentItem(self.plot_widget.graphicsItem())
self.plot_widget.showGrid(True, True, 251)
self.plot_widget.showButtons()
self.plot_widget.setRange(rect=None, xRange=[0, self.display_range], yRange=[
1300, -300], padding=0.09, update=True, disableAutoRange=True)
def run_graph(self, values_to_display):
self.values_to_display = values_to_display
self.curve_number = len(self.values_to_display)
for i in range(self.curve_number):
self.y_data.append([])
colors_to_use = self.enumerate_colors(
self.curve_number)
# Blablabla doing stuff ...
def update(self, my_values):
value = my_values
for i in range(self.curve_number):
if len(self.y_data[i]) >= int((self.display_range * 1000) / self.velocity):
del self.y_data[i][0]
else:
my_var = len(self.x_data) * (self.velocity / 1000.0)
self.x_data.append(my_var)
self.y_data[i].append(int(value[i]))
my_data_to_display = list(
zip(self.x_data, self.y_data[i]))
my_array = np.array(my_data_to_display)
self.curve[i].setData(my_array)