Is something wrong with my backpropagation calculation? - python

For the last 3 days, I have been trying to build my first neural network to no avail. I'm asking this question here because I can't think of anywhere else to get quality feedback and I haven't found any solutions by searching so far.
The network takes in 784 inputs (pixels) and has 9 outputs (numbers 0-9), I am trying to train it on the mnist hand written digit dataset.
The problem is that after several iterations, the output explodes to a list of random +1s and -1s eg.
[-1., 1., 1., 1., -1., -1., 1., 1., 1.]
I will include code snippets of the ANNs init(), f_pass() and backpropagate() as I think if there is an error, it will likely be in the code containing the matrices. I can upload more code on request if needed.
class Feedforward:
def __init__(self, size_vector):
self.nLayers = len(size_vector)
self.size_vector = size_vector
weight_matrix = lambda x: np.random.random((x[0],x[1]))
self.weights = {}
self.b = {}
self.z = {}
self.a = {}
for i in range(0,self.nLayers,1):
z = size_vector[i:i+2]
try:
self.b[i] = np.random.random((1,size_vector[i+1]))
except IndexError:
pass
if len(z) == 2:
self.weights[i] = weight_matrix(z)
f_pass():
def f_pass(self, data):
for layer in range(self.nLayers-1):
if layer == 0:
self.z[layer] = data.dot(self.weights[0])+self.b[layer].reshape((1,self.size_vector[layer+1]))
self.a[layer] = self.activation(self.z[layer]).reshape((1,self.size_vector[layer+1]))
else:
self.z[layer] = self.a[layer-1].dot(self.weights[layer])+self.b[layer]
self.a[layer] = self.activation(self.z[layer]).reshape((1,self.size_vector[layer+1]))
Backprop():
def backporpagate(self, data):
lr = .01
expected, inputs = data
cost = np.square(expected-self.a[self.nLayers-2])
partial_layer_error = {}
partial_weight_error = {}
partial_bias_error = {}
for i in range(self.nLayers-1):
layer = self.nLayers-2-i
if i == 0:
partial_layer_error[layer] = 2*(expected-self.a[self.nLayers-2])*self.activation_prime(self.z[layer]).reshape(1,self.size_vector[layer+1])
partial_weight_error[layer] = self.a[layer-1].transpose().dot(partial_layer_error[layer])
partial_bias_error[layer] = partial_layer_error[layer].reshape(1,self.size_vector[layer+1])
elif layer == 0:
partial_layer_error[layer] = partial_layer_error[layer+1].dot(self.weights[layer+1].transpose())
partial_weight_error[layer] = inputs.transpose().dot(partial_layer_error[layer])
partial_bias_error[layer] = partial_layer_error[layer].reshape(1,self.size_vector[layer+1])
else:
partial_layer_error[layer] = self.weights[layer+1].transpose().dot(partial_layer_error[layer+1])*self.activation_prime(self.z[layer])
partial_weight_error[layer] = self.a[layer-1].transpose().dot(partial_layer_error[layer])
partial_bias_error[layer] = partial_layer_error[layer].reshape(1,self.size_vector[layer+1])
for i in range(len(self.size_vector)-1):
self.weights[i] -= lr*partial_weight_error[i]
self.b[i] -= lr*partial_bias_error[i]
print(self.a[len(self.size_vector)-2])
print(expected)
The full code if anyone would like to view it is:
import numpy as np
import random
import pandas as pd
import scipy
class Feedforward:
def __init__(self, size_vector):
self.nLayers = len(size_vector)
self.size_vector = size_vector
weight_matrix = lambda x: np.random.random((x[0],x[1]))
self.weights = {}
self.b = {}
self.z = {}
self.a = {}
for i in range(0,self.nLayers,1):
z = size_vector[i:i+2]
try:
self.b[i] = np.random.random((1,size_vector[i+1]))
except IndexError:
pass
if len(z) == 2:
self.weights[i] = weight_matrix(z)
def activation(self, matrix):
#print(matrix)
matrix = np.clip( matrix, -300, 300 )
return (np.exp(2*matrix)-1)/(np.exp(2*matrix)+1)
def activation_prime(self, matrix):
return 1/(1-np.square(matrix))
def f_pass(self, data):
for layer in range(self.nLayers-1):
if layer == 0:
self.z[layer] = data.dot(self.weights[0])+self.b[layer].reshape((1,self.size_vector[layer+1]))
self.a[layer] = self.activation(self.z[layer]).reshape((1,self.size_vector[layer+1]))
else:
self.z[layer] = self.a[layer-1].dot(self.weights[layer])+self.b[layer]
self.a[layer] = self.activation(self.z[layer]).reshape((1,self.size_vector[layer+1]))
def backporpagate(self, data):
lr = .01
expected, inputs = data
cost = np.square(expected-self.a[self.nLayers-2])
partial_layer_error = {}
partial_weight_error = {}
partial_bias_error = {}
for i in range(self.nLayers-1):
layer = self.nLayers-2-i
if i == 0:
partial_layer_error[layer] = 2*(expected-self.a[self.nLayers-2])*self.activation_prime(self.z[layer]).reshape(1,self.size_vector[layer+1])
partial_weight_error[layer] = self.a[layer-1].transpose().dot(partial_layer_error[layer])
partial_bias_error[layer] = partial_layer_error[layer].reshape(1,self.size_vector[layer+1])
elif layer == 0:
partial_layer_error[layer] = partial_layer_error[layer+1].dot(self.weights[layer+1].transpose())
partial_weight_error[layer] = inputs.transpose().dot(partial_layer_error[layer])
partial_bias_error[layer] = partial_layer_error[layer].reshape(1,self.size_vector[layer+1])
else:
partial_layer_error[layer] = self.weights[layer+1].transpose().dot(partial_layer_error[layer+1])*self.activation_prime(self.z[layer])
partial_weight_error[layer] = self.a[layer-1].transpose().dot(partial_layer_error[layer])
partial_bias_error[layer] = partial_layer_error[layer].reshape(1,self.size_vector[layer+1])
for i in range(len(self.size_vector)-1):
self.weights[i] -= lr*partial_weight_error[i]
self.b[i] -= lr*partial_bias_error[i]
print(self.a[len(self.size_vector)-2])
print(expected)
def train(self, data):
batch = data.sample(2000)
pairs = [batch.iloc[:,0],batch.iloc[:,1]]
avg_I = np.zeros(9)
avg_O = np.zeros(784)
for index, (label, img) in batch.iterrows():
label = np.array(label)
img = np.array(img)[:,np.newaxis].transpose()/255
self.f_pass(img)
self.backporpagate((label, img))
def prepare_mnist():
print('preparing MNIST: please wait' + '\n')
with open('mnist_test.csv') as f:
data = f.readlines()
mnist = []
print('Reading Data: \n')
for i in data:
key = []
for j in range(9):
if j == int(i[0])-1:
key.append(1)
else:
key.append(0)
value = i[1:]
value = value.replace('\n','')
value = value.split(',')
value.pop(0)
value = [int(x) for x in value]
mnist.append((key,value))
print("Converting to DataFrame \n")
df = pd.DataFrame(mnist)
return df
N = Feedforward([784,50,9])
N.train(prepare_mnist())
If anyone could help me out, I would really appreciate it. This has bothered me for days, and I really want to get to grips with building these things practically.

Related

Converting custom Optimizer implemented in Pytorch to Tensorflow

I am not good with PyTorch so I would appreciate some help in converting this code to TensorFlow. I have trying going through some articles but it was a bit intensive so a little explanation would also be worthwhile so that the whole community can benefit from this.
"""
import torch
import copy
class PESG(torch.optim.Optimizer):
def __init__(self, model, a=None, b=None, alpha=None, imratio=0.1, margin=1.0, lr=0.1, gamma=500, clip_value=1.0, weight_decay=1e-5, **kwargs):
assert a is not None, 'Found no variable a!'
assert b is not None, 'Found no variable b!'
assert alpha is not None, 'Found no variable alpha!'
self.p = imratio
self.margin = margin
self.model = model
self.lr = lr
self.gamma = gamma
self.clip_value = clip_value
self.weight_decay = weight_decay
self.a = a
self.b = b
self.alpha = alpha
# TODO!
self.model_ref = []
for var in list(self.model.parameters())+[self.a, self.b]:
self.model_ref.append(torch.empty(var.shape).normal_(mean=0, std=0.01).cuda())
self.model_acc = []
for var in list(self.model.parameters())+[self.a, self.b]:
self.model_acc.append(torch.zeros(var.shape, dtype=torch.float32, device="cuda", requires_grad=False).cuda())
self.T = 0
self.step_counts = 0
def get_parameters(params):
for p in params:
yield p
self.params = get_parameters(list(model.parameters())+[a,b])
self.defaults = dict(lr=self.lr,
margin=margin,
gamma=gamma,
p=imratio,
a=self.a,
b=self.b,
alpha=self.alpha,
clip_value=clip_value,
weight_decay=weight_decay,
model_ref = self.model_ref,
model_acc = self.model_acc
)
super(PESG, self).__init__(self.params, self.defaults)
#property
def optim_steps(self):
return self.step_counts
def update_lr(self, lr):
self.param_groups[0]['lr']=lr
#torch.no_grad()
def step(self):
"""Performs a single optimization step.
"""
for group in self.param_groups:
weight_decay = group['weight_decay']
clip_value = group['clip_value']
self.lr = group['lr']
p = group['p']
gamma = group['gamma']
m = group['margin']
model_ref = group['model_ref']
model_acc = group['model_acc']
a = group['a']
b = group['b']
alpha = group['alpha']
# updates
for i, p in enumerate(group['params']):
if p.grad is None:
continue
p.data = p.data - group['lr']*( torch.clamp(p.grad.data , -clip_value, clip_value) + 1/gamma*(p.data - model_ref[i].data) ) - group['lr']*weight_decay*p.data
model_acc[i].data = model_acc[i].data + p.data
alpha.data = alpha.data + group['lr']*(2*(m + b.data - a.data)-2*alpha.data)
alpha.data = torch.clamp(alpha.data, 0, 999)
self.T += 1
self.step_counts += 1
def zero_grad(self):
self.model.zero_grad()
self.a.grad = None
self.b.grad = None
self.alpha.grad =None
def update_regularizer(self, decay_factor=None):
if decay_factor != None:
self.param_groups[0]['lr'] = self.param_groups[0]['lr']/decay_factor
print ('Reducing learning rate to %.5f # T=%s!'%(self.param_groups[0]['lr'], self.T))
print ('Updating regularizer # T=%s!'%(self.T))
for i, param in enumerate(self.model_ref):
self.model_ref[i].data = self.model_acc[i].data/self.T
for i, param in enumerate(self.model_acc):
self.model_acc[i].data = torch.zeros(param.shape, dtype=torch.float32, device="cuda", requires_grad=False).cuda()
self.T = 0
"""

'tensorflow_federated' has no attribute 'NamedTupleType

I am following this code https://github.com/BUAA-BDA/FedShapley/tree/master/TensorflowFL and trying to run the file same_OR.py
I also place input file "initial_model_parameters.txt" and data folder "MNIST_data" in same folder
from __future__ import absolute_import, division, print_function
import tensorflow_federated as tff
import tensorflow.compat.v1 as tf
import numpy as np
import time
from scipy.special import comb, perm
import os
# tf.compat.v1.enable_v2_behavior()
# tf.compat.v1.enable_eager_execution()
# NUM_EXAMPLES_PER_USER = 1000
BATCH_SIZE = 100
NUM_AGENT = 5
def get_data_for_digit(source, digit):
output_sequence = []
all_samples = [i for i, d in enumerate(source[1]) if d == digit]
for i in range(0, len(all_samples), BATCH_SIZE):
batch_samples = all_samples[i:i + BATCH_SIZE]
output_sequence.append({
'x': np.array([source[0][i].flatten() / 255.0 for i in batch_samples],
dtype=np.float32),
'y': np.array([source[1][i] for i in batch_samples], dtype=np.int32)})
return output_sequence
def get_data_for_digit_test(source, digit):
output_sequence = []
all_samples = [i for i, d in enumerate(source[1]) if d == digit]
for i in range(0, len(all_samples)):
output_sequence.append({
'x': np.array(source[0][all_samples[i]].flatten() / 255.0,
dtype=np.float32),
'y': np.array(source[1][all_samples[i]], dtype=np.int32)})
return output_sequence
def get_data_for_federated_agents(source, num):
output_sequence = []
Samples = []
for digit in range(0, 10):
samples = [i for i, d in enumerate(source[1]) if d == digit]
samples = samples[0:5421]
Samples.append(samples)
all_samples = []
for sample in Samples:
for sample_index in range(int(num * (len(sample) / NUM_AGENT)), int((num + 1) * (len(sample) / NUM_AGENT))):
all_samples.append(sample[sample_index])
# all_samples = [i for i in range(int(num*(len(source[1])/NUM_AGENT)), int((num+1)*(len(source[1])/NUM_AGENT)))]
for i in range(0, len(all_samples), BATCH_SIZE):
batch_samples = all_samples[i:i + BATCH_SIZE]
output_sequence.append({
'x': np.array([source[0][i].flatten() / 255.0 for i in batch_samples],
dtype=np.float32),
'y': np.array([source[1][i] for i in batch_samples], dtype=np.int32)})
return output_sequence
BATCH_TYPE = tff.NamedTupleType([
('x', tff.TensorType(tf.float32, [None, 784])),
('y', tff.TensorType(tf.int32, [None]))])
MODEL_TYPE = tff.NamedTupleType([
('weights', tff.TensorType(tf.float32, [784, 10])),
('bias', tff.TensorType(tf.float32, [10]))])
#tff.tf_computation(MODEL_TYPE, BATCH_TYPE)
def batch_loss(model, batch):
predicted_y = tf.nn.softmax(tf.matmul(batch.x, model.weights) + model.bias)
return -tf.reduce_mean(tf.reduce_sum(
tf.one_hot(batch.y, 10) * tf.log(predicted_y), axis=[1]))
#tff.tf_computation(MODEL_TYPE, BATCH_TYPE, tf.float32)
def batch_train(initial_model, batch, learning_rate):
# Define a group of model variables and set them to `initial_model`.
model_vars = tff.utils.create_variables('v', MODEL_TYPE)
init_model = tff.utils.assign(model_vars, initial_model)
# Perform one step of gradient descent using loss from `batch_loss`.
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
with tf.control_dependencies([init_model]):
train_model = optimizer.minimize(batch_loss(model_vars, batch))
# Return the model vars after performing this gradient descent step.
with tf.control_dependencies([train_model]):
return tff.utils.identity(model_vars)
LOCAL_DATA_TYPE = tff.SequenceType(BATCH_TYPE)
#tff.federated_computation(MODEL_TYPE, tf.float32, LOCAL_DATA_TYPE)
def local_train(initial_model, learning_rate, all_batches):
# Mapping function to apply to each batch.
#tff.federated_computation(MODEL_TYPE, BATCH_TYPE)
def batch_fn(model, batch):
return batch_train(model, batch, learning_rate)
l = tff.sequence_reduce(all_batches, initial_model, batch_fn)
return l
#tff.federated_computation(MODEL_TYPE, LOCAL_DATA_TYPE)
def local_eval(model, all_batches):
#
return tff.sequence_sum(
tff.sequence_map(
tff.federated_computation(lambda b: batch_loss(model, b), BATCH_TYPE),
all_batches))
SERVER_MODEL_TYPE = tff.FederatedType(MODEL_TYPE, tff.SERVER, all_equal=True)
CLIENT_DATA_TYPE = tff.FederatedType(LOCAL_DATA_TYPE, tff.CLIENTS)
#tff.federated_computation(SERVER_MODEL_TYPE, CLIENT_DATA_TYPE)
def federated_eval(model, data):
return tff.federated_mean(
tff.federated_map(local_eval, [tff.federated_broadcast(model), data]))
SERVER_FLOAT_TYPE = tff.FederatedType(tf.float32, tff.SERVER, all_equal=True)
#tff.federated_computation(
SERVER_MODEL_TYPE, SERVER_FLOAT_TYPE, CLIENT_DATA_TYPE)
def federated_train(model, learning_rate, data):
l = tff.federated_map(
local_train,
[tff.federated_broadcast(model),
tff.federated_broadcast(learning_rate),
data])
return l
# return tff.federated_mean()
def readTestImagesFromFile(distr_same):
ret = []
if distr_same:
f = open(os.path.join(os.path.dirname(__file__), "test_images1_.txt"), encoding="utf-8")
else:
f = open(os.path.join(os.path.dirname(__file__), "test_images1_.txt"), encoding="utf-8")
lines = f.readlines()
for line in lines:
tem_ret = []
p = line.replace("[", "").replace("]", "").replace("\n", "").split("\t")
for i in p:
if i != "":
tem_ret.append(float(i))
ret.append(tem_ret)
return np.asarray(ret)
def readTestLabelsFromFile(distr_same):
ret = []
if distr_same:
f = open(os.path.join(os.path.dirname(__file__), "test_labels_.txt"), encoding="utf-8")
else:
f = open(os.path.join(os.path.dirname(__file__), "test_labels_.txt"), encoding="utf-8")
lines = f.readlines()
for line in lines:
tem_ret = []
p = line.replace("[", "").replace("]", "").replace("\n", "").split(" ")
for i in p:
if i!="":
tem_ret.append(float(i))
ret.append(tem_ret)
return np.asarray(ret)
def getParmsAndLearningRate(agent_no):
f = open(os.path.join(os.path.dirname(__file__), "weights_" + str(agent_no) + ".txt"))
content = f.read()
g_ = content.split("***\n--------------------------------------------------")
parm_local = []
learning_rate_list = []
for j in range(len(g_) - 1):
line = g_[j].split("\n")
if j == 0:
weights_line = line[0:784]
learning_rate_list.append(float(line[784].replace("*", "").replace("\n", "")))
else:
weights_line = line[1:785]
learning_rate_list.append(float(line[785].replace("*", "").replace("\n", "")))
valid_weights_line = []
for l in weights_line:
w_list = l.split("\t")
w_list = w_list[0:len(w_list) - 1]
w_list = [float(i) for i in w_list]
valid_weights_line.append(w_list)
parm_local.append(valid_weights_line)
f.close()
f = open(os.path.join(os.path.dirname(__file__), "bias_" + str(agent_no) + ".txt"))
content = f.read()
g_ = content.split("***\n--------------------------------------------------")
bias_local = []
for j in range(len(g_) - 1):
line = g_[j].split("\n")
if j == 0:
weights_line = line[0]
else:
weights_line = line[1]
b_list = weights_line.split("\t")
b_list = b_list[0:len(b_list) - 1]
b_list = [float(i) for i in b_list]
bias_local.append(b_list)
f.close()
ret = {
'weights': np.asarray(parm_local),
'bias': np.asarray(bias_local),
'learning_rate': np.asarray(learning_rate_list)
}
return ret
def train_with_gradient_and_valuation(agent_list, grad, bi, lr, distr_type):
f_ini_p = open(os.path.join(os.path.dirname(__file__), "initial_model_parameters.txt"), "r")
para_lines = f_ini_p.readlines()
w_paras = para_lines[0].split("\t")
w_paras = [float(i) for i in w_paras]
b_paras = para_lines[1].split("\t")
b_paras = [float(i) for i in b_paras]
w_initial_g = np.asarray(w_paras, dtype=np.float32).reshape([784, 10])
b_initial_g = np.asarray(b_paras, dtype=np.float32).reshape([10])
f_ini_p.close()
model_g = {
'weights': w_initial_g,
'bias': b_initial_g
}
for i in range(len(grad[0])):
# i->迭代轮数
gradient_w = np.zeros([784, 10], dtype=np.float32)
gradient_b = np.zeros([10], dtype=np.float32)
for j in agent_list:
gradient_w = np.add(np.multiply(grad[j][i], 1/len(agent_list)), gradient_w)
gradient_b = np.add(np.multiply(bi[j][i], 1/len(agent_list)), gradient_b)
model_g['weights'] = np.subtract(model_g['weights'], np.multiply(lr[0][i], gradient_w))
model_g['bias'] = np.subtract(model_g['bias'], np.multiply(lr[0][i], gradient_b))
test_images = readTestImagesFromFile(False)
test_labels_onehot = readTestLabelsFromFile(False)
m = np.dot(test_images, np.asarray(model_g['weights']))
test_result = m + np.asarray(model_g['bias'])
y = tf.nn.softmax(test_result)
correct_prediction = tf.equal(tf.argmax(y, 1), tf.arg_max(test_labels_onehot, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
return accuracy.numpy()
def remove_list_indexed(removed_ele, original_l, ll):
new_original_l = []
for i in original_l:
new_original_l.append(i)
for i in new_original_l:
if i == removed_ele:
new_original_l.remove(i)
for i in range(len(ll)):
if set(ll[i]) == set(new_original_l):
return i
return -1
def shapley_list_indexed(original_l, ll):
for i in range(len(ll)):
if set(ll[i]) == set(original_l):
return i
return -1
def PowerSetsBinary(items):
N = len(items)
set_all = []
for i in range(2 ** N):
combo = []
for j in range(N):
if (i >> j) % 2 == 1:
combo.append(items[j])
set_all.append(combo)
return set_all
if __name__ == "__main__":
start_time = time.time()
#data_num = np.asarray([5923,6742,5958,6131,5842])
#agents_weights = np.divide(data_num, data_num.sum())
for index in range(NUM_AGENT):
f = open(os.path.join(os.path.dirname(__file__), "weights_"+str(index)+".txt"), "w")
f.close()
f = open(os.path.join(os.path.dirname(__file__), "bias_" + str(index) + ".txt"), "w")
f.close()
mnist_train, mnist_test = tf.keras.datasets.mnist.load_data()
DISTRIBUTION_TYPE = "SAME"
federated_train_data_divide = None
federated_train_data = None
if DISTRIBUTION_TYPE == "SAME":
federated_train_data_divide = [get_data_for_federated_agents(mnist_train, d) for d in range(NUM_AGENT)]
federated_train_data = federated_train_data_divide
f_ini_p = open(os.path.join(os.path.dirname(__file__), "initial_model_parameters.txt"), "r")
para_lines = f_ini_p.readlines()
w_paras = para_lines[0].split("\t")
w_paras = [float(i) for i in w_paras]
b_paras = para_lines[1].split("\t")
b_paras = [float(i) for i in b_paras]
w_initial = np.asarray(w_paras, dtype=np.float32).reshape([784, 10])
b_initial = np.asarray(b_paras, dtype=np.float32).reshape([10])
f_ini_p.close()
initial_model = {
'weights': w_initial,
'bias': b_initial
}
model = initial_model
learning_rate = 0.1
for round_num in range(50):
local_models = federated_train(model, learning_rate, federated_train_data)
print("learning rate: ", learning_rate)
#print(local_models[0][0])#第0个agent的weights矩阵
#print(local_models[0][1])#第0个agent的bias矩阵
#print(len(local_models))
for local_index in range(len(local_models)):
f = open(os.path.join(os.path.dirname(__file__), "weights_"+str(local_index)+".txt"),"a",encoding="utf-8")
for i in local_models[local_index][0]:
line = ""
arr = list(i)
for j in arr:
line += (str(j)+"\t")
print(line, file=f)
print("***"+str(learning_rate)+"***",file=f)
print("-"*50,file=f)
f.close()
f = open(os.path.join(os.path.dirname(__file__), "bias_" + str(local_index) + ".txt"), "a", encoding="utf-8")
line = ""
for i in local_models[local_index][1]:
line += (str(i) + "\t")
print(line, file=f)
print("***" + str(learning_rate) + "***",file=f)
print("-"*50,file=f)
f.close()
m_w = np.zeros([784, 10], dtype=np.float32)
m_b = np.zeros([10], dtype=np.float32)
for local_model_index in range(len(local_models)):
m_w = np.add(np.multiply(local_models[local_model_index][0], 1/NUM_AGENT), m_w)
m_b = np.add(np.multiply(local_models[local_model_index][1], 1/NUM_AGENT), m_b)
model = {
'weights': m_w,
'bias': m_b
}
learning_rate = learning_rate * 0.9
loss = federated_eval(model, federated_train_data)
print('round {}, loss={}'.format(round_num, loss))
print(time.time()-start_time)
gradient_weights = []
gradient_biases = []
gradient_lrs = []
for ij in range(NUM_AGENT):
model_ = getParmsAndLearningRate(ij)
gradient_weights_local = []
gradient_biases_local = []
learning_rate_local = []
for i in range(len(model_['learning_rate'])):
if i == 0:
gradient_weight = np.divide(np.subtract(initial_model['weights'], model_['weights'][i]),
model_['learning_rate'][i])
gradient_bias = np.divide(np.subtract(initial_model['bias'], model_['bias'][i]),
model_['learning_rate'][i])
else:
gradient_weight = np.divide(np.subtract(model_['weights'][i - 1], model_['weights'][i]),
model_['learning_rate'][i])
gradient_bias = np.divide(np.subtract(model_['bias'][i - 1], model_['bias'][i]),
model_['learning_rate'][i])
gradient_weights_local.append(gradient_weight)
gradient_biases_local.append(gradient_bias)
learning_rate_local.append(model_['learning_rate'][i])
gradient_weights.append(gradient_weights_local)
gradient_biases.append(gradient_biases_local)
gradient_lrs.append(learning_rate_local)
all_sets = PowerSetsBinary([i for i in range(NUM_AGENT)])
group_shapley_value = []
for s in all_sets:
group_shapley_value.append(
train_with_gradient_and_valuation(s, gradient_weights, gradient_biases, gradient_lrs, DISTRIBUTION_TYPE))
print(str(s)+"\t"+str(group_shapley_value[len(group_shapley_value)-1]))
agent_shapley = []
for index in range(NUM_AGENT):
shapley = 0.0
for j in all_sets:
if index in j:
remove_list_index = remove_list_indexed(index, j, all_sets)
if remove_list_index != -1:
shapley += (group_shapley_value[shapley_list_indexed(j, all_sets)] - group_shapley_value[
remove_list_index]) / (comb(NUM_AGENT - 1, len(all_sets[remove_list_index])))
agent_shapley.append(shapley)
for ag_s in agent_shapley:
print(ag_s)
print("end_time", time.time()-start_time)
I installed tensor flow federated with this command
pip install --upgrade tensorflow_federated
and this line is also underlied with red color
import tensorflow.compat.v1 as tf
when i tried to execute go this error
File "same_OR.py", line 94, in
BATCH_TYPE = tff.NamedTupleType([ AttributeError: module 'tensorflow_federated' has no attribute 'NamedTupleType'
where is the problem? anyone can help?
tff.NamedTupleType was renamed to tff.StructType in TFF version 0.16.0 (release notes).
Two options:
Install a pre-0.16.0 version of TFF: this should be doable with pip install tensorflow_federated=0.15.0.
Update the code: the error should go away after replacing the tff.NamedTupleType with tff.StructType in the snippet:
BATCH_TYPE = tff.NamedTupleType([
('x', tff.TensorType(tf.float32, [None, 784])),
('y', tff.TensorType(tf.int32, [None]))])
MODEL_TYPE = tff.NamedTupleType([
('weights', tff.TensorType(tf.float32, [784, 10])),
('bias', tff.TensorType(tf.float32, [10]))])

My Neural Network algorithm is not working mnist numbers

I could use a second set of eyes on my neural network.
This is the mnist number recognition project.
I'm not sure where the issue is.
I previously implemented the ai with tensor flow successfully.
I'm not looking to use an api as a solution.
I would appreciate any help anyone can give.
Here's the project on github, it's only an init file and then the neural_network.
https://github.com/nealchawn/ai_trial_2
class NeuralNetwork(object):
def __init__(self, sizes):
self.activations = []
self.outputs = []
self.weights = []
self.biases = []
self.sizes = sizes
self.set_random_weights()
self.set_random_biases()
def set_random_weights(self):
for layer_index, layer_size in enumerate(self.sizes[1:], start=1):
layer_weights = []
for size in range(layer_size):
for size in range(self.sizes[layer_index-1]):
layer_weights.append(random.uniform(-5.0, 5.0))
self.weights.append(layer_weights)
def set_random_biases(self):
total_biases = 0
# add extra zero bias to help future indexing
#self.biases.append(0)
for index, size in enumerate(self.sizes[0:-1], start=1):
total_biases += 1
for x in range(total_biases):
self.biases.append(random.uniform(-5.0, 5.0))
def train_network(self, training_data, training_labels):
if len(training_data) != len(training_labels):
print("Error data and labels must be the same length")
data = list(zip(training_data, training_labels))
self.sgd(data)
def sgd(self, data, mini_batch_size = 1000):
# first we'll create batches of training data
n = len(data)
data_batches = [
data[k:k + mini_batch_size]
for k in range(0, n, mini_batch_size)
]
print(len(data_batches))
i = 0
for mini_batch in data_batches:
print("Batch: " + str(i))
i += 1
self.update_mini_batch(mini_batch)
self.network_outputs()
print("Finished All training data!")
def update_mini_batch(self, mini_data_batch):
weight_gradients = []
bias_gradients = []
i = 0
for training_input in mini_data_batch:
training_object, training_label = training_input
self.feedforward(training_object)
weights_gradient, bias_gradient = self.backpropogation(training_label)
weight_gradients.append(weights_gradient)
bias_gradients.append(bias_gradient)
# average gradients
weights_gradient = np.average(weight_gradients,axis=0)
biases_gradient = np.average(bias_gradients, axis=0)
# may need to convert to list
weights_gradient_list = []
for weight_gradient in weights_gradient:
weights_gradient_list.append(weight_gradient.tolist())
#weights_gradient = weights_gradient.tolist()
biases_gradient = biases_gradient.tolist()
for x in range(len(self.biases)):
self.biases[x] -= 0.1*biases_gradient[x]
weight_gradient_index = 0
for layer_index, layer_weights in enumerate(self.weights, start=0):
for weight_index, weight in enumerate(layer_weights):
self.weights[layer_index][weight_index] = weight - 0.1*weights_gradient_list[layer_index][weight_index]
weight_gradient_index += 1
def feedforward(self, training_object):
# set inputs
self.outputs = []
self.activations = []
temp_activations = []
for index in range(self.sizes[0]):
temp_activations.append(training_object[index])
self.activations.append(temp_activations)
for layer_index, layer_size in enumerate(self.sizes[1:], start=0):
layer_weights = self.weights[layer_index]
layer_inputs = self.activations[layer_index]
weight_index = 0
layer_outputs = []
layer_activations = []
for node_index in range(layer_size):
node_weights = []
# get node weights
#print(f"layer size: {layer_size}, previous_layer_size: {self.sizes[layer_index]}, layer weights: {len(layer_weights)}")
for x in range(self.sizes[layer_index]):
node_weights.append(layer_weights[weight_index])
weight_index += 1
output = 0
for indx in range(len(node_weights)):
output += layer_inputs[indx]*node_weights[indx]
output = output + self.biases[layer_index]
layer_outputs.append(output)
layer_activations.append(self.sigmoid(output))
self.outputs.append(layer_outputs)
self.activations.append(layer_activations)
def backpropogation(self, training_label):
costs = []
output_layer_activations = self.activations[-1]
output_layer_outputs = self.outputs[-1]
correct_labels = self.translate_label_to_array(training_label)
costs.append(self.compute_cost_derivative(correct_labels, output_layer_activations))
for cost_index, cost in enumerate(costs[0]):
costs[0][cost_index] = cost*self.sigmoid_prime(output_layer_outputs[cost_index])
# calculate costs for layers
for layer_index, layer_size in enumerate(self.sizes[::-1][1:-1], start=1):
layer_costs = []
layer_weights = self.weights[-layer_index]
layer_outputs = self.outputs[-(layer_index+1)]
previous_layer_costs = costs[layer_index-1]
next_layer_size = self.sizes[::-1][1:][layer_index]
layer_weights_formatted = []
for x in range(layer_size):
layer_weights_formatted.append([])
for weight_index, weight in enumerate(layer_weights, start=0):
#print(f"weight index:{weight_index % next_layer_size} layer_index: {weight_index}")
layer_weights_formatted[weight_index%layer_size].append(layer_weights[weight_index])
#print(f"next_layer_size:{layer_size} costs: {len(previous_layer_costs)}, layer_weights_formatted: {layer_weights_formatted}")
for x in range(layer_size):
node_cost = 0
for y, cost in enumerate(previous_layer_costs,start=0):
node_cost += layer_weights_formatted[x][y]*cost
layer_costs.append(node_cost)
# layer_costs same order as next layer's activations
for cost_index, cost in enumerate(layer_costs):
layer_costs[cost_index] = cost * self.sigmoid_prime(layer_outputs[cost_index])
costs.append(layer_costs)
# calculate weight errors
weight_errors = []
bias_errors = []
for layer_index, layer_costs in enumerate(costs[::-1]):
layer_activations = self.activations[layer_index]
layer_weight_errors = []
for cost_index, cost in enumerate(layer_costs,start=0):
for activation in layer_activations:
layer_weight_errors.append(activation * cost)
weight_errors.append(np.array(layer_weight_errors))
bias_errors.append(sum(layer_costs))
return weight_errors, bias_errors
# conversion tool
def translate_label_to_array(self, y):
translated_label = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
translated_label[y] = 1
return np.array(translated_label)
# output tools
def network_outputs(self):
print("Output layer: ")
for x in range(self.sizes[-1]):
print("node " + str(x) + ": " + str(self.activations[-1][x]))
def total_activations(self):
print(len(self.activations))
def compute_cost_derivative(self, y, output_activations):
"""Return the vector of partial derivatives \partial C_x /
\partial a for the output activations."""
return (output_activations - y)
def sigmoid(self, z):
""""The sigmoid function."""
return (1.0 / (1.0 + np.exp(-z)))
def sigmoid_prime(self, z):
return (self.sigmoid(z) * (1 - self.sigmoid(z)))

Convolution preprocess consuming more RAM Python

I am trying to implement this CNN model for stock prize prediction: https://github.com/ZezhouLi/Convolutional-Networks-for-Stock-Predicting
But I am facing the issue while preprocessing the data for the implementation. The preprocess step is consuming a lot of RAM. (My system has 32 GB RAM and 256 GB SSD Hard disk)
Here is the file that is consuming the RAM and at last gives a Memory Error:
import numpy as np
import matplotlib.pyplot as plt
import glob
import math
from PIL import Image
import statsmodels.api as sm
def r_squared(y_true, y_hat):
ssr = 0
sst = 0
e = np.subtract(y_true, y_hat)
y_mean = np.mean(y_true)
for item in e:
ssr += item**2
for item in y_true:
sst += (item - y_mean)**2
r2 = 1 - ssr / sst
return r2
def data_process(data):
processed_data = []
for item in data:
m = np.mean(item)
s = np.std(item)
normal_item = [(float(i)-m)/s for i in item]
normal_item.insert(0, 1)
processed_data.append(normal_item)
return processed_data
def get_pixel_values():
file_name = r'\figures'
pixels = []
for filename in glob.glob(file_name + '\*.png'):
im = Image.open(filename)
temp_pixels = list(im.getdata())
pixels.append(temp_pixels)
return pixels
def find_returns(data):
returns = []
for group in data:
count = 30
while count <= (len(group)-5):
current_data = group[count-1]
future_data = group[count+4]
p1 = np.mean(current_data)
p2 = np.mean(future_data)
returns.append(math.log(p2/p1))
count += 1
return returns
def convert_image():
size = 54, 32
file_name = r'\figures'
for filename in glob.glob(file_name + '\*.png'):
img = Image.open(filename)
img.thumbnail(size)
img = img.convert('L')
img.save(filename)
def plot_data(data):
t = np.arange(0, 29, 1)
file_name_number = 0
fig = plt.figure(frameon=False)
for group in data:
count = 30
while count <= (len(group)-5):
high = []
low = []
for item in group[count-30:count]:
high.append(item[0])
low.append(item[1])
file_name = r'\fig_' + str(file_name_number)
ax = plt.Axes(fig, [0., 0., 1., 1.])
ax.set_axis_off()
fig.add_axes(ax)
ax.plot(t, high[0:-1], 'b', t, low[0:-1], 'g')
fig.savefig(r'\figures' + file_name)
fig.clf()
file_name_number += 1
count += 1
print('Created %d files!' % file_name_number)
def extract_useful_data(data):
groups = []
for group in data:
temp_buffer = []
for item in group:
temp = [item[2], item[3]]
temp = [float(i) for i in temp]
temp_buffer.append(temp)
groups.append(temp_buffer)
return groups
def split_data(data):
groups = []
for item in data:
temp_buffer = []
for string in item:
number = string.split(',')
temp_buffer.append(number)
groups.append(temp_buffer)
return groups
def extract_data():
file_name = r'\data.txt'
infile = open(file_name, 'r')
temp_buffer = []
for line in infile:
temp_buffer.append(line.strip('\n'))
temp_buffer = temp_buffer[8:]
i = 0
groups = []
temp = []
for item in temp_buffer:
if i != 390:
temp.append(item)
i += 1
else:
groups.append(temp)
temp = []
i = 0
groups.append(temp)
infile.close()
return groups
def main():
original_data = extract_data()
splitted_data = split_data(original_data)
useful_data = extract_useful_data(splitted_data)
plot_data(useful_data)
convert_image()
returns = np.asarray(find_returns(useful_data))
training_data = np.asarray(get_pixel_values())
training_data = sm.add_constant(training_data, has_constant='add')
results = sm.OLS(returns[0:4340], training_data[0:4340]).fit()
y_in_sample = results.predict(training_data[0:4340])
r2 = r_squared(returns[0:4340], y_in_sample)
print r2
if __name__ == "__main__":
main()
I have got Memory Error, which occurs when the program consumes all of the RAM memory of the system. Please improve on the program.

Training speed on GPU become slower overtime

My model training speed becomes slower over time. Every epoch take longer time to train.
Here is the full source code with my preprocess sentiment tree bank data (put glove.840B.300d.txt into data/glove).
Install some python packages:
pip install meowlogtool
pip install tqdm
Command to run:
python sentiment.py --emblr 0 --rel_dim 0 --tag_dim 0 --optim adagrad --name basic --lr 0.05 --wd 1e-4 --at_hid_dim 0
Model source code for you to read
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable as Var
import utils
import Constants
from model import SentimentModule
from embedding_model import EmbeddingModel
class SimpleGRU(nn.Module):
"""
w[i] : (300, 1)
h[i] : (150, 1)
p[i] : (20, 1)
r[i] : (20, 1)
k[i] : (150, 1)
x[i] : (20 + 150 + 300 + 20 = 490, 1) (490, 1)
Uz, Ur, Uh : (150, 150) => 67500 => (450, 450)
Wz, Wr, Wh : (150, 20 + 150 + 300 + 20) (150, 490)
"""
def __init__(self, cuda, in_dim, hid_dim):
super(SimpleGRU, self).__init__()
self.cudaFlag = cuda
self.Uz = nn.Linear(hid_dim, hid_dim)
self.Ur = nn.Linear(hid_dim, hid_dim)
self.Uh = nn.Linear(hid_dim, hid_dim)
self.Wz = nn.Linear(in_dim, hid_dim)
self.Wr = nn.Linear(in_dim, hid_dim)
self.Wh = nn.Linear(in_dim, hid_dim)
if self.cudaFlag:
self.Uz = self.Uz.cuda()
self.Ur = self.Uz.cuda()
self.Uh = self.Uz.cuda()
self.Wz = self.Wz.cuda()
self.Wr = self.Wr.cuda()
self.Wh = self.Wh.cuda()
def forward(self, x, h_prev):
"""
Simple-GRU(compress_x[v], h[t-1]) :
z[t] := s(Wz *compress_x[t]+ Uz * h[t-1] + bz)
r[t] := s(Wr * compress_x[t] + Ur * h[t-1] + br)
h_temp[t] := g(Wh * compress_x[t] + Uh * h[t-1] + bh)
h[t] := r[t] .* h[t-1] + (1 - z[t]) .* h_temp[t]
return h[t]
:param x: compress_x[t]
:param h_prev: h[t-1]
:return:
"""
z = F.sigmoid(self.Wz(x) + self.Uz(h_prev))
r = F.sigmoid(self.Wr(x) + self.Ur(h_prev))
h_temp = F.tanh(self.Wh(x) + self.Uh(h_prev))
h = r*h_prev + (1-z)*h_temp
return h
class TreeSimpleGRU(nn.Module):
def __init__(self, cuda, word_dim, tag_dim, rel_dim, mem_dim, at_hid_dim, criterion, leaf_h = None):
super(TreeSimpleGRU, self).__init__()
self.cudaFlag = cuda
# self.gru_cell = nn.GRUCell(word_dim + tag_dim, mem_dim)
self.gru_cell = SimpleGRU(self.cudaFlag, word_dim+tag_dim, mem_dim)
self.gru_at = GRU_AT(self.cudaFlag, word_dim + tag_dim + rel_dim + mem_dim, at_hid_dim ,mem_dim)
self.mem_dim = mem_dim
self.in_dim = word_dim
self.tag_dim = tag_dim
self.rel_dim = rel_dim
self.leaf_h = leaf_h # init h for leaf node
if self.leaf_h == None:
self.leaf_h = Var(torch.rand(1, self.mem_dim))
torch.save(self.leaf_h, 'leaf_h.pth')
if self.cudaFlag:
self.leaf_h = self.leaf_h.cuda()
self.criterion = criterion
self.output_module = None
def getParameters(self):
"""
Get flatParameters
note that getParameters and parameters is not equal in this case
getParameters do not get parameters of output module
:return: 1d tensor
"""
params = []
for m in [self.gru_cell, self.gru_at]:
# we do not get param of output module
l = list(m.parameters())
params.extend(l)
one_dim = [p.view(p.numel()) for p in params]
params = F.torch.cat(one_dim)
return params
def set_output_module(self, output_module):
self.output_module = output_module
def forward(self, tree, w_emb, tag_emb, rel_emb, training = False):
loss = Var(torch.zeros(1)) # init zero loss
if self.cudaFlag:
loss = loss.cuda()
for idx in xrange(tree.num_children):
_, child_loss = self.forward(tree.children[idx], w_emb, tag_emb, rel_emb, training)
loss = loss + child_loss
if tree.num_children > 0:
child_rels, child_k = self.get_child_state(tree, rel_emb)
if self.tag_dim > 0:
tree.state = self.node_forward(w_emb[tree.idx - 1], tag_emb[tree.idx -1], child_rels, child_k)
else:
tree.state = self.node_forward(w_emb[tree.idx - 1], None, child_rels, child_k)
elif tree.num_children == 0:
if self.tag_dim > 0:
tree.state = self.leaf_forward(w_emb[tree.idx - 1], tag_emb[tree.idx -1])
else:
tree.state = self.leaf_forward(w_emb[tree.idx - 1], None)
if self.output_module != None:
output = self.output_module.forward(tree.state, training)
tree.output = output
if training and tree.gold_label != None:
target = Var(utils.map_label_to_target_sentiment(tree.gold_label))
if self.cudaFlag:
target = target.cuda()
loss = loss + self.criterion(output, target)
return tree.state, loss
def leaf_forward(self, word_emb, tag_emb):
"""
Forward function for leaf node
:param word_emb: word embedding of current node u
:param tag_emb: tag embedding of current node u
:return: k of current node u
"""
h = self.leaf_h
if self.cudaFlag:
h = h.cuda()
if self.tag_dim > 0:
x = F.torch.cat([word_emb, tag_emb], 1)
else:
x = word_emb
k = self.gru_cell(x, h)
return k
def node_forward(self, word_emb, tag_emb, child_rels, child_k):
"""
Foward function for inner node
:param word_emb: word embedding of current node u
:param tag_emb: tag embedding of current node u
:param child_rels (tensor): rels embedding of child node v
:param child_k (tensor): k of child node v
:return:
"""
n_child = child_k.size(0)
h = Var(torch.zeros(1, self.mem_dim))
if self.cudaFlag:
h = h.cuda()
for i in range(0, n_child):
k = child_k[i]
x_list = [word_emb, k]
if self.rel_dim >0:
rel = child_rels[i]
x_list.append(rel)
if self.tag_dim > 0:
x_list.append(tag_emb)
x = F.torch.cat(x_list, 1)
h = self.gru_at(x, h)
k = h
return k
def get_child_state(self, tree, rels_emb):
"""
Get child rels, get child k
:param tree: tree we need to get child
:param rels_emb (tensor):
:return:
"""
if tree.num_children == 0:
assert False # never get here
else:
child_k = Var(torch.Tensor(tree.num_children, 1, self.mem_dim))
if self.rel_dim>0:
child_rels = Var(torch.Tensor(tree.num_children, 1, self.rel_dim))
else:
child_rels = None
if self.cudaFlag:
child_k = child_k.cuda()
if self.rel_dim > 0:
child_rels = child_rels.cuda()
for idx in xrange(tree.num_children):
child_k[idx] = tree.children[idx].state
if self.rel_dim > 0:
child_rels[idx] = rels_emb[tree.children[idx].idx - 1]
return child_rels, child_k
class AT(nn.Module):
"""
AT(compress_x[v]) := sigmoid(Wa * tanh(Wb * compress_x[v] + bb) + ba)
"""
def __init__(self, cuda, in_dim, hid_dim):
super(AT, self).__init__()
self.cudaFlag = cuda
self.in_dim = in_dim
self.hid_dim = hid_dim
self.Wa = nn.Linear(hid_dim, 1)
self.Wb = nn.Linear(in_dim, hid_dim)
if self.cudaFlag:
self.Wa = self.Wa.cuda()
self.Wb = self.Wb.cuda()
def forward(self, x):
out = F.sigmoid(self.Wa(F.tanh(self.Wb(x))))
return out
class GRU_AT(nn.Module):
def __init__(self, cuda, in_dim, at_hid_dim ,mem_dim):
super(GRU_AT, self).__init__()
self.cudaFlag = cuda
self.in_dim = in_dim
self.mem_dim = mem_dim
self.at_hid_dim = at_hid_dim
if at_hid_dim > 0:
self.at = AT(cuda, in_dim, at_hid_dim)
self.gru_cell = SimpleGRU(self.cudaFlag, in_dim, mem_dim)
if self.cudaFlag:
if at_hid_dim > 0:
self.at = self.at.cuda()
self.gru_cell = self.gru_cell.cuda()
def forward(self, x, h_prev):
"""
:param x:
:param h_prev:
:return: a * m + (1 - a) * h[t-1]
"""
m = self.gru_cell(x, h_prev)
if self.at_hid_dim > 0:
a = self.at.forward(x)
h = torch.mm(a, m) + torch.mm((1-a), h_prev)
else:
h = m
return h
class TreeGRUSentiment(nn.Module):
def __init__(self, cuda, in_dim, tag_dim, rel_dim, mem_dim, at_hid_dim, num_classes, criterion):
super(TreeGRUSentiment, self).__init__()
self.cudaFlag = cuda
self.tree_module = TreeSimpleGRU(cuda, in_dim, tag_dim, rel_dim, mem_dim, at_hid_dim, criterion)
self.output_module = SentimentModule(cuda, mem_dim, num_classes, dropout=True)
self.tree_module.set_output_module(self.output_module)
def get_tree_parameters(self):
return self.tree_module.getParameters()
def forward(self, tree, sent_emb, tag_emb, rel_emb, training = False):
# sent_emb = F.torch.unsqueeze(self.word_embedding.forward(sent_inputs), 1)
# tag_emb = F.torch.unsqueeze(self.tag_emb.forward(tag_inputs), 1)
# rel_emb = F.torch.unsqueeze(self.rel_emb.forward(rel_inputs), 1)
# sent_emb, tag_emb, rel_emb = self.embedding_model(sent_inputs, tag_inputs, rel_inputs)
tree_state, loss = self.tree_module(tree, sent_emb, tag_emb, rel_emb, training)
output = tree.output
return output, loss
Why does neural network learning slow down as the error gets lower?
The reasons for the slowdown are not fully understood, but we have some basic ideas.
For classifiers, most training examples start out as incorrectly classified. Over time, more of them become correctly classified. Early in learning, you might have a nearly 100% error rate, so every example in the minibatch contributes to learning. Late in learning, you might have nearly a 0% error rate, so almost none of the examples in the minibatch contribute to learning. This problem can be resolved to some extent by using hard example mining or importance sampling. Both of these are just techniques for training on more difficult examples more often.
There are other more complicated reasons. One of them is that the condition number of the Hessian tends to worsen a lot as learning progresses, so that the optimal step size becomes smaller and smaller.

Categories

Resources