AttributeError: 'MultiLayerPerceptron' object has no attribute 'InputLayer' - python

The error is saying this is not the case - your InputLayer object does not have an attribute Input but I don't know how to fix it thank you for your helps
from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin
import random
class MultiLayerPerceptron(BaseEstimator, ClassifierMixin):
def __init__(self, params=None):
if (params == None):
self.inputLayer = 14 # Input Layer
self.hiddenLayer = 100 # Hidden Layer
self.outputLayer = 2 # Outpuy Layer
self.learningRate = 0.005 # Learning rate
self.max_epochs = 600 # Epochs
self.iasHiddenValue = -1 # Bias HiddenLayer
self.BiasOutputValue = -1 # Bias OutputLayer
self.activation = self.ativacao['sigmoid'] # Activation function
self.deriv = self.derivada['sigmoid']
else:
self.inputLayer = params['InputLayer']
self.hiddenLayer = params['HiddenLayer']
self.OutputLayer = params['OutputLayer']
self.learningRate = params['LearningRate']
self.max_epochs = params['Epocas']
self.BiasHiddenValue = params['BiasHiddenValue']
self.BiasOutputValue = params['BiasOutputValue']
self.activation = self.ativacao[params['ActivationFunction']]
self.deriv = self.derivada[params['ActivationFunction']]
'Starting Bias and Weights'
self.WEIGHT_hidden = self.starting_weights(self.hiddenLayer, self.inputLayer)
self.WEIGHT_output = self.starting_weights(self.OutputLayer, self.hiddenLayer)
self.BIAS_hidden = np.array([self.BiasHiddenValue for i in range(self.hiddenLayer)])
self.BIAS_output = np.array([self.BiasOutputValue for i in range(self.OutputLayer)])
self.classes_number = 2
pass
def starting_weights(self, x, y):
return [[2 * random.random() - 1 for i in range(x)] for j in range(y)]
ativacao = {
'sigmoid': (lambda x: 1/(1 + np.exp(-x))),
'tanh': (lambda x: np.tanh(x)),
'Relu': (lambda x: x*(x > 0)),
}
derivada = {
'sigmoid': (lambda x: x*(1-x)),
'tanh': (lambda x: 1-x**2),
'Relu': (lambda x: 1 * (x>0))
}
def Backpropagation_Algorithm(self, x):
DELTA_output = []
'Stage 1 - Error: OutputLayer'
ERROR_output = self.output - self.OUTPUT_L2
DELTA_output = ((-1)*(ERROR_output) * self.deriv(self.OUTPUT_L2))
arrayStore = []
'Stage 2 - Update weights OutputLayer and HiddenLayer'
for i in range(self.hiddenLayer):
for j in range(self.OutputLayer):
self.WEIGHT_output[i][j] -= (self.learningRate * (DELTA_output[j] * self.OUTPUT_L1[i]))
self.BIAS_output[j] -= (self.learningRate * DELTA_output[j])
'Stage 3 - Error: HiddenLayer'
delta_hidden = np.matmul(self.WEIGHT_output, DELTA_output)* self.deriv(self.OUTPUT_L1)
'Stage 4 - Update weights HiddenLayer and InputLayer(x)'
for i in range(self.OutputLayer):
for j in range(self.hiddenLayer):
self.WEIGHT_hidden[i][j] -= (self.learningRate * (delta_hidden[j] * x[i]))
self.BIAS_hidden[j] -= (self.learningRate * delta_hidden[j])
def show_err_graphic(self,v_erro,v_epoca):
plt.figure(figsize=(4,14))
plt.plot(v_epoca, v_erro, "m-",color="b", marker=11)
plt.xlabel("Number of Epochs")
plt.ylabel("Squared error (MSE) ");
plt.title("Error Minimization")
plt.show()
def predict(self, X, y):
'Returns the predictions for every element of X'
my_predictions = []
'Forward Propagation'
forward = np.matmul(X,self.WEIGHT_hidden) + self.BIAS_hidden
forward = np.matmul(forward, self.WEIGHT_output) + self.BIAS_output
for i in forward:
my_predictions.append(max(enumerate(i), key=lambda x:x[1])[0])
array_score = []
for i in range(len(my_predictions)):
if my_predictions[i] == 0:
array_score.append([i, 'No', my_predictions[i], y[i]])
elif my_predictions[i] == 1:
array_score.append([i, 'Yes', my_predictions[i], y[i]])
dataframe = pd.DataFrame(array_score, columns=['_id', 'class', 'output', 'hoped_output'])
return my_predictions, dataframe
def fit(self, X, y):
count_epoch = 1
total_error = 0
n = len(X);
epoch_array = []
error_array = []
W0 = []
W1 = []
while(count_epoch <= self.max_epochs):
for idx,inputs in enumerate(X):
self.output = np.zeros(self.classes_number)
'Stage 1 - (Forward Propagation)'
self.OUTPUT_L1 = self.activation((np.dot(self.InputLayer, self.WEIGHT_hidden) + self.BIAS_hidden.T))
self.OUTPUT_L2 = self.activation((np.dot(self.OUTPUT_L1, self.WEIGHT_output) + self.BIAS_output.T))
'Stage 2 - One-Hot-Encoding'
if(y[idx] == 0):
self.output = np.array([1,0,0]) #Class1 {1,0,0}
elif(y[idx] == 1):
self.output = np.array([0,1,0]) #Class2 {0,1,0}
square_error = 0
for i in range(self.OutputLayer):
erro = (self.output[i] - self.OUTPUT_L2[i])**2
square_error = (square_error + (0.05 * erro))
total_error = total_error + square_error
'Backpropagation : Update Weights'
self.Backpropagation_Algorithm(inputs)
total_error = (total_error / n)
if((count_epoch % 50 == 0)or(count_epoch == 1)):
print("Epoch ", count_epoch, "- Total Error: ",total_error)
error_array.append(total_error)
epoch_array.append(count_epoch)
W0.append(self.WEIGHT_hidden)
W1.append(self.WEIGHT_output)
count_epoch += 1
self.show_err_graphic(error_array,epoch_array)
plt.plot(W0[0])
plt.title('Weight Hidden update during training')
plt.legend(['neuron1', 'neuron2', 'neuron3', 'neuron4', 'neuron5'])
plt.ylabel('Value Weight')
plt.show()
plt.plot(W1[0])
plt.title('Weight Output update during training')
plt.legend(['neuron1', 'neuron2', 'neuron3'])
plt.ylabel('Value Weight')
plt.show()
return self
dictionary = {'InputLayer':14, 'HiddenLayer':100, 'OutputLayer':2,
'Epocas':700, 'LearningRate':0.005,'BiasHiddenValue':-1,
'BiasOutputValue':-1, 'ActivationFunction':'sigmoid'}
Perceptron = MultiLayerPerceptron(dictionary)
Perceptron.fit(train_X,train_y)
AttributeError: 'MultiLayerPerceptron' object has no attribute 'InputLayer'
I want after splitting the data to work on MLP Classifier but I found this error "AttributeError: 'MultiLayerPerceptron' object has no attribute 'InputLayer'

Related

Loss graph very strange PyTorch GAN

I am quite a young coder who has recently gotten into AI and image generation. I've been trying to make a text-to-image GAN but it isn't proving easy. For this one I am using BERT for the text encoder and PyTorch for the neural network: here are the crucial parts of the code, if you need any other details, don't hesitate to ask.
def tokenizeText(text, vectorizer):
vectorized = []
if len(text[0]) == 1:
tk = vectorizer.tokenize(text)
tk = vectorizer.convert_tokens_to_ids(tk)
tk = torch.LongTensor(tk)
vectorized.append(tk)
elif len(text) > 1:
for i in text:
tk = vectorizer.tokenize(i)
tk = vectorizer.convert_tokens_to_ids(tk)
tk = torch.LongTensor(tk)
vectorized.append(tk)
return vectorized
def prepareText1(textList):
tk = tokenizeText(textList, vectorizer)
if len(tk) > 1:
tknew = []
for i1 in tk:
size1 = len(i1)
size2 = 60 - size1
zeros = []
for _ in range(size2):
zeros.append(0)
zeros = torch.LongTensor(zeros)
zeros = zeros.view(1, zeros.size(0))
i1 = i1.view(1, len(i1))
tk = torch.cat([i1, zeros], dim=1)
tknew.append(tk)
return tknew
class Discriminator(nn.Module):
def __init__(self):
super(Discriminator, self).__init__()
self.inD = np.prod(imgShp) + embedDim
self.outD = 1
self.Embedlyr = nn.EmbeddingBag(30000, embedDim)
self.mainNet = nn.Sequential(
nn.Linear(self.inD, baseNeurons * 4),
nn.LeakyReLU(alpha, True),
nn.Dropout(dropout),
nn.Linear(baseNeurons * 4, baseNeurons * 2),
nn.LeakyReLU(alpha, True),
nn.Dropout(dropout),
nn.Linear(baseNeurons * 2, baseNeurons),
nn.LeakyReLU(alpha, True),
nn.Dropout(dropout),
nn.Linear(baseNeurons, self.outD),
nn.Sigmoid()
)
def forward(self, x, y):
y = prepareText1(y)
y = torch.stack(y).squeeze(1)
y = self.Embedlyr(y)
x = x.view(x.size(0), np.prod(imgShp))
x = torch.cat([x, y], dim=1)
x = self.mainNet(x)
return x.squeeze(0)
netD = Discriminator().to(device)
optD = optim.Adam(netD.parameters(), lr=lr)
class Generator(nn.Module):
def __init__(self):
super(Generator, self).__init__()
self.inD = nZDim + embedDim
self.outD = np.prod(imgShp)
self.Embedlyr = nn.EmbeddingBag(30000, embedDim)
self.mainNet = nn.Sequential(
nn.Linear(self.inD, baseNeurons),
nn.LeakyReLU(alpha, True),
nn.Linear(baseNeurons, baseNeurons * 2),
nn.LeakyReLU(alpha, True),
nn.Linear(baseNeurons * 2, baseNeurons * 4),
nn.LeakyReLU(alpha, True),
nn.Linear(baseNeurons * 4, self.outD),
nn.Tanh(),
)
def forward(self, x, y):
y = prepareText1(y)
y = torch.stack(y).squeeze(1)
y = self.Embedlyr(y)
x = x.view(x.size(0), nZDim)
x = torch.cat([x, y], dim=1)
x = self.mainNet(x)
return x.view(x.size(0), cC, imgSz, imgSz)
netG = Generator().to(device)
optG = optim.Adam(netG.parameters(), lr=lr)
lossFn = nn.BCEWithLogitsLoss()
writer = SummaryWriter()
def trainStepD(dataInp):
optD.zero_grad()
imgs = dataInp[0]
lbls = dataInp[1]
batchSz1 = imgs.size(0)
logitLblR = Variable(torch.ones((batchSz, 1))).to(device)
logitLblF = Variable(torch.zeros((batchSz, 1))).to(device)
output = netD(imgs, lbls)
lossR = lossFn(output, logitLblR)
nZ = Variable(torch.randn(batchSz1, nZDim)).to(device)
output = netG(nZ, lbls)
output = netD(output, lbls)
lossF = lossFn(output, logitLblF)
loss = lossR + lossF
loss.backward()
optD.step()
return loss.data.item()
def trainStepG(dataInp):
optG.zero_grad()
imgs = dataInp[0]
lbls = dataInp[1]
batchSz1 = imgs.size(0)
logitLblR = Variable(torch.ones((batchSz, 1))).to(device)
nZ = Variable(torch.randn(batchSz1, nZDim)).to(device)
output = netG(nZ, lbls)
output = netD(output, lbls)
loss = lossFn(output, logitLblR)
loss.backward()
optG.step()
return loss.data.item()
epochs = 50
displayStep = 5
criticNo = 5
for epoch in range(epochs):
print('Initializing Epoch [{}/{}]...'.format((epoch + 1), epochs), end=' ')
for batchNo, dataInp in enumerate(dLoader):
step = epoch * len(dLoader) + batchNo + 1
batchSz1 = dataInp[0].size(0)
netG.train()
for _ in range(criticNo):
DLoss = trainStepD(dataInp)
GLoss = trainStepG(dataInp)
writer.add_scalars('scalars', {'GLoss': GLoss, 'DLoss': (DLoss / criticNo)}, step)
if step % displayStep == 0:
netG.eval()
z = Variable(torch.randn(9, nZDim)).to(device)
labels = []
for i in range(9):
labels.append(dataInp[1][i])
sample_images = netG(z, labels)
grid = torchvision.utils.make_grid(sample_images, nrow=3, normalize=True)
writer.add_image("Batch Results", grid, step)
print('Done!')
These are the results I got from this:
Loss graph, Green - Discriminator Loss, Pink - Generator Loss:
My dataset is just a small folder of blue and red images labeled blue or red.
Dataset:
Please let me know if anyone can help.
Thanks

My Neural Network algorithm is not working mnist numbers

I could use a second set of eyes on my neural network.
This is the mnist number recognition project.
I'm not sure where the issue is.
I previously implemented the ai with tensor flow successfully.
I'm not looking to use an api as a solution.
I would appreciate any help anyone can give.
Here's the project on github, it's only an init file and then the neural_network.
https://github.com/nealchawn/ai_trial_2
class NeuralNetwork(object):
def __init__(self, sizes):
self.activations = []
self.outputs = []
self.weights = []
self.biases = []
self.sizes = sizes
self.set_random_weights()
self.set_random_biases()
def set_random_weights(self):
for layer_index, layer_size in enumerate(self.sizes[1:], start=1):
layer_weights = []
for size in range(layer_size):
for size in range(self.sizes[layer_index-1]):
layer_weights.append(random.uniform(-5.0, 5.0))
self.weights.append(layer_weights)
def set_random_biases(self):
total_biases = 0
# add extra zero bias to help future indexing
#self.biases.append(0)
for index, size in enumerate(self.sizes[0:-1], start=1):
total_biases += 1
for x in range(total_biases):
self.biases.append(random.uniform(-5.0, 5.0))
def train_network(self, training_data, training_labels):
if len(training_data) != len(training_labels):
print("Error data and labels must be the same length")
data = list(zip(training_data, training_labels))
self.sgd(data)
def sgd(self, data, mini_batch_size = 1000):
# first we'll create batches of training data
n = len(data)
data_batches = [
data[k:k + mini_batch_size]
for k in range(0, n, mini_batch_size)
]
print(len(data_batches))
i = 0
for mini_batch in data_batches:
print("Batch: " + str(i))
i += 1
self.update_mini_batch(mini_batch)
self.network_outputs()
print("Finished All training data!")
def update_mini_batch(self, mini_data_batch):
weight_gradients = []
bias_gradients = []
i = 0
for training_input in mini_data_batch:
training_object, training_label = training_input
self.feedforward(training_object)
weights_gradient, bias_gradient = self.backpropogation(training_label)
weight_gradients.append(weights_gradient)
bias_gradients.append(bias_gradient)
# average gradients
weights_gradient = np.average(weight_gradients,axis=0)
biases_gradient = np.average(bias_gradients, axis=0)
# may need to convert to list
weights_gradient_list = []
for weight_gradient in weights_gradient:
weights_gradient_list.append(weight_gradient.tolist())
#weights_gradient = weights_gradient.tolist()
biases_gradient = biases_gradient.tolist()
for x in range(len(self.biases)):
self.biases[x] -= 0.1*biases_gradient[x]
weight_gradient_index = 0
for layer_index, layer_weights in enumerate(self.weights, start=0):
for weight_index, weight in enumerate(layer_weights):
self.weights[layer_index][weight_index] = weight - 0.1*weights_gradient_list[layer_index][weight_index]
weight_gradient_index += 1
def feedforward(self, training_object):
# set inputs
self.outputs = []
self.activations = []
temp_activations = []
for index in range(self.sizes[0]):
temp_activations.append(training_object[index])
self.activations.append(temp_activations)
for layer_index, layer_size in enumerate(self.sizes[1:], start=0):
layer_weights = self.weights[layer_index]
layer_inputs = self.activations[layer_index]
weight_index = 0
layer_outputs = []
layer_activations = []
for node_index in range(layer_size):
node_weights = []
# get node weights
#print(f"layer size: {layer_size}, previous_layer_size: {self.sizes[layer_index]}, layer weights: {len(layer_weights)}")
for x in range(self.sizes[layer_index]):
node_weights.append(layer_weights[weight_index])
weight_index += 1
output = 0
for indx in range(len(node_weights)):
output += layer_inputs[indx]*node_weights[indx]
output = output + self.biases[layer_index]
layer_outputs.append(output)
layer_activations.append(self.sigmoid(output))
self.outputs.append(layer_outputs)
self.activations.append(layer_activations)
def backpropogation(self, training_label):
costs = []
output_layer_activations = self.activations[-1]
output_layer_outputs = self.outputs[-1]
correct_labels = self.translate_label_to_array(training_label)
costs.append(self.compute_cost_derivative(correct_labels, output_layer_activations))
for cost_index, cost in enumerate(costs[0]):
costs[0][cost_index] = cost*self.sigmoid_prime(output_layer_outputs[cost_index])
# calculate costs for layers
for layer_index, layer_size in enumerate(self.sizes[::-1][1:-1], start=1):
layer_costs = []
layer_weights = self.weights[-layer_index]
layer_outputs = self.outputs[-(layer_index+1)]
previous_layer_costs = costs[layer_index-1]
next_layer_size = self.sizes[::-1][1:][layer_index]
layer_weights_formatted = []
for x in range(layer_size):
layer_weights_formatted.append([])
for weight_index, weight in enumerate(layer_weights, start=0):
#print(f"weight index:{weight_index % next_layer_size} layer_index: {weight_index}")
layer_weights_formatted[weight_index%layer_size].append(layer_weights[weight_index])
#print(f"next_layer_size:{layer_size} costs: {len(previous_layer_costs)}, layer_weights_formatted: {layer_weights_formatted}")
for x in range(layer_size):
node_cost = 0
for y, cost in enumerate(previous_layer_costs,start=0):
node_cost += layer_weights_formatted[x][y]*cost
layer_costs.append(node_cost)
# layer_costs same order as next layer's activations
for cost_index, cost in enumerate(layer_costs):
layer_costs[cost_index] = cost * self.sigmoid_prime(layer_outputs[cost_index])
costs.append(layer_costs)
# calculate weight errors
weight_errors = []
bias_errors = []
for layer_index, layer_costs in enumerate(costs[::-1]):
layer_activations = self.activations[layer_index]
layer_weight_errors = []
for cost_index, cost in enumerate(layer_costs,start=0):
for activation in layer_activations:
layer_weight_errors.append(activation * cost)
weight_errors.append(np.array(layer_weight_errors))
bias_errors.append(sum(layer_costs))
return weight_errors, bias_errors
# conversion tool
def translate_label_to_array(self, y):
translated_label = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
translated_label[y] = 1
return np.array(translated_label)
# output tools
def network_outputs(self):
print("Output layer: ")
for x in range(self.sizes[-1]):
print("node " + str(x) + ": " + str(self.activations[-1][x]))
def total_activations(self):
print(len(self.activations))
def compute_cost_derivative(self, y, output_activations):
"""Return the vector of partial derivatives \partial C_x /
\partial a for the output activations."""
return (output_activations - y)
def sigmoid(self, z):
""""The sigmoid function."""
return (1.0 / (1.0 + np.exp(-z)))
def sigmoid_prime(self, z):
return (self.sigmoid(z) * (1 - self.sigmoid(z)))

Neural Network from scratch does not converge

I'm trying to implement a neural network from scratch in order to gain better insight about it and I run into a weird problem. When I use Relu function for hidden layers as an activation function, the model did not converge whereas it did converge once sigmoid function is used. Here is my vanilla code: When you change first 2 layers' activation function from relu to sigmoid, you can see that it converges, though it may have a problem sometimes. Where could be the problem? It's been three days and I still couldnt find it, though I found some little bugs. Thanks in advance.
Here is the toy dataset I've been using(just paste it to where this code is located).
Dataset
import numpy as np
import pandas as pd
class NeuralNetwork():
def __init__(self, epoch=10000, alpha=0.075, algorithm="gradient_descent"):
# hyperparameters
self.epoch = epoch
self.alpha = alpha
self.algorithm = algorithm
# parameters
self.params = {}
self.layer_no = 1
# logs
self.cost_vals = []
def createLayer(self, size, activation_func, randomness=True):
if randomness == True:
self.params["W" + str(self.layer_no)] = np.random.randn(size[0], size[1]) * 0.01
else:
self.params["W" + str(self.layer_no)] = np.zeros(size)
self.params["b" + str(self.layer_no)] = np.zeros((size[0], 1))
self.params["func" + str(self.layer_no)] = activation_func
self.layer_no += 1
def sigmoid(self, X):
return 1 / (1 + np.exp(-X))
def relu(self, X):
return np.maximum(X, 0) * 0.01
def tanh(self, X):
return (np.exp(X) - np.exp(-X)) / (np.exp(X) + np.exp(-X))
def derivative_sigmoid(self, X):
der_x = self.sigmoid(X)
return der_x * (1 - der_x)
def derivative_relu(self, X):
X[X<=0] = 0
X[X>0] = 1
return X
def derivative_tanh(self, X):
tanhx = self.tanh(X)
return 1 - np.power(tanhx, 2)
def activation_function(self, Zl, act_func_name):
if act_func_name == "sigmoid":
return self.sigmoid(Zl)
elif act_func_name == "relu":
return self.relu(Zl)
elif act_func_name == "tanh":
return self.tanh(Zl)
def derivative_activation_function(self, Zl, act_func_name):
if act_func_name == "sigmoid":
return self.derivative_sigmoid(Zl)
elif act_func_name == "relu":
return self.derivative_relu(Zl)
elif act_func_name == "tanh":
return self.derivative_tanh(Zl)
def train(self, X, Y):
m = Y.shape[0] # number of training examples
self.params["A0"] = X
self.params["Z0"] = None
for i in range(self.epoch):
# forward prop
for l in range(1, self.layer_no): # 1,2,3
Zl = np.dot(self.params["W" + str(l)], self.params["A" + str(l - 1)]) + self.params["b" + str(l)] # linear function of a layer with vectorization
Al = self.activation_function(Zl, self.params["func" + str(l)]) # activated form of Zl
self.params["Z" + str(l)] = Zl
self.params["A" + str(l)] = Al
# cost function
cost_val = - 1 / m * np.sum(np.multiply(Y, np.log(Al)) + np.multiply((1 - Y), np.log(1 - Al)))
cost_val = np.squeeze(cost_val)
if i % 500 == 0:
print(cost_val)
self.cost_vals.append(cost_val)
# backward prop
dAl = - (np.divide(Y, Al) - np.divide(1 - Y, 1 - Al)) # gradiant of last layer of A
for l in reversed(range(1, self.layer_no)): # 3,2,1
# backward prop
dZl = np.multiply(dAl,
self.derivative_activation_function(self.params["Z" + str(l)], self.params["func" + str(l)])) # gradient of layer l of Z
dAl1 = np.dot(self.params["W" + str(l)].T, dZl) # gradient of previous layer of A
dWl = 1 / m * np.dot(dZl, self.params["A" + str(l - 1)].T) # gradient of parameters W in layer l
dbl = 1 / m * np.sum(dZl, axis=1, keepdims=True) # gradient of parameters b in layer l
# update parameters
self.params["W" + str(l)] -= self.alpha * dWl
self.params["b" + str(l)] -= self.alpha * dbl
dAl = dAl1 # assign gradient of previous layer of A to the current one so as to use it while back-propagation
def iris_data():
from sklearn.model_selection import train_test_split
datas = pd.read_csv('iris_nn.data').to_numpy()
X = datas[:, 0:4].astype(float)
Y = datas[:, 4:5]
Y = np.asarray([1 if (y == 'Iris-setosa') else 0 for y in Y]).reshape((Y.shape[0], 1))
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25, random_state=0)
return X_train.T, Y_train.T
X, Y = iris_data()
model = NeuralNetwork()
model.createLayer((5,4), "relu")
model.createLayer((7,5), "relu")
model.createLayer((1,7), "sigmoid")
model.train(X,Y)
#

How to get kfold splits for cross validation from scratch in python?

I think I've split my training data in 5 kold, is there a way for me to label/identify each of the 5 splits so I can then send each into my algorithm to calculate their own accuracies?
from sklearn.model_selection import KFold
kf = KFold(n_splits=5)
splits=kf.get_n_splits(X_train)
print(splits)
Separately, I have also tried splitting my data to then run in my logistic regression but this outputs nan % accuracy:
X_train1 = X[0:84]
Y_train1 = Y[0:84]
X_train2 = X[85:170]
Y_train2 = Y[85:170]
X_train3 = X[171:255]
Y_train3 = Y[171:255]
X_train4 = X[256:340]
Y_train4 = Y[256:340]
X_train5 = X[341:426]
Y_train5 = Y[341:426]
def Sigmoid(z):
return 1/(1 + np.exp(-z))
def Hypothesis(theta, x):
return Sigmoid(x # theta)
def Cost_Function(X,Y,theta,m):
hi = Hypothesis(theta, x)
_y = Y.reshape(-1, 1)
J = 1/float(m) * np.sum(-_y * np.log(hi) - (1-_y) * np.log(1-hi))
return J
def Cost_Function_Regularisation(X,Y,theta,m,alpha):
hi = Hypothesis(theta,X)
_y = Y.reshape(-1, 1)
J = alpha/float(m) * X.T # (hi - _y)
return J
def Cost_Function_Regularisation(X,Y,theta,m,alpha):
hi = Hypothesis(theta,X)
_y = Y.reshape(-1, 1)
J = alpha/float(m) * X.T # (hi - _y)
return J
def Gradient_Descent(X,Y,theta,m,alpha):
new_theta = theta - Cost_Function_Regularisation(X,Y,theta,m,alpha)
return new_theta
def Accuracy(theta):
correct = 0
length = len(X_test)
prediction = (Hypothesis(theta, X_test) > 0.5)
_y = Y_test.reshape(-1, 1)
correct = prediction == _y
my_accuracy = (np.sum(correct) / length)*100
print ('LR Accuracy CV: ', my_accuracy, "%")
def Logistic_Regression(X,Y,alpha,theta,num_iters):
m = len(Y)
for x in range(num_iters):
new_theta = Gradient_Descent(X,Y,theta,m,alpha)
theta = new_theta
if x % 100 == 0:
print #('theta: ', theta)
print #('cost: ', Cost_Function(X,Y,theta,m))
Accuracy(theta)
ep = .012
initial_theta = np.random.rand(X_train.shape[1],1) * 2 * ep - ep
alpha = 0.5
iterations = 10000
Logistic_Regression(X_train1,Y_train1,alpha,initial_theta,iterations)
Logistic_Regression(X_train2,Y_train2,alpha,initial_theta,iterations)
Logistic_Regression(X_train3,Y_train3,alpha,initial_theta,iterations)
Logistic_Regression(X_train4,Y_train4,alpha,initial_theta,iterations)
Logistic_Regression(X_train5,Y_train5,alpha,initial_theta,iterations
get_n_splits returns the "number of splits" you configured for skf.
Look at the documentation here for an example : http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.StratifiedKFold.html

Training speed on GPU become slower overtime

My model training speed becomes slower over time. Every epoch take longer time to train.
Here is the full source code with my preprocess sentiment tree bank data (put glove.840B.300d.txt into data/glove).
Install some python packages:
pip install meowlogtool
pip install tqdm
Command to run:
python sentiment.py --emblr 0 --rel_dim 0 --tag_dim 0 --optim adagrad --name basic --lr 0.05 --wd 1e-4 --at_hid_dim 0
Model source code for you to read
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable as Var
import utils
import Constants
from model import SentimentModule
from embedding_model import EmbeddingModel
class SimpleGRU(nn.Module):
"""
w[i] : (300, 1)
h[i] : (150, 1)
p[i] : (20, 1)
r[i] : (20, 1)
k[i] : (150, 1)
x[i] : (20 + 150 + 300 + 20 = 490, 1) (490, 1)
Uz, Ur, Uh : (150, 150) => 67500 => (450, 450)
Wz, Wr, Wh : (150, 20 + 150 + 300 + 20) (150, 490)
"""
def __init__(self, cuda, in_dim, hid_dim):
super(SimpleGRU, self).__init__()
self.cudaFlag = cuda
self.Uz = nn.Linear(hid_dim, hid_dim)
self.Ur = nn.Linear(hid_dim, hid_dim)
self.Uh = nn.Linear(hid_dim, hid_dim)
self.Wz = nn.Linear(in_dim, hid_dim)
self.Wr = nn.Linear(in_dim, hid_dim)
self.Wh = nn.Linear(in_dim, hid_dim)
if self.cudaFlag:
self.Uz = self.Uz.cuda()
self.Ur = self.Uz.cuda()
self.Uh = self.Uz.cuda()
self.Wz = self.Wz.cuda()
self.Wr = self.Wr.cuda()
self.Wh = self.Wh.cuda()
def forward(self, x, h_prev):
"""
Simple-GRU(compress_x[v], h[t-1]) :
z[t] := s(Wz *compress_x[t]+ Uz * h[t-1] + bz)
r[t] := s(Wr * compress_x[t] + Ur * h[t-1] + br)
h_temp[t] := g(Wh * compress_x[t] + Uh * h[t-1] + bh)
h[t] := r[t] .* h[t-1] + (1 - z[t]) .* h_temp[t]
return h[t]
:param x: compress_x[t]
:param h_prev: h[t-1]
:return:
"""
z = F.sigmoid(self.Wz(x) + self.Uz(h_prev))
r = F.sigmoid(self.Wr(x) + self.Ur(h_prev))
h_temp = F.tanh(self.Wh(x) + self.Uh(h_prev))
h = r*h_prev + (1-z)*h_temp
return h
class TreeSimpleGRU(nn.Module):
def __init__(self, cuda, word_dim, tag_dim, rel_dim, mem_dim, at_hid_dim, criterion, leaf_h = None):
super(TreeSimpleGRU, self).__init__()
self.cudaFlag = cuda
# self.gru_cell = nn.GRUCell(word_dim + tag_dim, mem_dim)
self.gru_cell = SimpleGRU(self.cudaFlag, word_dim+tag_dim, mem_dim)
self.gru_at = GRU_AT(self.cudaFlag, word_dim + tag_dim + rel_dim + mem_dim, at_hid_dim ,mem_dim)
self.mem_dim = mem_dim
self.in_dim = word_dim
self.tag_dim = tag_dim
self.rel_dim = rel_dim
self.leaf_h = leaf_h # init h for leaf node
if self.leaf_h == None:
self.leaf_h = Var(torch.rand(1, self.mem_dim))
torch.save(self.leaf_h, 'leaf_h.pth')
if self.cudaFlag:
self.leaf_h = self.leaf_h.cuda()
self.criterion = criterion
self.output_module = None
def getParameters(self):
"""
Get flatParameters
note that getParameters and parameters is not equal in this case
getParameters do not get parameters of output module
:return: 1d tensor
"""
params = []
for m in [self.gru_cell, self.gru_at]:
# we do not get param of output module
l = list(m.parameters())
params.extend(l)
one_dim = [p.view(p.numel()) for p in params]
params = F.torch.cat(one_dim)
return params
def set_output_module(self, output_module):
self.output_module = output_module
def forward(self, tree, w_emb, tag_emb, rel_emb, training = False):
loss = Var(torch.zeros(1)) # init zero loss
if self.cudaFlag:
loss = loss.cuda()
for idx in xrange(tree.num_children):
_, child_loss = self.forward(tree.children[idx], w_emb, tag_emb, rel_emb, training)
loss = loss + child_loss
if tree.num_children > 0:
child_rels, child_k = self.get_child_state(tree, rel_emb)
if self.tag_dim > 0:
tree.state = self.node_forward(w_emb[tree.idx - 1], tag_emb[tree.idx -1], child_rels, child_k)
else:
tree.state = self.node_forward(w_emb[tree.idx - 1], None, child_rels, child_k)
elif tree.num_children == 0:
if self.tag_dim > 0:
tree.state = self.leaf_forward(w_emb[tree.idx - 1], tag_emb[tree.idx -1])
else:
tree.state = self.leaf_forward(w_emb[tree.idx - 1], None)
if self.output_module != None:
output = self.output_module.forward(tree.state, training)
tree.output = output
if training and tree.gold_label != None:
target = Var(utils.map_label_to_target_sentiment(tree.gold_label))
if self.cudaFlag:
target = target.cuda()
loss = loss + self.criterion(output, target)
return tree.state, loss
def leaf_forward(self, word_emb, tag_emb):
"""
Forward function for leaf node
:param word_emb: word embedding of current node u
:param tag_emb: tag embedding of current node u
:return: k of current node u
"""
h = self.leaf_h
if self.cudaFlag:
h = h.cuda()
if self.tag_dim > 0:
x = F.torch.cat([word_emb, tag_emb], 1)
else:
x = word_emb
k = self.gru_cell(x, h)
return k
def node_forward(self, word_emb, tag_emb, child_rels, child_k):
"""
Foward function for inner node
:param word_emb: word embedding of current node u
:param tag_emb: tag embedding of current node u
:param child_rels (tensor): rels embedding of child node v
:param child_k (tensor): k of child node v
:return:
"""
n_child = child_k.size(0)
h = Var(torch.zeros(1, self.mem_dim))
if self.cudaFlag:
h = h.cuda()
for i in range(0, n_child):
k = child_k[i]
x_list = [word_emb, k]
if self.rel_dim >0:
rel = child_rels[i]
x_list.append(rel)
if self.tag_dim > 0:
x_list.append(tag_emb)
x = F.torch.cat(x_list, 1)
h = self.gru_at(x, h)
k = h
return k
def get_child_state(self, tree, rels_emb):
"""
Get child rels, get child k
:param tree: tree we need to get child
:param rels_emb (tensor):
:return:
"""
if tree.num_children == 0:
assert False # never get here
else:
child_k = Var(torch.Tensor(tree.num_children, 1, self.mem_dim))
if self.rel_dim>0:
child_rels = Var(torch.Tensor(tree.num_children, 1, self.rel_dim))
else:
child_rels = None
if self.cudaFlag:
child_k = child_k.cuda()
if self.rel_dim > 0:
child_rels = child_rels.cuda()
for idx in xrange(tree.num_children):
child_k[idx] = tree.children[idx].state
if self.rel_dim > 0:
child_rels[idx] = rels_emb[tree.children[idx].idx - 1]
return child_rels, child_k
class AT(nn.Module):
"""
AT(compress_x[v]) := sigmoid(Wa * tanh(Wb * compress_x[v] + bb) + ba)
"""
def __init__(self, cuda, in_dim, hid_dim):
super(AT, self).__init__()
self.cudaFlag = cuda
self.in_dim = in_dim
self.hid_dim = hid_dim
self.Wa = nn.Linear(hid_dim, 1)
self.Wb = nn.Linear(in_dim, hid_dim)
if self.cudaFlag:
self.Wa = self.Wa.cuda()
self.Wb = self.Wb.cuda()
def forward(self, x):
out = F.sigmoid(self.Wa(F.tanh(self.Wb(x))))
return out
class GRU_AT(nn.Module):
def __init__(self, cuda, in_dim, at_hid_dim ,mem_dim):
super(GRU_AT, self).__init__()
self.cudaFlag = cuda
self.in_dim = in_dim
self.mem_dim = mem_dim
self.at_hid_dim = at_hid_dim
if at_hid_dim > 0:
self.at = AT(cuda, in_dim, at_hid_dim)
self.gru_cell = SimpleGRU(self.cudaFlag, in_dim, mem_dim)
if self.cudaFlag:
if at_hid_dim > 0:
self.at = self.at.cuda()
self.gru_cell = self.gru_cell.cuda()
def forward(self, x, h_prev):
"""
:param x:
:param h_prev:
:return: a * m + (1 - a) * h[t-1]
"""
m = self.gru_cell(x, h_prev)
if self.at_hid_dim > 0:
a = self.at.forward(x)
h = torch.mm(a, m) + torch.mm((1-a), h_prev)
else:
h = m
return h
class TreeGRUSentiment(nn.Module):
def __init__(self, cuda, in_dim, tag_dim, rel_dim, mem_dim, at_hid_dim, num_classes, criterion):
super(TreeGRUSentiment, self).__init__()
self.cudaFlag = cuda
self.tree_module = TreeSimpleGRU(cuda, in_dim, tag_dim, rel_dim, mem_dim, at_hid_dim, criterion)
self.output_module = SentimentModule(cuda, mem_dim, num_classes, dropout=True)
self.tree_module.set_output_module(self.output_module)
def get_tree_parameters(self):
return self.tree_module.getParameters()
def forward(self, tree, sent_emb, tag_emb, rel_emb, training = False):
# sent_emb = F.torch.unsqueeze(self.word_embedding.forward(sent_inputs), 1)
# tag_emb = F.torch.unsqueeze(self.tag_emb.forward(tag_inputs), 1)
# rel_emb = F.torch.unsqueeze(self.rel_emb.forward(rel_inputs), 1)
# sent_emb, tag_emb, rel_emb = self.embedding_model(sent_inputs, tag_inputs, rel_inputs)
tree_state, loss = self.tree_module(tree, sent_emb, tag_emb, rel_emb, training)
output = tree.output
return output, loss
Why does neural network learning slow down as the error gets lower?
The reasons for the slowdown are not fully understood, but we have some basic ideas.
For classifiers, most training examples start out as incorrectly classified. Over time, more of them become correctly classified. Early in learning, you might have a nearly 100% error rate, so every example in the minibatch contributes to learning. Late in learning, you might have nearly a 0% error rate, so almost none of the examples in the minibatch contribute to learning. This problem can be resolved to some extent by using hard example mining or importance sampling. Both of these are just techniques for training on more difficult examples more often.
There are other more complicated reasons. One of them is that the condition number of the Hessian tends to worsen a lot as learning progresses, so that the optimal step size becomes smaller and smaller.

Categories

Resources