Python, building an Artificial Neural Network - python

I am trying to run below mentioned code, taking from https://github.com/stephencwelch/Neural-Networks-Demystified.
import numpy as np
%pylab inline
X = np.array(([3,5], [5,1], [10,2]), dtype=float)
y = np.array(([75], [82], [93]), dtype=float)
X = X/np.amax(X, axis=0)
y = y/100 #Max test score is 100
class Neural_Network(object):
def __init__(self):
#Define Hyperparameters
self.inputLayerSize = 2
self.outputLayerSize = 1
self.hiddenLayerSize = 3
self.W1 = np.random.randn(self.inputLayerSize, self.hiddenLayerSize)
self.W2 = np.random.randn(self.hiddenLayerSize,self.outputLayerSize)
def forward(self, X):
self.z2 = np.dot(X, self.W1)
self.a2 = self.sigmoid(self.z2)
self.z3 = np.dot(self.a2, self.W2)
yHat = self.sigmoid(self.z3)
return yHat
def sigmoid(self, z):
return 1/(1+np.exp(-z))
def sigmoidPrime(self,z):
return np.exp(-z)/((1+np.exp(-z))**2)
def costFunction(self, X, y):
self.yHat = self.forward(X)
J = 0.5*sum((y-self.yHat)**2)
return J
def costFunctionPrime(self, X, y):
self.yHat = self.forward(X)
delta3 = np.multiply(-(y-self.yHat), self.sigmoidPrime(self.z3))
dJdW2 = np.dot(self.a2.T, delta3)
delta2 = np.dot(delta3, self.W2.T)*self.sigmoidPrime(self.z2)
dJdW1 = np.dot(X.T, delta2)
return dJdW1, dJdW2
def getParams(self):
#Get W1 and W2 unrolled into vector:
params = np.concatenate((self.W1.ravel(), self.W2.ravel()))
return params
def setParams(self, params):
W1_start = 0
W1_end = self.hiddenLayerSize * self.inputLayerSize
self.W1 = np.reshape(params[W1_start:W1_end], (self.inputLayerSize , self.hiddenLayerSize))
W2_end = W1_end + self.hiddenLayerSize*self.outputLayerSize
self.W2 = np.reshape(params[W1_end:W2_end], (self.hiddenLayerSize, self.outputLayerSize))
def computeGradients(self, X, y):
dJdW1, dJdW2 = self.costFunctionPrime(X, y)
return np.concatenate((dJdW1.ravel(), dJdW2.ravel()))
def computeNumericalGradient(N, X, y):
paramsInitial = N.getParams()
numgrad = np.zeros(paramsInitial.shape)
perturb = np.zeros(paramsInitial.shape)
e = 1e-4
for p in range(len(paramsInitial)):
perturb[p] = e
N.setParams(paramsInitial + perturb)
loss2 = N.costFunction(X, y)
N.setParams(paramsInitial - perturb)
loss1 = N.costFunction(X, y)
numgrad[p] = (loss2 - loss1) / (2*e)
perturb[p] = 0
N.setParams(paramsInitial)
return numgrad
from scipy import optimize
class trainer(object):
def __init__(self, N):
self.N = N
def callbackF(self, params):
self.N.setParams(params)
self.J.append(self.N.costFunction(self.X, self.y))
def costFunctionWrapper(self, params, X, y):
self.N.setParams(params)
cost = self.N.costFunction(X, y)
grad = self.N.computeGradients(X,y)
return cost, grad
def train(self, X, y):
self.X = X
self.y = y
self.J = []
params0 = self.N.getParams()
options = {'maxiter': 200, 'disp' : True}
_res = optimize.minimize(self.costFunctionWrapper, params0, jac=True, method='BFGS', \
args=(X, y), options=options, callback=self.callbackF)
self.N.setParams(_res.x)
self.optimizationResults = _res
NN = Neural_Network()
T = trainer(NN)
T.train(X,y)
but I am having this error:
Traceback (most recent call last):
File "<ipython-input-50-6b098e89c488>", line 3, in <module>
T.train(X,y)
AttributeError: 'trainer' object has no attribute 'train'
Hence, I am wondering why the attribute "train" is not defined?
I am using Spyder (Python 2.7)

Related

Linear regression from scratch

So I am following along a youtube video showing how to setup the linear regression python code from scratch with gradient descent. In the video, the person initialized the regression with using X and y values. I am trying to apply the same code to a csv file. Here's is what the code looks like:
import numpy as np
import pandas as pd
class LinearRegression():
def __init__(self):
self.learning_rate = 0.001
self.total_iterations = 10000
def y_hat(self, X, w):
return np.dot(w.T, X)
def loss(self, yhat, y):
L =1/self.m * np.sum(np.power(yhat-y, 2))
return L
def gradient_descent(self, w, X, y, yhat):
dldW = np.dot(X, (yhat - y).T)
w = w - self.learning_rate * dldW
return w
def main(self, X, y):
x1 = np.ones((1, X.shape[1]))
x = np.append(X, x1, axis=0)
self.m = X.shape[1]
self.n = X.shape[0]
w = np.zeros((self.n, 1))
for it in range(self.total_iterations+1):
yhat = self.y_hat(X, w)
loss = self.loss(yhat, y)
if it % 2000 == 0:
print(f'Cost at iteration {it} is {loss}')
w = self.gradient_descent(w, X, y, yhat)
return w
if __name__ == '__main__':
#X = np.random.rand(1, 500)
#y = 3 * X + np.random.randn(1, 500) * 0.1
data = pd.read_csv('/Users/brasilgu/Downloads/student (1) 2/student-mat.csv', sep=";")
X = data['G1'].values
y = data['G2'].values
regression = LinearRegression()
w = regression.main(X, y)
I am getting the following error
Traceback (most recent call last):
File "/Users/brasilgu/PycharmProjects/LinReg2/main.py", line 51, in <module>
w = regression.main(X, y)
File "/Users/brasilgu/PycharmProjects/LinReg2/main.py", line 23, in main
x1 = np.ones((1, X.shape[1]))
IndexError: tuple index out of range

Multi class neural network for MNIST data set not working

Hi I'm trying to train my own designed neural network on the MNIST handwritten data set and every time I run this code the accuracy starts to increase then decreases and I get an overflow warning. Can someone explain whether my code is just poor and messy or whether I have just missed something little out. Thanks in advance
import numpy as np
import pandas as pd
df = pd.read_csv('../input/digit-recognizer/train.csv')
data = np.array(df.values)
data = data.T
data
Y = data[0,:]
X = data[1:,:]
Y_train = Y[:41000]
X_train = X[:,:41000]
X_train = X_train/255
Y_val = Y[41000:]
X_val = X[:,41000:]
X_val = X_val/255
print(np.max(X_train))
class NeuralNetwork:
def __init__(self, n_in, n_out):
self.w1, self.b1 = self.Generate_Weights_Biases(10,784)
self.w2, self.b2 = self.Generate_Weights_Biases(10,10)
def Generate_Weights_Biases(self, n_in, n_out):
weights = 0.01*np.random.randn(n_in, n_out)
biases = np.zeros((n_in,1))
return weights, biases
def forward(self, X):
self.Z1 = self.w1.dot(X) + self.b1
self.a1 = self.ReLu(self.Z1)
self.z2 = self.w2.dot(self.a1) + self.b1
y_pred = self.Softmax(self.z2)
return y_pred
def ReLu(self, Z):
return np.maximum(Z,0)
def Softmax(self, Z):
#exponentials = np.exp(Z)
#sumexp = np.sum(np.exp(Z), axis=0)
#print(Z)
return np.exp(Z)/np.sum(np.exp(Z))
def ReLu_Derv(self, x):
return np.greaterthan(x, 0).astype(int)
def One_hot_encoding(self, Y):
one_hot = np.zeros((Y.size, 10))
rows = np.arange(Y.size)
one_hot[rows, Y] = 1
one_hot = one_hot.T
return one_hot
def Get_predictions(self, y_pred):
return np.argmax(y_pred, 0)
def accuracy(self, pred, Y):
return np.sum(pred == Y)/Y.size
def BackPropagation(self, X, Y, y_pred, lr=0.01):
m = Y.size
one_hot_y = self.One_hot_encoding(Y)
e2 = y_pred - one_hot_y
derW2 = (1/m)* e2.dot(self.a1.T)
derB2 =(1/m) * np.sum(e2,axis=1)
derB2 = derB2.reshape(10,1)
e1 = self.w2.T.dot(e2) * self.ReLu(self.a1)
derW1 = (1/m) * e1.dot(X.T)
derB1 = (1/m) * np.sum(e1, axis=1)
derB1 = derB1.reshape(10,1)
self.w1 = self.w1 - lr*derW1
self.b1 = self.b1 - lr*derB1
self.w2 = self.w2 - lr*derW2
self.b2 = self.b2 - lr*derB2
def train(self, X, Y, epochs = 1000):
for i in range(epochs):
y_pred = self.forward(X)
predict = self.Get_predictions(y_pred)
accuracy = self.accuracy(predict, Y)
print(accuracy)
self.BackPropagation(X, Y, y_pred)
return self.w1, self.b1, self.w2, self.b2
NN = NeuralNetwork(X_train, Y_train)
w1,b1,w2,b2 = NN.train(X_train,Y_train)
You should use a different bias for the second layer
self.z2 = self.w2.dot(self.a1) + self.b1 # not b1
self.z2 = self.w2.dot(self.a1) + self.b2 # but b2
When doing something like this
derB2 =(1/m) * np.sum(e2,axis=1)
you would like to use (keepdims = True) to make sure that derB2.shape is (something,1) but not (something, ). It makes your code more rigorous.

MNIST ML output doesn't make sense

I'm new to ML and have tried to use this GitHub repository to build an MNIST Machine Learning Model.
Since I have to import the dataset from my computer, I had to change things a bit. My imported dataset also doesn't include all 10 digits, but only 5.
The calculated accuracy is 96%, however when I cross check the .png files on my computer with the outcome txt, the labels make zero sense. It labels some 4's as 7's, some 2's as 5's and so on.
This is what the folder structure looks like on my computer:
2
-->001.png
-->002.png
-->003.png
-->...
3
-->001.png
-->002.png
-->003.png
-->...
4
-->001.png
-->002.png
-->003.png
-->...
5
-->001.png
-->002.png
-->003.png
-->...
7
-->001.png
-->002.png
-->003.png
-->...
Question 1:
I previously had the error that it expected 8 different categories since 7 is the highest digit label. I didn't know how to fix this, so I rename the folders from 0 to 4. Any idea how to fix this, without having to rename all folders?
Question 2:
Do you know why the outcome doesn't make any sense? It doesn't seem to be an overfitting issue, I've tried adjusting the training-test split, which didn't have any impact.
from sklearn.datasets import fetch_openml
from keras.utils.np_utils import to_categorical
import numpy as np
from sklearn.model_selection import train_test_split
import time
#x, y = fetch_openml('mnist_784', version=1, return_X_y=True)
import os
from os import listdir
from os.path import isfile, join
import cv2
label_folder_training = []
label_files_training = []
total_size_training = 0
total_size_testing = 0
data_path_training = r"Training_data"
data_path_testing = r"Testing_data"
for root, dirs, files in os.walk(data_path_training):
for dir in dirs:
label_folder_training.append(dir)
total_size_training += len(files)
for file in files:
label_files_training.append(file)
for root, dirs, files in os.walk(data_path_testing):
total_size_testing += len(files)
#to ignore .DS_Store file
total_size_training = total_size_training - 1
total_size_testing = total_size_testing
print("found", total_size_training, "training files and", total_size_testing, "testing files.")
print("folder Training:",label_folder_training)
# Print returns the following:
#found 20000 training files and 5000 testing files.
#folder Training: ['0', '1', '4', '3', '2']
x = []
y = []
for i in range(len(label_folder_training)):
labelPath_training = os.path.join(data_path_training,label_folder_training[i])
FileName = [f for f in listdir(labelPath_training) if isfile(join(labelPath_training, f))]
for j in range(len(FileName)):
path = os.path.join(labelPath_training,FileName[j])
img = cv2.imread(path,cv2.IMREAD_GRAYSCALE)
x.append(img)
y.append(label_folder_training[i])
x = np.array(x)
x = np.reshape(x, (20000, 784))
x = (x/255).astype('float32')
y = to_categorical(y)
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)
import pandas as pd
class Module:
def __init__(self):
self._train = True
def forward(self, input):
raise NotImplementedError
def backward(self, input, grad_output):
raise NotImplementedError
def parameters(self):
"""
Returns list of its parameters
"""
return []
def grad_parameters(self):
"""
Returns list of tensors gradients of its parameters
"""
return []
def train(self):
self._train = True
def eval(self):
self._train = False
class Criterion:
def forward(self, input, target):
raise NotImplementedError
def backward(self, input, target):
raise NotImplementedError
class Linear(Module):
def __init__(self, dim_in, dim_out):
super().__init__()
self.W = np.random.randn(dim_in, dim_out)
self.b = np.random.randn(1, dim_out)
def forward(self, input):
self.output = (np.dot(input, self.W) + self.b)
return self.output
def backward(self, input, grad_output):
self.grad_b = np.mean(grad_output, axis=0)
self.grad_W = np.dot(input.T, grad_output)
self.grad_W /= input.shape[0]
grad_input = np.dot(grad_output, self.W.T)
return grad_input
def parameters(self):
return [self.W, self.b]
def grad_parameters(self):
return [self.grad_W, self.grad_b]
def softmax(xs):
xs = np.subtract(xs, xs.max(axis=1, keepdims=True))
xs = np.exp(xs) / np.sum(np.exp(xs), axis=1, keepdims=True)
return xs
class CrossEntropy(Criterion):
def __init__(self):
super().__init__()
def forward(self, input, target):
eps = 1e-9
predictions = np.clip(input, eps, 1. - eps)
N = predictions.shape[0]
ce = -np.sum(target * np.log(predictions))
return ce / N
def backward(self, input, target):
eps = 1e-9
input_clamp = np.clip(input, eps, 1 - eps)
return softmax(input_clamp) - target
class Sequential(Module):
def __init__(self, *layers):
super().__init__()
self.layers = layers
def forward(self, input):
for layer in self.layers:
input = layer.forward(input)
self.output = input
return self.output
def backward(self, input, grad_output):
for i in range(len(self.layers) - 1, 0, -1):
grad_output = self.layers[i].backward(self.layers[i-1].output, grad_output)
grad_output = self.layers[0].backward(input, grad_output)
return grad_output
def parameters(self):
res = []
for l in self.layers:
res += l.parameters()
return res
def grad_parameters(self):
res = []
for l in self.layers:
res += l.grad_parameters()
return res
def train(self):
for layer in self.layers:
layer.train()
def eval(self):
for layer in self.layers:
layer.eval()
def sigmoid(x):
return 1 / (1 + np.exp(-x))
class Sigmoid(Module):
def __init__(self):
super().__init__()
def forward(self, input):
self.output = sigmoid(input)
return self.output
def backward(self, input, grad_output):
grad_input = sigmoid(input) * (1 - sigmoid(input)) * grad_output
return grad_input
class SoftMax(Module):
def __init__(self):
super().__init__()
def forward(self, input):
self.output = np.subtract(input, input.max(axis=1, keepdims=True))
self.output = np.exp(self.output) / np.sum(np.exp(self.output), axis=1, keepdims=True)
return self.output
def backward(self, input, grad_output):
return grad_output
def DataLoader(X, Y, batch_size=32):
n = X.shape[0]
indices = np.arange(n)
np.random.shuffle(indices)
for start in range(0, n, batch_size):
end = min(start + batch_size, n)
batch_idx = indices[start:end]
yield X[batch_idx], Y[batch_idx]
def accuracy_score(y_true, y_pred):
a = np.argmax(y_true, axis=1)
b = np.argmax(y_pred, axis=1)
return np.count_nonzero(a == b) / y_true.shape[0]
class Adam:
def __init__(self, model):
self.prev_m = None
self.prev_v = None
self.model = model
self.t = 1
def step(self, lr, beta1, beta2):
prev_m_tmp = []
prev_v_tmp = []
eps = 1e-7
for i, (weights, gradient) in enumerate(zip(self.model.parameters(), self.model.grad_parameters())):
if self.prev_m and self.prev_v:
m = beta1 * self.prev_m[i] + (1 - beta1) * gradient
v = beta2 * self.prev_v[i] + (1 - beta2) * gradient ** 2
m_hat = m / (1 - beta1 ** self.t)
v_hat = v / (1 - beta2 ** self.t)
else:
m = beta1 * 0 + (1 - beta1) * gradient
v = beta2 * 0 + (1 - beta2) * gradient ** 2
m_hat = m / (1 - beta1 ** self.t)
v_hat = v / (1 - beta2 ** self.t)
weights -= lr * m_hat / (np.sqrt(v_hat) + eps)
prev_m_tmp.append(m)
prev_v_tmp.append(v)
self.prev_m = prev_m_tmp
self.prev_v = prev_v_tmp
self.t += 1
model = Sequential(
Linear(784, 512),
Sigmoid(),
Linear(512, 256),
Sigmoid(),
Linear(256, 128),
Sigmoid(),
Linear(128, 64),
Sigmoid(),
Linear(64, 5),
SoftMax(),
)
epochs = 20
eval_every = 1
batch_size = 1024
criterion = CrossEntropy()
optimizer = Adam(model)
for epoch in range(epochs):
for x, y in DataLoader(X_train, y_train, batch_size=batch_size):
model.train()
y_pred = model.forward(x)
grad = criterion.backward(y_pred, y)
model.backward(x, grad)
optimizer.step(lr=0.003, beta1=0.9, beta2=0.999)
if (epoch + 1) % eval_every == 0:
model.eval()
y_train_pred = model.forward(X_train)
y_test_pred = model.forward(X_test)
loss_train = criterion.forward(y_train_pred, y_train)
loss_test = criterion.forward(y_test_pred, y_test)
print(f'Epoch: {epoch + 1}/{epochs}')
print(f'Train Loss: {loss_train} Train Accuracy: {accuracy_score(y_train, y_train_pred)}')
print(f'Test Loss: {loss_test} Test Accuracy: {accuracy_score(y_test, y_test_pred)} \n')
# Returns the following in epoch 20/20:
# Epoch: 20/20
# Train Loss: 0.151567557756849 Train Accuracy: 0.9905
# Test Loss: 0.706321046620394 Test Accuracy: 0.9563333333333334
test_x=[]
FileName = [f for f in listdir(data_path_testing) if isfile(join(data_path_testing, f))]
for j in range(len(FileName)):
path = os.path.join(data_path_testing,FileName[j])
img = cv2.imread(path,cv2.IMREAD_GRAYSCALE)
test_x.append(img)
x_val = np.array(test_x)
x_val = np.reshape(x_val, (5000, 784))
x_val = (x_val/255).astype('float32')
df_test = pd.DataFrame(x_val,columns=range(784)).add_prefix('pixels_')
output = model.forward(df_test)
output_arg = np.argmax(output, axis=1)
ImageId = df_test.index +1
submission = pd.DataFrame({'ImageId': ImageId, 'Label': output})
submission['ImageId'] = submission['ImageId'].apply('{:0>4}'.format)
submission.to_csv('export.txt', sep=' ', index=False, header=False)
Found the answer to my problem. The output didn't make any sense since python was importing the testing files in a random order. All I had to do was to sort FileName before letting the model run.
I changed this
test_x=[]
FileName = [f for f in listdir(data_path_testing) if isfile(join(data_path_testing, f))]
for j in range(len(FileName)):
path = os.path.join(data_path_testing,FileName[j])
img = cv2.imread(path,cv2.IMREAD_GRAYSCALE)
test_x.append(img)
to this:
test_x=[]
FileName = sorted( filter( lambda x: os.path.isfile(os.path.join(data_path_testing, x)),
os.listdir(data_path_testing) ) )
for j in range(len(FileName)):
path = os.path.join(data_path_testing,FileName[j])
img = cv2.imread(path,cv2.IMREAD_GRAYSCALE)
test_x.append(img)

NameError: name 'K' is not defined

I'm following the guide to Transformers and the colab project https://colab.research.google.com/drive/1XBP0Zh8K4g_n0A2p1UlGFf3dij0EX_Kt
but when I run the cell with the line multi_head = build_model() I get the error.
this is the output from the console:
NameError Traceback (most recent call
last) in ()
----> 1 multi_head = build_model()
5 frames in (x)
40 self.dropout = Dropout(attn_dropout)
41 def call(self, q, k, v, mask):
---> 42 attn = Lambda(lambda x:K.batch_dot(x[0],x[1],axes=[2,2])/self.temper)([q, k])
43 if mask is not None:
44 mmask = Lambda(lambda x:(-1e+10)*(1-x))(mask)
NameError: name 'K' is not defined
It just runs after the model architecture code, which the error refers to.
Can you see where this Kshould be defined?
import random, os, sys
import numpy as np
from tensorflow.keras.models import *
from tensorflow.keras.layers import *
from tensorflow.keras.callbacks import *
from tensorflow.keras.initializers import *
import tensorflow as tf
from tensorflow.python.keras.layers import Layer
try:
from dataloader import TokenList, pad_to_longest
# for transformer
except: pass
embed_size = 60
class LayerNormalization(Layer):
def __init__(self, eps=1e-6, **kwargs):
self.eps = eps
super(LayerNormalization, self).__init__(**kwargs)
def build(self, input_shape):
self.gamma = self.add_weight(name='gamma', shape=input_shape[-1:],
initializer=Ones(), trainable=True)
self.beta = self.add_weight(name='beta', shape=input_shape[-1:],
initializer=Zeros(), trainable=True)
super(LayerNormalization, self).build(input_shape)
def call(self, x):
mean = K.mean(x, axis=-1, keepdims=True)
std = K.std(x, axis=-1, keepdims=True)
return self.gamma * (x - mean) / (std + self.eps) + self.beta
def compute_output_shape(self, input_shape):
return input_shape
class ScaledDotProductAttention():
def __init__(self, d_model, attn_dropout=0.1):
self.temper = np.sqrt(d_model)
self.dropout = Dropout(attn_dropout)
def __call__(self, q, k, v, mask):
attn = Lambda(lambda x:K.batch_dot(x[0],x[1],axes=[2,2])/self.temper)([q, k])
if mask is not None:
mmask = Lambda(lambda x:(-1e+10)*(1-x))(mask)
attn = Add()([attn, mmask])
attn = Activation('softmax')(attn)
attn = self.dropout(attn)
output = Lambda(lambda x:K.batch_dot(x[0], x[1]))([attn, v])
return output, attn
class MultiHeadAttention():
# mode 0 - big martixes, faster; mode 1 - more clear implementation
def __init__(self, n_head, d_model, d_k, d_v, dropout, mode=0, use_norm=True):
self.mode = mode
self.n_head = n_head
self.d_k = d_k
self.d_v = d_v
self.dropout = dropout
if mode == 0:
self.qs_layer = Dense(n_head*d_k, use_bias=False)
self.ks_layer = Dense(n_head*d_k, use_bias=False)
self.vs_layer = Dense(n_head*d_v, use_bias=False)
elif mode == 1:
self.qs_layers = []
self.ks_layers = []
self.vs_layers = []
for _ in range(n_head):
self.qs_layers.append(TimeDistributed(Dense(d_k, use_bias=False)))
self.ks_layers.append(TimeDistributed(Dense(d_k, use_bias=False)))
self.vs_layers.append(TimeDistributed(Dense(d_v, use_bias=False)))
self.attention = ScaledDotProductAttention(d_model)
self.layer_norm = LayerNormalization() if use_norm else None
self.w_o = TimeDistributed(Dense(d_model))
def __call__(self, q, k, v, mask=None):
d_k, d_v = self.d_k, self.d_v
n_head = self.n_head
if self.mode == 0:
qs = self.qs_layer(q) # [batch_size, len_q, n_head*d_k]
ks = self.ks_layer(k)
vs = self.vs_layer(v)
def reshape1(x):
s = tf.shape(x) # [batch_size, len_q, n_head * d_k]
x = tf.reshape(x, [s[0], s[1], n_head, d_k])
x = tf.transpose(x, [2, 0, 1, 3])
x = tf.reshape(x, [-1, s[1], d_k]) # [n_head * batch_size, len_q, d_k]
return x
qs = Lambda(reshape1)(qs)
ks = Lambda(reshape1)(ks)
vs = Lambda(reshape1)(vs)
if mask is not None:
mask = Lambda(lambda x:K.repeat_elements(x, n_head, 0))(mask)
head, attn = self.attention(qs, ks, vs, mask=mask)
def reshape2(x):
s = tf.shape(x) # [n_head * batch_size, len_v, d_v]
x = tf.reshape(x, [n_head, -1, s[1], s[2]])
x = tf.transpose(x, [1, 2, 0, 3])
x = tf.reshape(x, [-1, s[1], n_head*d_v]) # [batch_size, len_v, n_head * d_v]
return x
head = Lambda(reshape2)(head)
elif self.mode == 1:
heads = []; attns = []
for i in range(n_head):
qs = self.qs_layers[i](q)
ks = self.ks_layers[i](k)
vs = self.vs_layers[i](v)
head, attn = self.attention(qs, ks, vs, mask)
heads.append(head); attns.append(attn)
head = Concatenate()(heads) if n_head > 1 else heads[0]
attn = Concatenate()(attns) if n_head > 1 else attns[0]
outputs = self.w_o(head)
outputs = Dropout(self.dropout)(outputs)
if not self.layer_norm: return outputs, attn
# outputs = Add()([outputs, q]) # sl: fix
return self.layer_norm(outputs), attn
class PositionwiseFeedForward():
def __init__(self, d_hid, d_inner_hid, dropout=0.1):
self.w_1 = Conv1D(d_inner_hid, 1, activation='relu')
self.w_2 = Conv1D(d_hid, 1)
self.layer_norm = LayerNormalization()
self.dropout = Dropout(dropout)
def __call__(self, x):
output = self.w_1(x)
output = self.w_2(output)
output = self.dropout(output)
output = Add()([output, x])
return self.layer_norm(output)
class EncoderLayer():
def __init__(self, d_model, d_inner_hid, n_head, d_k, d_v, dropout=0.1):
self.self_att_layer = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout)
self.pos_ffn_layer = PositionwiseFeedForward(d_model, d_inner_hid, dropout=dropout)
def __call__(self, enc_input, mask=None):
output, slf_attn = self.self_att_layer(enc_input, enc_input, enc_input, mask=mask)
output = self.pos_ffn_layer(output)
return output, slf_attn
def GetPosEncodingMatrix(max_len, d_emb):
pos_enc = np.array([
[pos / np.power(10000, 2 * (j // 2) / d_emb) for j in range(d_emb)]
if pos != 0 else np.zeros(d_emb)
for pos in range(max_len)
])
pos_enc[1:, 0::2] = np.sin(pos_enc[1:, 0::2]) # dim 2i
pos_enc[1:, 1::2] = np.cos(pos_enc[1:, 1::2]) # dim 2i+1
return pos_enc
def GetPadMask(q, k):
ones = K.expand_dims(K.ones_like(q, 'float32'), -1)
mask = K.cast(K.expand_dims(K.not_equal(k, 0), 1), 'float32')
mask = K.batch_dot(ones, mask, axes=[2,1])
return mask
def GetSubMask(s):
len_s = tf.shape(s)[1]
bs = tf.shape(s)[:1]
mask = K.cumsum(tf.eye(len_s, batch_shape=bs), 1)
return mask
class Transformer():
def __init__(self, len_limit, embedding_matrix, d_model=embed_size, \
d_inner_hid=512, n_head=10, d_k=64, d_v=64, layers=2, dropout=0.1, \
share_word_emb=False, **kwargs):
self.name = 'Transformer'
self.len_limit = len_limit
self.src_loc_info = False # True # sl: fix later
self.d_model = d_model
self.decode_model = None
d_emb = d_model
pos_emb = Embedding(len_limit, d_emb, trainable=False, \
weights=[GetPosEncodingMatrix(len_limit, d_emb)])
i_word_emb = Embedding(max_features, d_emb, weights=[embedding_matrix]) # Add Kaggle provided embedding here
self.encoder = Encoder(d_model, d_inner_hid, n_head, d_k, d_v, layers, dropout, \
word_emb=i_word_emb, pos_emb=pos_emb)
def get_pos_seq(self, x):
mask = K.cast(K.not_equal(x, 0), 'int32')
pos = K.cumsum(K.ones_like(x, 'int32'), 1)
return pos * mask
def compile(self, active_layers=999):
src_seq_input = Input(shape=(None, ))
x = Embedding(max_features, embed_size, weights=[embedding_matrix])(src_seq_input)
# LSTM before attention layers
x = Bidirectional(LSTM(128, return_sequences=True))(x)
x = Bidirectional(LSTM(64, return_sequences=True))(x)
x, slf_attn = MultiHeadAttention(n_head=3, d_model=300, d_k=64, d_v=64, dropout=0.1)(x, x, x)
avg_pool = GlobalAveragePooling1D()(x)
max_pool = GlobalMaxPooling1D()(x)
conc = concatenate([avg_pool, max_pool])
conc = Dense(64, activation="relu")(conc)
x = Dense(1, activation="sigmoid")(conc)
self.model = Model(inputs=src_seq_input, outputs=x)
self.model.compile(optimizer = 'adam', loss = 'mean_squared_error', metrics=['accuracy'])
If you look at where K is being used you will see:
K.expand_dims
K.cumsum
K.batch_dot
These are Keras backend functions. The code is missing a from keras import backend as K, which I think is a standard abbreviation.

scipy matrix to numpy array in classification task

I have X_train data (class 'pandas.core.series.Series') with content
print(X_train)
0 WASHINGTON — Congressional Republicans have...
1 After the bullet shells get counted, the blood...
2 When Walt Disney’s “Bambi” opened in 1942, cri...
3 Death may be the great equalizer, but it isn’t...
4 SEOUL, South Korea — North Korea’s leader, ...
then I want to prepare data for classification:
count_vect = CountVectorizer()
X_train_counts = count_vect.fit_transform(X_train)
tfidf_transformer = TfidfTransformer()
X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)
and X_train_tfidf and X_train_counts now is (class 'scipy.sparse.csr.csr_matrix')
But in my Logistic Regression function I can operate with numpy arrays. What should I do to fix it?
class LogisticRegression2:
def __init__(self, lr=0.01, num_iter=100000, fit_intercept=True, theta=0, verbose=False):
self.lr = lr
self.num_iter = num_iter
self.fit_intercept = fit_intercept
self.theta = theta
self.verbose = verbose
def __add_intercept(self, X):
intercept = np.ones((X.shape[0], 1))
return np.concatenate((intercept, X), axis=1)
def __sigmoid(self, z):
return 1 / (1 + np.exp(-z))
#return .5 * (1 + np.tanh(.5 * z))
def __loss(self, h, y):
return (-y * np.log(h) - (1 - y) * np.log(1 - h)).mean()
def fit(self, X, y):
if self.fit_intercept:
X = self.__add_intercept(X)
# weights initialization
self.theta = np.zeros(X.shape[1])
for i in range(self.num_iter):
z = np.dot(X, self.theta)
h = self.__sigmoid(z)
gradient = np.dot(X.T, (h - y)) / y.size
self.theta -= self.lr * gradient
if(self.verbose == True and i % 10000 == 0):
z = np.dot(X, self.theta)
h = self.__sigmoid(z)
print('loss: ', self.__loss(h, y))
def predict_prob(self, X):
if self.fit_intercept:
X = self.__add_intercept(X)
return self.__sigmoid(np.dot(X, self.theta))
def predict(self, X, threshold=0.5):
return self.predict_prob(X) >= threshold
If I use
X_train_dense = X_train_tfidf.toarray()
model = LogisticRegression2(lr=0.1, num_iter=100)
model.fit(X_train_dense, y_train)
preds = model.predict(X_train_dense)
I have have TypeError: unsupported operand type(s) for -: 'float' and 'str'
in
`gradient = np.dot(X.T, (h - y)) / y.size`
If i try
def __add_intercept(self, X):
intercept = np.ones((X.shape[0], 1))
return hstack((intercept, X))
I have memory error

Categories

Resources