Related
I am working on an implementation of MAML (see https://arxiv.org/pdf/1703.03400.pdf) in Jax.
When training on a distribution of simple linear regression tasks it seems to perform fine (takes a while to converge but ultimately works).
However when training on a tasks distributed like A * sin(B + X) where A, B are random variables all the weights in the network converge to 0. training results
This is clearly not right.
Thanks in advance for any help provided.
Full code here https://colab.research.google.com/drive/1YoOkwo5tI42LeIbBOxpImkN55Kg9wScl?usp=sharing or see below for minimal code.
Task Generation code:
class MAMLDataLoader:
def __init__(self, sample_task_fn, num_tasks, batch_size):
self.sample_task_fn = sample_task_fn
self.num_tasks = num_tasks
self.batch_size = batch_size
def sample_tasks(self, key):
XS = jnp.empty((self.num_tasks, 2 * self.batch_size, 1))
YS = jnp.empty((self.num_tasks, 2 * self.batch_size, 1))
for i in range(self.num_tasks):
key, subkey = random.split(key)
xs, ys = self.sample_task_fn(self.batch_size * 2, subkey)
XS = XS.at[i].set(xs)
YS = YS.at[i].set(ys)
x_train, x_test = XS[:, :self.batch_size], XS[:, self.batch_size:]
y_train, y_test = YS[:, :self.batch_size], YS[:, self.batch_size:]
return x_train, y_train, x_test, y_test
def dummy_input(self):
key = random.PRNGKey(0)
x = self.sample_task_fn(1, key)[0][0]
return x
def sample_sinusoidal_task(samples, key):
# y = a * sin(b + x)
xs_key, amplitude_key, phase_key = random.split(key, num=3)
amplitude = random.uniform(amplitude_key, (1, 1))
phase = random.uniform(phase_key, (1, 1)) * jnp.pi * 2
xs = (random.uniform(xs_key, (samples, 1)) * 4 - 2) * jnp.pi
ys = amplitude * jnp.sin(xs + phase)
return xs, ys
Here is the main MAML code:
class MAMLTrainer:
def __init__(self, model, alpha, optimiser, inner_steps=1):
self.model = model
self.alpha = alpha
self.optimiser = optimiser
self.inner_steps = inner_steps
self.jit_step = jit(self.step)
def loss(self, params, x, y):
preds = self.model.apply(params, x)
return jnp.mean(jnp.inner(y - preds, y - preds) / 2.0)
def update(self, params, x, y, inner_steps=None):
if inner_steps is None:
inner_steps = self.inner_steps
loss_grad = grad(self.loss)
def _update(i, params):
grads = loss_grad(params, x, y)
new_params = tree_map(lambda p, g: p - self.alpha * g, params, grads)
return new_params
return lax.fori_loop(0, inner_steps, _update, params)
def meta_loss(self, params, x1, y1, x2, y2):
return self.loss(self.update(params, x1, x2), x2, y2)
def batch_meta_loss(self, params, x1, y1, x2, y2):
return jnp.mean(vmap(partial(self.meta_loss, params))(x1, y1, x2, y2))
def step(self, params, optimiser, x1, y1, x2, y2):
loss, grads = value_and_grad(self.batch_meta_loss)(params, x1, y1, x2, y2)
updates, opt_state = self.optimiser.update(grads, optimiser, params)
params = optax.apply_updates(params, updates)
return params, loss
def train(self, dataloader, steps, key, params=None):
if params is None:
key, subkey = random.split(key)
params = self.model.init(subkey, dataloader.dummy_input())
optimiser = self.optimiser.init(params)
pbar, losses = tqdm(range(steps), desc='Training'), []
for epoch in pbar:
key, subkey = random.split(key)
params, loss = self.jit_step(params, optimiser, *dataloader.sample_tasks(subkey))
losses.append(loss)
if epoch % 100 == 0:
avg_loss = jnp.mean(jnp.array(losses[-100:]))
pbar.set_postfix_str(f'current_loss: {loss:.3f}, running_loss_100_epochs: {avg_loss:.3f}')
return params, jnp.array(losses)
def n_shot_learn(self, x_train, y_train, params, n):
return self.update(params, x_train, y_train, n)
Training Code:
class SimpleMLP(nn.Module):
features: Sequence[int]
#nn.compact
def __call__(self, inputs):
x = inputs
for i, feat in enumerate(self.features[:-1]):
x = nn.Dense(feat)(x)
x = nn.relu(x)
return nn.Dense(self.features[-1])(x)
model = SimpleMLP([64, 64, 1])
optimiser = optax.adam(1e-3)
trainer = MAMLTrainer(model, 0.1, optimiser, 1)
dataloader = MAMLDataLoader(sample_sinusoidal_task, 2, 100)
key = random.PRNGKey(0)
params, losses = trainer.train(dataloader, 10000, key)
I ma trying to create a neural network and link it with my django project, when I call the model it gaves me this error message
AttributeError at /predict/
Can't get attribute 'artificial_neuron' on <module 'main' from 'C:\Users\DELL\Desktop\PFE_AYA\Disease_prediction\manage.py'>
then I realised that the model is empty inside when I pickle it.
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
import pandas as pd
data=pd.read_csv('DATABASE_SPREADSHEET_1 - Copie.csv', sep=';')
X=data.iloc[:,1:].values
y=data.iloc[:,0].values
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
y = y.reshape((y.shape[0], 1))
print('dimensions de X:', X.shape)
print('dimensions de y:', y.shape)
plt.scatter(X[:,0], X[:, 1], c=y, cmap='summer')
plt.show()
def initialisation(X):
W = np.random.randn(X.shape[1], 1)
b = np.random.randn(1)
return (W, b)
def model(X, W, b):
Z = X.dot(W) + b
A = 1 / (1 + np.exp(-Z))
return A
def log_loss(A, y):
return 1 / len(y) * np.sum(-y * np.log(A) - (1 - y) * np.log(1 - A))
def gradients(A, X, y):
dW = 1 / len(y) * np.dot(X.T, A - y)
db = 1 / len(y) * np.sum(A - y)
return (dW, db)
def update(dW, db, W, b, learning_rate):
W = W - learning_rate * dW
b = b - learning_rate * db
return (W, b)
def predict(X, W, b):
A = model(X, W, b)
# print(A)
return A >= 0.5
from sklearn.metrics import accuracy_score
def artificial_neuron(X, y, learning_rate = 0.1, n_iter = 100):
# initialisation W, b
W, b = initialisation(X)
Loss = []
for i in range(n_iter):
A = model(X, W, b)
Loss.append(log_loss(A, y))
dW, db = gradients(A, X, y)
W, b = update(dW, db, W, b, learning_rate)
y_pred = predict(X, W, b)
print(accuracy_score(y, y_pred))
plt.plot(Loss)
plt.show()
return (W, b)
W, b = artificial_neuron(X, y)
fig, ax = plt.subplots(figsize=(9, 6))
ax.scatter(X[:,0], X[:, 1], c=y, cmap='summer')
x1 = np.linspace(-1, 4, 100)
x2 = ( - W[0] * x1 - b) / W[1]
ax.plot(x1, x2, c='orange', lw=3)
# Save Model Using Pickle
import pandas
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
import pickle
import pickle
# open a file, where you ant to store the data
file = open('model.pkl', 'wb')
# dump information to that file
pickle.dump(model, file)
Your model is a function without parameter, you should dump it with the parameters.
If you can use dill, you can dump all things in main.py
import dill
def foo():
return "function can be serialized by dill"
x = "Whatever you want to dump"
d = dict(vars())
with open("data.pkl", "wb") as f:
dill.dump(d, f)
import dill
with open("data.pkl", "rb") as f:
d = dill.load(f)
for k, v in d.items():
exec(f"{k} = v")
I tried to implement a class based convolutional neural network for face expression recognition data on kaggle using tensorflow. However, for some reason my network does not train and I keep getting the same cost and error rates at each iteration.
I tried using one hot vectors for labels, changing hyperparameters but they did not have any effect on the result.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.utils import shuffle
def get_data():
df = pd.read_csv('../large_files/fer2013/fer2013.csv')
Y = df.emotion.to_numpy()
XX = df.pixels
X = []
for i in range(len(XX)):
X.append(XX[i].split())
X = np.array(X).astype(np.float)
Z = df.Usage
train = (Z == 'Training').to_list()
test = [not i for i in train]
Xtrain = X[train].astype(np.float32)
Xtrain = Xtrain.reshape((Xtrain.shape[0], int(np.sqrt(Xtrain.shape[1])), int(np.sqrt(Xtrain.shape[1])), 1))
Xtest = X[test].astype(np.float32)
Xtest = Xtest.reshape((Xtest.shape[0], int(np.sqrt(Xtest.shape[1])), int(np.sqrt(Xtest.shape[1])), 1))
Ytrain = Y[train].astype(np.int32)
Ytest = Y[test].astype(np.int32)
return Xtrain / 255, Xtest / 255, Ytrain, Ytest
def convpool(X, W, b,poolsz):
conv_out = tf.nn.conv2d(X, W, strides = [1,1,1,1], padding = 'SAME')
conv_out = tf.nn.bias_add(conv_out, b)
pool_out = tf.nn.max_pool(conv_out, ksize=[1,poolsz,poolsz,1], strides=[1,poolsz,poolsz,1], padding = 'SAME')
return tf.nn.relu(pool_out)
def init_filter(shape):
w = np.random.rand(*shape) * np.sqrt(2 / np.prod(shape[:-1]))
return w.astype(np.float32)
def error_rate(Y,T):
return np.mean(Y != T)
class FullyConnectedLayer():
def __init__(self, M1, M2, activation = tf.nn.relu):
W = np.random.randn(M1,M2) / np.sqrt(M1 + M2)
self.W = tf.Variable(W.astype(np.float32))
b = np.zeros(M2)
self.b = tf.Variable(b.astype(np.float32))
self.activation = activation
def forward(self, X):
if self.activation == None:
return tf.matmul(X, self.W) + self.b
else:
return self.activation(tf.matmul(X, self.W) + self.b)
class ConvolutionLayer():
def __init__(self, filter_shape, b, poolsz = 2):
W = init_filter(filter_shape)
self.W = tf.Variable(W)
self.b = tf.Variable(b.astype(np.float32))
self.poolsize = poolsz
def forward(self, X):
return convpool(X, self.W, self.b, self.poolsize)
class CNN():
def __init__(self, filter_shapes, dense_layer_sizes):
self.filter_shapes = filter_shapes #List of shapes
self.dense_layer_sizes = dense_layer_sizes # List of hidden units for dense layers
def fit(self, trainset, testset, learning_rate = 0.001, momentum = 0.9, decay = 0.99, batch_sz = 200, poolsize = 2):
learning_rate = np.float32(learning_rate)
momentum = np.float32(momentum)
decay = np.float32(decay)
Xtrain = trainset[0]
Ytrain = trainset[1]
Xtest = testset[0]
Ytest = testset[1]
K = len(set(Ytrain))
# Crop Train and Test sets for divisibility to batch size
Ntrain = len(Ytrain)
Ntrain = Ntrain // batch_sz * batch_sz
Xtrain = Xtrain[:Ntrain,]
Ytrain = Ytrain[:Ntrain]
Ntest = len(Ytest)
Ntest = Ntest//batch_sz * batch_sz
Xtest = Xtest[:Ntest,]
Ytest = Ytest[:Ntest]
X_shape = Xtrain.shape
width = X_shape[1]
height = X_shape[2]
# Create Convolution Layers and Store Them
self.convolutionlayers = []
for shape in self.filter_shapes:
b = np.zeros(shape[-1], dtype = np.float32)
conv = ConvolutionLayer(shape, b, poolsz = poolsize)
self.convolutionlayers.append(conv)
# Size of both width and height is halved in each max pooling so in input size of first fully connected layer is found like this
final_filter_shape = self.filter_shapes[-1]
num_convs = len(self.convolutionlayers)
M1 = int((width/(2**num_convs)) * (height/(2**num_convs)) * final_filter_shape[-1])
# Create Fully Connected Layers and Store Them
self.vanillalayers = []
for M2 in self.dense_layer_sizes:
layer = FullyConnectedLayer(M1,M2)
self.vanillalayers.append(layer)
M1 = M2
final_layer = FullyConnectedLayer(M1, K, activation = None)
self.vanillalayers.append(final_layer)
self.AllLayers = self.convolutionlayers + self.vanillalayers
tfX = tf.placeholder(dtype=tf.float32, shape= (batch_sz, width, height, 1))
tfT = tf.placeholder(dtype=tf.int32, shape = (batch_sz,))
Yish = self.forward(tfX)
cost = tf.reduce_sum(tf.nn.sparse_softmax_cross_entropy_with_logits(logits = Yish, labels=tfT))
train_op = tf.train.RMSPropOptimizer(learning_rate=learning_rate, decay=decay, momentum=momentum).minimize(cost)
predict_op = self.predict(tfX)
max_epoch = 10
print_period = 20
num_batches = Ntrain // batch_sz
TestCosts = []
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
for i in range(max_epoch):
Xtrain, Ytrain = shuffle(Xtrain, Ytrain)
for j in range(num_batches):
Xbatch = Xtrain[j * batch_sz: (j + 1)*batch_sz,]
Ybatch = Ytrain[j * batch_sz: (j + 1)*batch_sz,]
sess.run(train_op, feed_dict = {tfX : Xbatch, tfT : Ybatch})
if j % print_period == 0:
test_cost = 0
prediction = np.zeros(Ntest)
for k in range(Ntest // batch_sz):
Xtestbatch = Xtest[k*batch_sz:(k*batch_sz + batch_sz),]
Ytestbatch = Ytest[k*batch_sz:(k*batch_sz + batch_sz),]
test_cost += sess.run(cost, feed_dict={tfX: Xtestbatch, tfT: Ytestbatch})
prediction[k*batch_sz:(k*batch_sz + batch_sz)] = sess.run(
predict_op, feed_dict={tfX: Xtestbatch})
err = error_rate(prediction, Ytest)
print("Cost / err at iteration i=%d, j=%d: %.3f / %.3f" % (i, j, test_cost, err))
TestCosts.append(test_cost)
plt.plot(TestCosts)
plt.show()
def forward(self, X):
Z = X
count = 0
for layer in self.AllLayers:
# If next layer is fully connected layer, reshape Z
if count >= len(self.convolutionlayers):
Z_shape = Z.get_shape().as_list()
Z = tf.reshape(Z, [Z_shape[0], np.prod(Z_shape[1:])])
Z = layer.forward(Z)
count += 1
return Z
def predict(self, X):
out = self.forward(X)
return tf.math.argmax(out, axis = 1)
def main():
Xtrain, Xtest, Ytrain, Ytest = get_data()
trainset = [Xtrain, Ytrain]
testset = [Xtest, Ytest]
filtershapes = [(5,5,1,10), (5,5,10,20), (5,5,20,40)]
fullylayers = [500,500]
cnn = CNN(filtershapes, fullylayers)
cnn.fit(trainset, testset)
if __name__ == '__main__':
main()
I have X_train data (class 'pandas.core.series.Series') with content
print(X_train)
0 WASHINGTON — Congressional Republicans have...
1 After the bullet shells get counted, the blood...
2 When Walt Disney’s “Bambi” opened in 1942, cri...
3 Death may be the great equalizer, but it isn’t...
4 SEOUL, South Korea — North Korea’s leader, ...
then I want to prepare data for classification:
count_vect = CountVectorizer()
X_train_counts = count_vect.fit_transform(X_train)
tfidf_transformer = TfidfTransformer()
X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)
and X_train_tfidf and X_train_counts now is (class 'scipy.sparse.csr.csr_matrix')
But in my Logistic Regression function I can operate with numpy arrays. What should I do to fix it?
class LogisticRegression2:
def __init__(self, lr=0.01, num_iter=100000, fit_intercept=True, theta=0, verbose=False):
self.lr = lr
self.num_iter = num_iter
self.fit_intercept = fit_intercept
self.theta = theta
self.verbose = verbose
def __add_intercept(self, X):
intercept = np.ones((X.shape[0], 1))
return np.concatenate((intercept, X), axis=1)
def __sigmoid(self, z):
return 1 / (1 + np.exp(-z))
#return .5 * (1 + np.tanh(.5 * z))
def __loss(self, h, y):
return (-y * np.log(h) - (1 - y) * np.log(1 - h)).mean()
def fit(self, X, y):
if self.fit_intercept:
X = self.__add_intercept(X)
# weights initialization
self.theta = np.zeros(X.shape[1])
for i in range(self.num_iter):
z = np.dot(X, self.theta)
h = self.__sigmoid(z)
gradient = np.dot(X.T, (h - y)) / y.size
self.theta -= self.lr * gradient
if(self.verbose == True and i % 10000 == 0):
z = np.dot(X, self.theta)
h = self.__sigmoid(z)
print('loss: ', self.__loss(h, y))
def predict_prob(self, X):
if self.fit_intercept:
X = self.__add_intercept(X)
return self.__sigmoid(np.dot(X, self.theta))
def predict(self, X, threshold=0.5):
return self.predict_prob(X) >= threshold
If I use
X_train_dense = X_train_tfidf.toarray()
model = LogisticRegression2(lr=0.1, num_iter=100)
model.fit(X_train_dense, y_train)
preds = model.predict(X_train_dense)
I have have TypeError: unsupported operand type(s) for -: 'float' and 'str'
in
`gradient = np.dot(X.T, (h - y)) / y.size`
If i try
def __add_intercept(self, X):
intercept = np.ones((X.shape[0], 1))
return hstack((intercept, X))
I have memory error
I am trying to run below mentioned code, taking from https://github.com/stephencwelch/Neural-Networks-Demystified.
import numpy as np
%pylab inline
X = np.array(([3,5], [5,1], [10,2]), dtype=float)
y = np.array(([75], [82], [93]), dtype=float)
X = X/np.amax(X, axis=0)
y = y/100 #Max test score is 100
class Neural_Network(object):
def __init__(self):
#Define Hyperparameters
self.inputLayerSize = 2
self.outputLayerSize = 1
self.hiddenLayerSize = 3
self.W1 = np.random.randn(self.inputLayerSize, self.hiddenLayerSize)
self.W2 = np.random.randn(self.hiddenLayerSize,self.outputLayerSize)
def forward(self, X):
self.z2 = np.dot(X, self.W1)
self.a2 = self.sigmoid(self.z2)
self.z3 = np.dot(self.a2, self.W2)
yHat = self.sigmoid(self.z3)
return yHat
def sigmoid(self, z):
return 1/(1+np.exp(-z))
def sigmoidPrime(self,z):
return np.exp(-z)/((1+np.exp(-z))**2)
def costFunction(self, X, y):
self.yHat = self.forward(X)
J = 0.5*sum((y-self.yHat)**2)
return J
def costFunctionPrime(self, X, y):
self.yHat = self.forward(X)
delta3 = np.multiply(-(y-self.yHat), self.sigmoidPrime(self.z3))
dJdW2 = np.dot(self.a2.T, delta3)
delta2 = np.dot(delta3, self.W2.T)*self.sigmoidPrime(self.z2)
dJdW1 = np.dot(X.T, delta2)
return dJdW1, dJdW2
def getParams(self):
#Get W1 and W2 unrolled into vector:
params = np.concatenate((self.W1.ravel(), self.W2.ravel()))
return params
def setParams(self, params):
W1_start = 0
W1_end = self.hiddenLayerSize * self.inputLayerSize
self.W1 = np.reshape(params[W1_start:W1_end], (self.inputLayerSize , self.hiddenLayerSize))
W2_end = W1_end + self.hiddenLayerSize*self.outputLayerSize
self.W2 = np.reshape(params[W1_end:W2_end], (self.hiddenLayerSize, self.outputLayerSize))
def computeGradients(self, X, y):
dJdW1, dJdW2 = self.costFunctionPrime(X, y)
return np.concatenate((dJdW1.ravel(), dJdW2.ravel()))
def computeNumericalGradient(N, X, y):
paramsInitial = N.getParams()
numgrad = np.zeros(paramsInitial.shape)
perturb = np.zeros(paramsInitial.shape)
e = 1e-4
for p in range(len(paramsInitial)):
perturb[p] = e
N.setParams(paramsInitial + perturb)
loss2 = N.costFunction(X, y)
N.setParams(paramsInitial - perturb)
loss1 = N.costFunction(X, y)
numgrad[p] = (loss2 - loss1) / (2*e)
perturb[p] = 0
N.setParams(paramsInitial)
return numgrad
from scipy import optimize
class trainer(object):
def __init__(self, N):
self.N = N
def callbackF(self, params):
self.N.setParams(params)
self.J.append(self.N.costFunction(self.X, self.y))
def costFunctionWrapper(self, params, X, y):
self.N.setParams(params)
cost = self.N.costFunction(X, y)
grad = self.N.computeGradients(X,y)
return cost, grad
def train(self, X, y):
self.X = X
self.y = y
self.J = []
params0 = self.N.getParams()
options = {'maxiter': 200, 'disp' : True}
_res = optimize.minimize(self.costFunctionWrapper, params0, jac=True, method='BFGS', \
args=(X, y), options=options, callback=self.callbackF)
self.N.setParams(_res.x)
self.optimizationResults = _res
NN = Neural_Network()
T = trainer(NN)
T.train(X,y)
but I am having this error:
Traceback (most recent call last):
File "<ipython-input-50-6b098e89c488>", line 3, in <module>
T.train(X,y)
AttributeError: 'trainer' object has no attribute 'train'
Hence, I am wondering why the attribute "train" is not defined?
I am using Spyder (Python 2.7)