Given data in the form x, y such that y = A sin(B(x) + C) + D, identify A, B, C, and D using Tensorflow.
I have written the following code to do so, but unfortunately it does not learn. Note here the problem is not to predict the sine curve correctly, but to identify the variables. Bonus points if it is possible to change the function's form to y = A * X_2 * sin (B(X_1) + C) + D.
x = np.linspace(0, 100, 1000)
A = np.random.normal(1)
B = np.random.normal(.5)
C = np.random.normal(1)
D = np.random.normal(1)
y = A*np.sin((B*x) + C) + D
x = tf.constant([x.astype('float32')])
y = tf.constant([y.astype('float32')])
class Addition(tf.Module):
def __init__(self, inputs, name=None):
super().__init__(name=name)
self.b_1 = tf.Variable(tf.random.normal([inputs]), name='b1')
self.b_2 = tf.Variable(tf.random.normal([inputs]), name='b2')
def __call__(self, x):
out = tf.math.multiply(x, self.b_1) + self.b_2
return out
class Sinusoid(tf.Module):
def __init__(self, inputs, name=None):
super().__init__(name=name)
def __call__(self, x):
sine = tf.math.sin(x)
return sine
class Sine_Model(tf.Module):
def __init__(self, name=None):
super().__init__(name=name)
self.add_1 = Addition(inputs=1)
self.sin_1 = Sinusoid(inputs=1)
self.add_2 = Addition(inputs=1)
def __call__(self, x):
x = self.add_1(x)
x = self.sin_1(x)
x = self.add_2(x)
return x
model = Sine_Model(name='sine')
loss_object = tf.keras.losses.MeanSquaredError()
optimizer = tf.keras.optimizers.Adam(learning_rate=.1)
train_loss = tf.keras.metrics.Mean(name='train_loss')
#tf.function
def train_step(x, y):
with tf.GradientTape() as tape:
predictions = model(x)
loss = loss_object(y, predictions)
gradients = tape.gradient(loss, model.trainable_variables)
optimizer.apply_gradients(zip(gradients, model.trainable_variables))
train_loss(loss)
EPOCHS = 200
for epoch in range(EPOCHS):
# Reset the metrics at the start of the next epoch
train_loss.reset_states()
train_step(x, y)
template = 'Epoch {}, Loss: {}'
#print(template.format(epoch + 1,
# train_loss.result()))
y_predicted = sine_model(x)
plt.scatter(x, y_predicted.numpy()[0])
plt.scatter(x, y, c='r')
I did see an answer to this question using scipy here. But I would like to see if it is possible to do using Tensorflow specifically, as I am interested in modularity and would like to be able to solve the problem noted as a bonus above (y = A * X_2 * sin (B(X_1) + C) + D).
Thanks!
Related
I am working on an implementation of MAML (see https://arxiv.org/pdf/1703.03400.pdf) in Jax.
When training on a distribution of simple linear regression tasks it seems to perform fine (takes a while to converge but ultimately works).
However when training on a tasks distributed like A * sin(B + X) where A, B are random variables all the weights in the network converge to 0. training results
This is clearly not right.
Thanks in advance for any help provided.
Full code here https://colab.research.google.com/drive/1YoOkwo5tI42LeIbBOxpImkN55Kg9wScl?usp=sharing or see below for minimal code.
Task Generation code:
class MAMLDataLoader:
def __init__(self, sample_task_fn, num_tasks, batch_size):
self.sample_task_fn = sample_task_fn
self.num_tasks = num_tasks
self.batch_size = batch_size
def sample_tasks(self, key):
XS = jnp.empty((self.num_tasks, 2 * self.batch_size, 1))
YS = jnp.empty((self.num_tasks, 2 * self.batch_size, 1))
for i in range(self.num_tasks):
key, subkey = random.split(key)
xs, ys = self.sample_task_fn(self.batch_size * 2, subkey)
XS = XS.at[i].set(xs)
YS = YS.at[i].set(ys)
x_train, x_test = XS[:, :self.batch_size], XS[:, self.batch_size:]
y_train, y_test = YS[:, :self.batch_size], YS[:, self.batch_size:]
return x_train, y_train, x_test, y_test
def dummy_input(self):
key = random.PRNGKey(0)
x = self.sample_task_fn(1, key)[0][0]
return x
def sample_sinusoidal_task(samples, key):
# y = a * sin(b + x)
xs_key, amplitude_key, phase_key = random.split(key, num=3)
amplitude = random.uniform(amplitude_key, (1, 1))
phase = random.uniform(phase_key, (1, 1)) * jnp.pi * 2
xs = (random.uniform(xs_key, (samples, 1)) * 4 - 2) * jnp.pi
ys = amplitude * jnp.sin(xs + phase)
return xs, ys
Here is the main MAML code:
class MAMLTrainer:
def __init__(self, model, alpha, optimiser, inner_steps=1):
self.model = model
self.alpha = alpha
self.optimiser = optimiser
self.inner_steps = inner_steps
self.jit_step = jit(self.step)
def loss(self, params, x, y):
preds = self.model.apply(params, x)
return jnp.mean(jnp.inner(y - preds, y - preds) / 2.0)
def update(self, params, x, y, inner_steps=None):
if inner_steps is None:
inner_steps = self.inner_steps
loss_grad = grad(self.loss)
def _update(i, params):
grads = loss_grad(params, x, y)
new_params = tree_map(lambda p, g: p - self.alpha * g, params, grads)
return new_params
return lax.fori_loop(0, inner_steps, _update, params)
def meta_loss(self, params, x1, y1, x2, y2):
return self.loss(self.update(params, x1, x2), x2, y2)
def batch_meta_loss(self, params, x1, y1, x2, y2):
return jnp.mean(vmap(partial(self.meta_loss, params))(x1, y1, x2, y2))
def step(self, params, optimiser, x1, y1, x2, y2):
loss, grads = value_and_grad(self.batch_meta_loss)(params, x1, y1, x2, y2)
updates, opt_state = self.optimiser.update(grads, optimiser, params)
params = optax.apply_updates(params, updates)
return params, loss
def train(self, dataloader, steps, key, params=None):
if params is None:
key, subkey = random.split(key)
params = self.model.init(subkey, dataloader.dummy_input())
optimiser = self.optimiser.init(params)
pbar, losses = tqdm(range(steps), desc='Training'), []
for epoch in pbar:
key, subkey = random.split(key)
params, loss = self.jit_step(params, optimiser, *dataloader.sample_tasks(subkey))
losses.append(loss)
if epoch % 100 == 0:
avg_loss = jnp.mean(jnp.array(losses[-100:]))
pbar.set_postfix_str(f'current_loss: {loss:.3f}, running_loss_100_epochs: {avg_loss:.3f}')
return params, jnp.array(losses)
def n_shot_learn(self, x_train, y_train, params, n):
return self.update(params, x_train, y_train, n)
Training Code:
class SimpleMLP(nn.Module):
features: Sequence[int]
#nn.compact
def __call__(self, inputs):
x = inputs
for i, feat in enumerate(self.features[:-1]):
x = nn.Dense(feat)(x)
x = nn.relu(x)
return nn.Dense(self.features[-1])(x)
model = SimpleMLP([64, 64, 1])
optimiser = optax.adam(1e-3)
trainer = MAMLTrainer(model, 0.1, optimiser, 1)
dataloader = MAMLDataLoader(sample_sinusoidal_task, 2, 100)
key = random.PRNGKey(0)
params, losses = trainer.train(dataloader, 10000, key)
Hi I've been working on a neural network to tackle the MNIST dataset, but when I run the code the accuracy begins to increase but eventually results in 0.098 accuracy, I also encounter an overflow error in exp when calculating the SoftMax values. I have tried to debug my code but I don't understand where I'm going wrong. If anyone can point me in the right direction that would be great and if you can't find an error could you give me any tips on techniques to try to debug this. Thanks in advance.
import numpy as np
import pandas as pd
df = pd.read_csv('../input/digit-recognizer/train.csv')
data = np.array(df.values)
data = data.T
data
Y = data[0,:]
X = data[1:,:]
Y_train = Y[:41000]
X_train = X[:,:41000]
X_train = X_train/255
Y_val = Y[41000:]
X_val = X[:,41000:]
X_val = X_val/255
print(np.max(X_train))
class NeuralNetwork:
def __init__(self, n_in, n_out):
self.w1, self.b1 = self.Generate_Weights_Biases(10,784)
self.w2, self.b2 = self.Generate_Weights_Biases(10,10)
def Generate_Weights_Biases(self, n_in, n_out):
weights = 0.01*np.random.randn(n_in, n_out)
biases = np.zeros((n_in,1))
return weights, biases
def forward(self, X):
self.Z1 = self.w1.dot(X) + self.b1
self.a1 = self.ReLu(self.Z1)
self.z2 = self.w2.dot(self.a1) + self.b2
y_pred = self.Softmax(self.z2)
return y_pred
def ReLu(self, Z):
return np.maximum(Z,0)
def Softmax(self, Z):
#exponentials = np.exp(Z)
#sumexp = np.sum(np.exp(Z), axis=0)
#print(Z)
return np.exp(Z)/np.sum(np.exp(Z))
def ReLu_Derv(self, x):
return np.greaterthan(x, 0).astype(int)
def One_hot_encoding(self, Y):
one_hot = np.zeros((Y.size, 10))
rows = np.arange(Y.size)
one_hot[rows, Y] = 1
one_hot = one_hot.T
return one_hot
def Get_predictions(self, y_pred):
return np.argmax(y_pred, 0)
def accuracy(self, pred, Y):
return np.sum(pred == Y)/Y.size
def BackPropagation(self, X, Y, y_pred, lr=0.01):
m = Y.size
one_hot_y = self.One_hot_encoding(Y)
e2 = y_pred - one_hot_y
derW2 = (1/m)* e2.dot(self.a1.T)
derB2 =(1/m) * e2
#derB2 = derB2.reshape(10,1)
e1 = self.w2.T.dot(e2) * self.ReLu(self.a1)
derW1 = (1/m) * e1.dot(X.T)
derB1 = (1/m) * e1
#derB1 = derB1.reshape(10,1)
self.w1 = self.w1 - lr*derW1
self.b1 = self.b1 - lr*np.sum(derB1, axis=1, keepdims=True)
self.w2 = self.w2 - lr*derW2
self.b2 = self.b2 - lr*np.sum(derB2, axis=1, keepdims=True)
def train(self, X, Y, epochs = 1000):
for i in range(epochs):
y_pred = self.forward(X)
predict = self.Get_predictions(y_pred)
accuracy = self.accuracy(predict, Y)
print(accuracy)
self.BackPropagation(X, Y, y_pred)
return self.w1, self.b1, self.w2, self.b2
NN = NeuralNetwork(X_train, Y_train)
w1,b1,w2,b2 = NN.train(X_train,Y_train)
I found the following errors:
Your softmax implementation doesn't work because of terrific numeric errors you get exponentiating potentially large numbers to obtain something between 0 and 1. And besides, you forgot to specify the summation axis in the denominator. Here is a working implementation:
def Softmax(self, Z):
e = np.exp(Z - Z.max(axis=0, keepdims=True))
return e/e.sum(axis=0, keepdims=True)
(Here and below I skip coding-style remarks that are not essential in this context. Like that this should be a class method or a stand-alone function etc.)
Your ReLu derivative implementation doesn't work for me at all. May be I have a different numpy version. This one works:
def ReLu_Derv(self, x):
return (x > 0).astype(int)
You need to actually use this implementation in BackPropagation:
e1 = self.w2.T.dot(e2) * self.ReLu_Derv(self.a1)
With these amendments, I managed to achieve 91.0% accuracy after 100 iteration with LR=0.1. I loaded MNIST from Keras with this code:
(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data()
X = train_images.reshape(-1, 28*28).T
Y = train_labels
Hi I'm trying to train my own designed neural network on the MNIST handwritten data set and every time I run this code the accuracy starts to increase then decreases and I get an overflow warning. Can someone explain whether my code is just poor and messy or whether I have just missed something little out. Thanks in advance
import numpy as np
import pandas as pd
df = pd.read_csv('../input/digit-recognizer/train.csv')
data = np.array(df.values)
data = data.T
data
Y = data[0,:]
X = data[1:,:]
Y_train = Y[:41000]
X_train = X[:,:41000]
X_train = X_train/255
Y_val = Y[41000:]
X_val = X[:,41000:]
X_val = X_val/255
print(np.max(X_train))
class NeuralNetwork:
def __init__(self, n_in, n_out):
self.w1, self.b1 = self.Generate_Weights_Biases(10,784)
self.w2, self.b2 = self.Generate_Weights_Biases(10,10)
def Generate_Weights_Biases(self, n_in, n_out):
weights = 0.01*np.random.randn(n_in, n_out)
biases = np.zeros((n_in,1))
return weights, biases
def forward(self, X):
self.Z1 = self.w1.dot(X) + self.b1
self.a1 = self.ReLu(self.Z1)
self.z2 = self.w2.dot(self.a1) + self.b1
y_pred = self.Softmax(self.z2)
return y_pred
def ReLu(self, Z):
return np.maximum(Z,0)
def Softmax(self, Z):
#exponentials = np.exp(Z)
#sumexp = np.sum(np.exp(Z), axis=0)
#print(Z)
return np.exp(Z)/np.sum(np.exp(Z))
def ReLu_Derv(self, x):
return np.greaterthan(x, 0).astype(int)
def One_hot_encoding(self, Y):
one_hot = np.zeros((Y.size, 10))
rows = np.arange(Y.size)
one_hot[rows, Y] = 1
one_hot = one_hot.T
return one_hot
def Get_predictions(self, y_pred):
return np.argmax(y_pred, 0)
def accuracy(self, pred, Y):
return np.sum(pred == Y)/Y.size
def BackPropagation(self, X, Y, y_pred, lr=0.01):
m = Y.size
one_hot_y = self.One_hot_encoding(Y)
e2 = y_pred - one_hot_y
derW2 = (1/m)* e2.dot(self.a1.T)
derB2 =(1/m) * np.sum(e2,axis=1)
derB2 = derB2.reshape(10,1)
e1 = self.w2.T.dot(e2) * self.ReLu(self.a1)
derW1 = (1/m) * e1.dot(X.T)
derB1 = (1/m) * np.sum(e1, axis=1)
derB1 = derB1.reshape(10,1)
self.w1 = self.w1 - lr*derW1
self.b1 = self.b1 - lr*derB1
self.w2 = self.w2 - lr*derW2
self.b2 = self.b2 - lr*derB2
def train(self, X, Y, epochs = 1000):
for i in range(epochs):
y_pred = self.forward(X)
predict = self.Get_predictions(y_pred)
accuracy = self.accuracy(predict, Y)
print(accuracy)
self.BackPropagation(X, Y, y_pred)
return self.w1, self.b1, self.w2, self.b2
NN = NeuralNetwork(X_train, Y_train)
w1,b1,w2,b2 = NN.train(X_train,Y_train)
You should use a different bias for the second layer
self.z2 = self.w2.dot(self.a1) + self.b1 # not b1
self.z2 = self.w2.dot(self.a1) + self.b2 # but b2
When doing something like this
derB2 =(1/m) * np.sum(e2,axis=1)
you would like to use (keepdims = True) to make sure that derB2.shape is (something,1) but not (something, ). It makes your code more rigorous.
I'm working on the signal compression and reconstruction with VAE. I've trained 1600 fragments but the values of 1600 reconstructed signals are very similar. Moreover, results from same batch are almost consistent. As using the VAE, loss function of the model contains binary cross entropy (BCE) and the output of the train model should be located between 0 to 1 (The input data also normalized to 0~1).
VAE model(LSTM) :
class LSTM_VAE(nn.Module):
def __init__(self,
input_size=3000,
hidden=[1024, 512, 256, 128, 64],
latent_size=64,
num_layers=8,
bidirectional=True):
super().__init__()
self.input_size = input_size
self.hidden = hidden
self.latent_size = latent_size
self.num_layers = num_layers
self.bidirectional = bidirectional
self.actv = nn.LeakyReLU()
self.encode = nn.LSTM(input_size=self.input_size,
hidden_size=self.hidden[0],
num_layers=self.num_layers,
batch_first=True,
bidirectional=True)
self.bn_encode = nn.BatchNorm1d(1)
self.decode = nn.LSTM(input_size=self.latent_size,
hidden_size=self.hidden[2],
num_layers=self.num_layers,
batch_first=True,
bidirectional=True)
self.bn_decode = nn.BatchNorm1d(1)
self.fc1 = nn.Linear(self.hidden[0]*2, self.hidden[1])
self.fc2 = nn.Linear(self.hidden[1], self.hidden[2])
self.fc31 = nn.Linear(self.hidden[2], self.latent_size)
self.fc32 = nn.Linear(self.hidden[2], self.latent_size)
self.bn1 = nn.BatchNorm1d(1)
self.bn2 = nn.BatchNorm1d(1)
self.bn3 = nn.BatchNorm1d(1)
self.fc4 = nn.Linear(self.hidden[2]*2, self.hidden[1])
self.fc5 = nn.Linear(self.hidden[1], self.hidden[0])
self.fc6 = nn.Linear(self.hidden[0], self.input_size)
self.bn4 = nn.BatchNorm1d(1)
self.bn5 = nn.BatchNorm1d(1)
self.bn6 = nn.BatchNorm1d(1)
def encoder(self, x):
x = torch.unsqueeze(x, 1)
x, _ = self.encode(x)
x = self.actv(x)
x = self.fc1(x)
x = self.actv(x)
x = self.fc2(x)
x = self.actv(x)
mu = self.fc31(x)
log_var = self.fc32(x)
return mu, log_var
def decoder(self, z):
z, _ = self.decode(z)
z = self.bn_decode(z)
z = self.actv(z)
z = self.fc4(z)
z = self.bn4(z)
z = self.fc5(z)
z = self.bn5(z)
z = self.fc6(z)
z = self.bn6(z)
z = torch.sigmoid(z)
return torch.squeeze(z)
def sampling(self, mu, log_var):
std = torch.exp(0.5 * log_var)
eps = torch.randn_like(std)
return mu + eps * std
def forward(self, x):
mu, log_var = self.encoder(x.view(-1, self.input_size))
z = self.sampling(mu, log_var)
z = self.decoder(z)
return z, mu, log_var
Loss function and Train code :
def lossF(recon_x, x, mu, logvar, input_size):
BCE = F.binary_cross_entropy(recon_x, x.view(-1, input_size), reduction='sum')
KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
return BCE + KLD
optim = torch.optim.Adam(model.parameters(), lr=opt.lr)
for epoch in range(opt.epoch):
for batch_idx, data in enumerate(train_set):
data = data.to(device)
optim.zero_grad()
recon_x, mu, logvar = model(data)
loss = lossF(recon_x, data, mu, logvar, opt.input_size)
loss.backward()
train_loss += loss.item()
optim.step()
I built the code by refer the example codes of others and only changed very few parameters. I rebuilt the code, change the dataset, update parameters but nothing worked. If you have any suggestion to solve this problem, PLEASE let me know.
I've find out the reason of the issue. It turns out that the decoder model derives output value in the range of 0.4 to 0.6 to stabilize the BCE loss. BCE loss can't be 0 even if the prediction is correct to answer. Also the loss value is non-linear to the range of the output. The easiest way to lower the loss is give 0.5 for the output, and my model did.
To avoid this error, I standardize my data and added some outlier data to avoid BCE issue. VAE is such complicated network for sure.
I have X_train data (class 'pandas.core.series.Series') with content
print(X_train)
0 WASHINGTON — Congressional Republicans have...
1 After the bullet shells get counted, the blood...
2 When Walt Disney’s “Bambi” opened in 1942, cri...
3 Death may be the great equalizer, but it isn’t...
4 SEOUL, South Korea — North Korea’s leader, ...
then I want to prepare data for classification:
count_vect = CountVectorizer()
X_train_counts = count_vect.fit_transform(X_train)
tfidf_transformer = TfidfTransformer()
X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)
and X_train_tfidf and X_train_counts now is (class 'scipy.sparse.csr.csr_matrix')
But in my Logistic Regression function I can operate with numpy arrays. What should I do to fix it?
class LogisticRegression2:
def __init__(self, lr=0.01, num_iter=100000, fit_intercept=True, theta=0, verbose=False):
self.lr = lr
self.num_iter = num_iter
self.fit_intercept = fit_intercept
self.theta = theta
self.verbose = verbose
def __add_intercept(self, X):
intercept = np.ones((X.shape[0], 1))
return np.concatenate((intercept, X), axis=1)
def __sigmoid(self, z):
return 1 / (1 + np.exp(-z))
#return .5 * (1 + np.tanh(.5 * z))
def __loss(self, h, y):
return (-y * np.log(h) - (1 - y) * np.log(1 - h)).mean()
def fit(self, X, y):
if self.fit_intercept:
X = self.__add_intercept(X)
# weights initialization
self.theta = np.zeros(X.shape[1])
for i in range(self.num_iter):
z = np.dot(X, self.theta)
h = self.__sigmoid(z)
gradient = np.dot(X.T, (h - y)) / y.size
self.theta -= self.lr * gradient
if(self.verbose == True and i % 10000 == 0):
z = np.dot(X, self.theta)
h = self.__sigmoid(z)
print('loss: ', self.__loss(h, y))
def predict_prob(self, X):
if self.fit_intercept:
X = self.__add_intercept(X)
return self.__sigmoid(np.dot(X, self.theta))
def predict(self, X, threshold=0.5):
return self.predict_prob(X) >= threshold
If I use
X_train_dense = X_train_tfidf.toarray()
model = LogisticRegression2(lr=0.1, num_iter=100)
model.fit(X_train_dense, y_train)
preds = model.predict(X_train_dense)
I have have TypeError: unsupported operand type(s) for -: 'float' and 'str'
in
`gradient = np.dot(X.T, (h - y)) / y.size`
If i try
def __add_intercept(self, X):
intercept = np.ones((X.shape[0], 1))
return hstack((intercept, X))
I have memory error