Multiple input linear regression cannot predict outputs of large variance - python

I succeeded in building a linear regression neural network with 1 input and 1 output.
I am now building a linear regression neural network with 5 inputs and 1 output.
Here is the formula:
y = 3e + d^2 + 9c + 11b^6 + a + 19
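In code, one target value would be computed like this (a small sketch restating the same formula):
def target(a, b, c, d, e):
    # y = 3e + d^2 + 9c + 11b^6 + a + 19; the 11*b**6 term grows very fast,
    # so the targets span a wide range.
    return 3*e + d**2 + 9*c + 11*b**6 + a + 19

print(target(1.0, 2.0, 3.0, 4.0, 5.0))  # 782.0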
However, no matter how many neurons, epochs, and hidden layers I use, I cannot get a good prediction.
The predicted outputs always fall within a small range, while the expected outputs vary widely.
[Image: predicted output vs expected output]
I guess it may be due to the choice of activation function, loss function, or optimizer.
If not, a multiple-input neural network may need to be built a different way.
Here is my code:
import torch
import torch.nn as nn #neural network model
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch.nn.functional as F
from torch.autograd import Variable
from sklearn.preprocessing import MinMaxScaler
from pickle import dump
#Load datasets
dataset = pd.read_csv('testB_200.csv')
X = dataset.iloc[:, :-1].values
Y = dataset.iloc[:, -1:].values
X_scaler = MinMaxScaler()
Y_scaler = MinMaxScaler()
print(X_scaler.fit(X))
print(Y_scaler.fit(Y))
X = X_scaler.transform(X)
Y = Y_scaler.transform(Y)
#save the scaler
dump(X_scaler, open('X_scaler.pkl', 'wb'))
dump(Y_scaler, open('Y_scaler.pkl', 'wb'))
train = int((len(dataset)+1)*0.8)
test = train + 1
print(train)
print(test)
x_temp_train = X[:train]
y_temp_train = Y[:train]
x_temp_test = X[test:]
y_temp_test = Y[test:]
X_train = torch.FloatTensor(x_temp_train)
Y_train = torch.FloatTensor(y_temp_train)
X_test = torch.FloatTensor(x_temp_test)
Y_test = torch.FloatTensor(y_temp_test)
D_in = 5 # D_in is input features
H = 12 # H is hidden dimension
H2 =8
H3 =4
D_out = 1 # D_out is output features.
#Define an Artificial Neural Network model
class Net(nn.Module):
    #------------------3 hidden Layers------------------------------
    def __init__(self, D_in, H, H2, H3, D_out):
        super(Net, self).__init__()
        self.linear1 = nn.Linear(D_in, H)
        self.linear2 = nn.Linear(H, H2)
        self.linear3 = nn.Linear(H2, H3)
        self.linear4 = nn.Linear(H3, D_out)

    def forward(self, x):
        #activation function should be used here e.g: hidden = F.relu(...)
        h_relu = self.linear1(x).clamp(min=0) #min=0 is like ReLU
        middle = self.linear2(h_relu).clamp(min=0)
        middle2 = self.linear3(middle).clamp(min=0)
        prediction = self.linear4(middle2)
        return prediction
model = Net(D_in, H, H2, H3, D_out)
print(model)
#Define a Loss function and optimizer
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.2) #2e-7, lr=learning rate=0.2
#Training model
inputs = Variable(X_train)
outputs = Variable(Y_train)
inputs_val = Variable(X_test)
outputs_val = Variable(Y_test)
loss_values = []
val_values = []
epoch = []
epoch_value=25
for i in range(epoch_value):
    for phase in ['train', 'val']:
        if phase == 'train':
            #print('train loss')
            model.train() # Set model to training mode
            prediction = model(inputs)
            loss = criterion(prediction, outputs)
            #print(loss)
            loss_values.append(loss.item())
            optimizer.zero_grad() #zero the parameter gradients
            epoch.append(i)
            loss.backward() #compute gradients(dloss/dx)
            optimizer.step() #updates the parameters
        elif phase == 'val':
            #print('validation loss')
            model.eval() # Set model to evaluate mode
            prediction_val = model(inputs_val)
            loss_val = criterion(prediction_val, outputs_val)
            #print(loss_val)
            val_values.append(loss_val.item())
            optimizer.zero_grad() #zero the parameter gradients
torch.save(model.state_dict(), 'formula2.pth') #save model
#Plot train_loss vs validation loss
plt.plot(epoch,loss_values)
plt.plot(epoch, val_values)
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train','validation'], loc='upper left')
plt.show()
#plot prediction vs expected value
prediction_val = prediction_val.detach().numpy()
prediction_val = Y_scaler.inverse_transform(prediction_val)
#print('predict')
#print(prediction_val)
Y_test = Y_scaler.inverse_transform(Y_test)
#print('test')
#print(Y_test)
plt.plot(Y_test)
plt.plot(prediction_val)
plt.legend(['expected','predict'], loc='upper left')
plt.show()
[Image: model loss vs validation loss]
[Image: validation vs expected outputs]
Thanks for your time.

Related

How do I retrieve the weights estimated from a neural net using skorch?

I've trained a simple neural net using skorch to make it sklearn compatible and I would like to know how to retrieve the actual estimated weights.
Here's a replicable example of what I need.
The neural net presented here uses 10 features, has one hidden layer of 2 nodes, uses ReLU activations, and linearly combines the outputs of the 2 nodes.
import torch
import numpy as np
from torch.autograd import Variable
# Create example data
np.random.seed(2022)
train_size = 1000
n_features= 10
X_train = np.random.rand(n_features, train_size).astype("float32")
l2_params_1 = np.random.rand(1,n_features).astype("float32")
l2_params_2 = np.random.rand(1,n_features).astype("float32")
l1_X = np.matmul(l2_params_1, X_train)
l2_X = np.matmul(l2_params_2, X_train)
y_train = l1_X + l2_X
# Defining my NN
class NNModule(torch.nn.Module):
    def __init__(self, in_features):
        super(NNModule, self).__init__()
        self.l1 = torch.nn.Linear(in_features, 2)
        self.a1 = torch.nn.ReLU()
        self.l2 = torch.nn.Linear(2, 1)

    def forward(self, x):
        x = self.l1(x)
        x = self.a1(x)
        return self.l2(x)
# Initialize the NN
torch.manual_seed(200)
model = NNModule(in_features = 10)
model.l1.weight.data.uniform_(0.0, 1.0)
model.l1.bias.data.uniform_(0.0, 1.0)
# Define criterion and optimizer
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
# Train the NN
torch.manual_seed(200)
for epoch in range(100):
    inputs = Variable(torch.from_numpy(np.transpose(X_train)))
    labels = Variable(torch.from_numpy(np.transpose(y_train)))
    optimizer.zero_grad()
    outputs = model(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
The parameters at which I'm arriving are the following:
list(model.parameters())
[Output]:
[Parameter containing:
 tensor([[0.8997, 0.8345, 0.8284, 0.6950, 0.5949, 0.1217, 0.9067, 0.1824, 0.8272,
          0.2372],
         [0.7525, 0.6577, 0.4358, 0.6109, 0.8817, 0.5429, 0.5263, 0.7531, 0.1552,
          0.7066]], requires_grad=True),
 Parameter containing:
 tensor([0.6617, 0.1079], requires_grad=True),
 Parameter containing:
 tensor([[0.9225, 0.8339]], requires_grad=True),
 Parameter containing:
 tensor([0.0786], requires_grad=True)]
Now, to wrap my NNModule with skorch, I'm using this:
from skorch import NeuralNetRegressor
torch.manual_seed(200)
net = NeuralNetRegressor(
    module=NNModule(in_features=10),
    criterion=torch.nn.MSELoss,
    optimizer=torch.optim.SGD,
    optimizer__lr=0.01,
    max_epochs=100,
    verbose=0
)
net.fit(np.transpose(X_train), np.transpose(y_train))
And I'd like to retrieve the weights obtained in the training. I've used dir(net) to see if the weights are stored in any attributes to no avail.
To retrieve the weights one needs to output them like this:
list(net.module.parameters())
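For example, to inspect the fitted weights by name, something like this should work (a sketch; module_ is the attribute skorch exposes for the initialized module after fit):
# Sketch: print each parameter of the fitted skorch module as a numpy array.
for name, param in net.module_.named_parameters():
    print(name, param.detach().numpy())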

FailedPreconditionError with custom Tensorflow 2 Model. Variable cannot be found

I have set up custom training and testing functions in my project so I can minutely customise the training process. I use k-fold cross-validation to evaluate my model. For whatever reason, the model trains correctly for the first fold, and then on the second it throws this error.
tensorflow.python.framework.errors_impl.FailedPreconditionError: Could not find variable _AnonymousVar13. This could mean that the variable has been deleted. In TF1, it can also mean the variable is uninitialized. Debug info: container=localhost, status=Not found: Resource localhost/_AnonymousVar13/N10tensorflow3VarE does not exist. [[node test_model/dense_2/Tensordot/ReadVariableOp (defined at training_example.py:33) ]] [Op:__inference__train_step_1082]
I have no idea what's happening. I assumed the error arose because of poor initialisation, so I call model.build() with the input shape. I have tried initialising the graph's weights with a blank tensor, too, but that didn't work. I have also reset the backend on the last line in case there was a conflict of names, but that doesn't do the trick.
import numpy as np
import sklearn.model_selection
import tensorflow as tf
from tensorflow.python.keras.metrics import Mean, Precision, Recall
from tensorflow.python.keras.optimizer_v2.adam import Adam
n_splits = 5
batch_size = 16
n_epochs = 2
loss_function = tf.keras.losses.BinaryCrossentropy()
optimiser_fn = Adam
metrics = [
    Mean(name='loss'),
    Precision(name='prec'),
    Recall(name='recall'),
]
learning_rate = 1e-2
dense_outputs = [10,10]
activation = 'relu'
class TestModel(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self._dense_ops = [tf.keras.layers.Dense(o) for o in dense_outputs]
        self._output = tf.keras.layers.Dense(1)

    def call(self, inputs):
        hidden = inputs
        for l in self._dense_ops:
            hidden = l(hidden)
        return self._output(hidden)
def _load_fold_sets_for_training(fold, fold_idcs, features, labels, batch_size):
    # Get the indices for the sets.
    train_idcs, validation_idcs, _ = fold_idcs[fold]
    # Get the training data and labels.
    training_data = features[train_idcs]
    training_labels = labels[train_idcs]
    # Load the training, validation and testing sets.
    training_set = tf.data.Dataset.from_tensor_slices(
        (training_data, training_labels)
    )
    training_set = training_set.batch(batch_size, drop_remainder=False)
    validation_set = tf.data.Dataset.from_tensor_slices(
        (features[validation_idcs], labels[validation_idcs])
    )
    validation_set = validation_set.batch(batch_size, drop_remainder=False)
    return training_set, validation_set
@tf.function
def _train_step(batch_samples, batch_labels):
    batch_predictions = model(batch_samples, training=True)
    loss = loss_function(batch_predictions, batch_labels)
    gradients = tf.gradients(loss, model.trainable_variables)
    optimiser.apply_gradients(
        zip(gradients, model.trainable_variables)
    )
    batch_predictions = tf.sigmoid(batch_predictions)
    metrics[0].update_state(loss)
    [m.update_state(batch_labels, batch_predictions) for m in metrics[1:]]

@tf.function
def _inference_step(batch_samples, batch_labels):
    batch_predictions = model(batch_samples, training=False)
    loss = loss_function(batch_predictions, batch_labels)
    batch_predictions = tf.sigmoid(batch_predictions)
    metrics[0].update_state(loss)
    [m.update_state(batch_labels, batch_predictions) for m in metrics[1:]]
# Generate dataset.
features = np.random.rand(15,1440,1)
labels = np.random.rand(15,1440)
# Set up splits.
kfold = sklearn.model_selection.KFold(n_splits=n_splits, shuffle=True)
splits = []
for train_idcs, test_idcs in kfold.split(features):
    train_idcs, val_idcs = sklearn.model_selection.train_test_split(train_idcs)
    splits += [[train_idcs, val_idcs, test_idcs]]
fold = 0
while fold < n_splits:
    # Load datasets for fold.
    training_set, validation_set = _load_fold_sets_for_training(fold, splits, features, labels, batch_size)
    # Load model.
    model = TestModel()
    # Build model.
    model.build((1440, 1))
    # Initialise Adam optimiser.
    optimiser = optimiser_fn(learning_rate)
    epoch = 0
    while epoch < n_epochs:
        epoch += 1
        # Training.
        for batch_features, batch_labels in training_set:
            _train_step(batch_features, batch_labels)
        print(f'fold {fold}: epoch {epoch}:', ' '.join(f'train_{m.name}: {m.result():0.05f}' for m in metrics))
        # Validation.
        for batch_features, batch_labels in validation_set:
            _inference_step(batch_features, batch_labels)
        print(f'fold {fold}: epoch {epoch}:', ' '.join(f'val_{m.name}: {m.result():0.05f}' for m in metrics))
    tf.keras.backend.clear_session()
    fold += 1
Any ideas?
The issue was the placement of _train_step and _inference_step. If the two functions are redefined on every iteration of the fold loop, the error disappears and the model trains. I don't know why they must be redefined every fold. (A likely reason: the graph traced by @tf.function captures the variables of the model it was first called with, so after clear_session() and building a new model, the old trace still points at deleted variables; redefining the functions forces a fresh trace.)
import numpy as np
import sklearn.model_selection
import tensorflow as tf
from tensorflow.python.keras.metrics import Mean, Precision, Recall
from tensorflow.python.keras.optimizer_v2.adam import Adam
n_splits = 5
batch_size = 2
n_epochs = 2
loss_function = tf.keras.losses.BinaryCrossentropy()
optimiser_fn = Adam
metrics = [
    Mean(name='loss'),
    Precision(name='prec'),
    Recall(name='recall'),
]
learning_rate = 1e-2
dense_outputs = [10, 10]
activation = 'relu'
class TestModel(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self._dense_ops = [tf.keras.layers.Dense(o) for o in dense_outputs]
        self._output = tf.keras.layers.Dense(1)

    def call(self, inputs):
        hidden = inputs
        for l in self._dense_ops:
            hidden = l(hidden)
        return self._output(hidden)
def _load_fold_sets_for_training(fold, fold_idcs, features, labels, batch_size):
    # Get the indices for the sets.
    train_idcs, validation_idcs, _ = fold_idcs[fold]
    # Get the training data and labels.
    training_data = features[train_idcs]
    training_labels = labels[train_idcs]
    # Load the training, validation and testing sets.
    training_set = tf.data.Dataset.from_tensor_slices(
        (training_data, training_labels)
    )
    training_set = training_set.batch(batch_size, drop_remainder=False)
    validation_set = tf.data.Dataset.from_tensor_slices(
        (features[validation_idcs], labels[validation_idcs])
    )
    validation_set = validation_set.batch(batch_size, drop_remainder=False)
    return training_set, validation_set
# Generate dataset.
features = np.random.rand(15, 1440, 1)
labels = np.random.rand(15, 1440)
# Set up splits.
kfold = sklearn.model_selection.KFold(n_splits=n_splits, shuffle=True)
splits = []
for train_idcs, test_idcs in kfold.split(features):
    train_idcs, val_idcs = sklearn.model_selection.train_test_split(train_idcs)
    splits += [[train_idcs, val_idcs, test_idcs]]
fold = 0
while fold < n_splits:

    @tf.function
    def _train_step(batch_samples, batch_labels):
        batch_predictions = model(batch_samples, training=True)
        loss = loss_function(batch_predictions, batch_labels)
        gradients = tf.gradients(loss, model.trainable_variables)
        optimiser.apply_gradients(
            zip(gradients, model.trainable_variables)
        )
        batch_predictions = tf.sigmoid(batch_predictions)
        metrics[0].update_state(loss)
        [m.update_state(batch_labels, batch_predictions) for m in metrics[1:]]

    @tf.function
    def _inference_step(batch_samples, batch_labels):
        batch_predictions = model(batch_samples, training=False)
        loss = loss_function(batch_predictions, batch_labels)
        batch_predictions = tf.sigmoid(batch_predictions)
        metrics[0].update_state(loss)
        [m.update_state(batch_labels, batch_predictions) for m in metrics[1:]]

    # Load datasets for fold.
    training_set, validation_set = _load_fold_sets_for_training(fold, splits, features, labels, batch_size)
    # Load model.
    model = TestModel()
    # Build model.
    model.build((1440, 1))
    # Initialise Adam optimiser.
    optimiser = optimiser_fn(learning_rate)
    epoch = 0
    while epoch < n_epochs:
        epoch += 1
        # Training.
        for batch_features, batch_labels in training_set:
            _train_step(batch_features, batch_labels)
        print(f'fold {fold}: epoch {epoch}:', ' '.join(f'train_{m.name}: {m.result():0.05f}' for m in metrics))
        # Validation.
        for batch_features, batch_labels in validation_set:
            _inference_step(batch_features, batch_labels)
        print(f'fold {fold}: epoch {epoch}:', ' '.join(f'val_{m.name}: {m.result():0.05f}' for m in metrics))
    tf.keras.backend.clear_session()
    fold += 1
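An alternative that is sometimes used instead of redefining the closures each fold (not part of the original answer) is to pass the model and optimiser into the step explicitly; tf.function then retraces for each new model instance rather than holding on to the first fold's variables. A rough sketch:
import tensorflow as tf

@tf.function
def train_step(model, optimiser, loss_function, batch_samples, batch_labels):
    # The tape ties the gradients to whichever model instance is passed in,
    # so a fresh model per fold does not leave the traced graph pointing at
    # deleted variables.
    with tf.GradientTape() as tape:
        batch_predictions = model(batch_samples, training=True)
        loss = loss_function(batch_labels, batch_predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimiser.apply_gradients(zip(gradients, model.trainable_variables))
    return loss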

Why are my train and validation accuracies rising so slowly?

Here is my code. I don't know why my train and validation accuracy increase so slowly. Is that normal? I'm new to deep learning; this is my homework. The train and validation values barely change until around loop 500. Is that normal? I changed the learning rate, added weight_decay, etc., but I didn't see a difference.
# -*- coding: utf-8 -*-
#Libraries
import torch
import torch.nn.functional as F
from torch import autograd, nn
from torch.autograd import Variable
import numpy as np
import matplotlib.pyplot as plt
from torchvision import transforms, datasets
from torch.utils import data
"""
Olivetti face dataset
"""
from sklearn.datasets import fetch_olivetti_faces
# Olivetti dataset download
olivetti = fetch_olivetti_faces()
train = olivetti.images
label = olivetti.target
X = train
Y = label
print("\nDownload Ok")
"""
Set for train
"""
train_rate = 0.8
X_train = np.zeros([int(train_rate * X.shape[0]),64,64], dtype=float)
Y_train = np.zeros([int(train_rate * X.shape[0])], dtype=int)
X_val = np.zeros([int((1-train_rate) * X.shape[0]+1),64,64], dtype=float)
Y_val = np.zeros([int((1-train_rate) * X.shape[0]+1)], dtype=int)
#Split data for train and validation
ie=0
iv=0
for i in range(X.shape[0]):
    if (i%10)/9 <= train_rate:
        X_train[ie] = X[i]
        Y_train[ie] = Y[i]
        ie += 1
    else:
        X_val[iv] = X[i]
        Y_val[iv] = Y[i]
        iv += 1
X_train = X_train.reshape(320,-1,64,64)
X_val = X_val.reshape(80,-1,64,64)
print(Y_train.shape)
X_train = torch.Tensor(X_train)
Y_train = torch.Tensor(Y_train)
X_val = torch.Tensor(X_val)
Y_val = torch.Tensor(Y_val)
batch_size = 16
train_loader = torch.utils.data.DataLoader(X_train, batch_size=batch_size)
val_loader = torch.utils.data.DataLoader(X_val, batch_size=batch_size)
class CNNModule(nn.Module):
    def __init__(self):
        super(CNNModule, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 13 * 13, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 40)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 13 * 13)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
def make_train(model,dataset,n_iters,gpu):
    # Organize data
    X_train,Y_train,X_val,Y_val = dataset
    kriter = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(),lr=0.03)
    #Arrays to save loss and accuracy
    tl=np.zeros(n_iters) #For train loss
    ta=np.zeros(n_iters) #For train accuracy
    vl=np.zeros(n_iters) #For validation loss
    va=np.zeros(n_iters) #For validation accuracy
    # Convert labels to long
    Y_train = Y_train.long()
    Y_val = Y_val.long()
    # GPU control
    if gpu:
        X_train,Y_train = X_train.cuda(),Y_train.cuda()
        X_val,Y_val = X_val.cuda(),Y_val.cuda()
        model = model.cuda() # Parameters to GPU!
        print("Using GPU")
    else:
        print("Using CPU")
    # print(X_train.shape)
    # print(Y_train.shape)
    for i in range(n_iters):
        # train forward
        train_out = model.forward(X_train)
        train_loss = kriter(train_out,Y_train)
        # Backward and optimization
        train_loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        # Compute train accuracy
        train_predict = train_out.cpu().detach().argmax(dim=1)
        train_accuracy = (train_predict.cpu().numpy()==Y_train.cpu().numpy()).mean()
        # For validation
        val_out = model.forward(X_val)
        val_loss = kriter(val_out,Y_val)
        # Compute validation accuracy
        val_predict = val_out.cpu().detach().argmax(dim=1)
        val_accuracy = (val_predict.cpu().numpy()==Y_val.cpu().numpy()).mean()
        tl[i] = train_loss.cpu().detach().numpy()
        ta[i] = train_accuracy
        vl[i] = val_loss.cpu().detach().numpy()
        va[i] = val_accuracy
        # Show result each 5 loops
        if i%5==0:
            print("Loop --> ",i)
            print("Train Loss :",train_loss.cpu().detach().numpy())
            print("Train Accuracy :",train_accuracy)
            print("Validation Loss :",val_loss.cpu().detach().numpy())
            print("Validation Accuracy :",val_accuracy)
    model = model.cpu()
    #Print result
    plt.subplot(2,2,1)
    plt.plot(np.arange(n_iters), tl, 'r-')
    plt.subplot(2,2,2)
    plt.plot(np.arange(n_iters), ta, 'b--')
    plt.subplot(2,2,3)
    plt.plot(np.arange(n_iters), vl, 'r-')
    plt.subplot(2,2,4)
    plt.plot(np.arange(n_iters), va, 'b--')
dataset = X_train,Y_train,X_val,Y_val
gpu = True
gpu = gpu and torch.cuda.is_available()
model = CNNModule()
make_train(model,dataset,1000,gpu)
OUTPUT:
Loop --> 0
Train Loss : 3.6910985
Train Accuracy : 0.025
Validation Loss : 3.6908844
Validation Accuracy : 0.025
Loop --> 5
Loop --> 215
Train Loss : 3.6849258
Train Accuracy : 0.025
Validation Loss : 3.6850574
Validation Accuracy : 0.025
Loop --> 500
Train Loss : 3.4057992
Train Accuracy : 0.103125
Validation Loss : 3.5042462
Validation Accuracy : 0.0875
Loop --> 995
Train Loss : 0.007807272
Train Accuracy : 1.0
Validation Loss : 0.64222467
Validation Accuracy : 0.8375
[Image: output graphs of train/validation loss and accuracy]
I don't know if this is the only problem, but please note that you zero the gradient and then do a forward pass over the validation data, which means that new gradients from the validation data are stored in the model before the next iteration. The common practice is to create an evaluation method and use it to make predictions over the validation set without saving gradients, something like:
def eval_model(data, X_val, Y_val):
    model.eval()  # this sets the model to be in inference mode (matters if you have BatchNorm or Dropout layers)
    with torch.no_grad():  # tells PyTorch not to compute gradients
        val_out = model.forward(X_val)
        val_loss = criterion(val_out, Y_val)
        # here put some prints or whatever you want to do
    model.train()  # this returns the model to training mode
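A sketch of how it could be called from the training loop in the question (hypothetical wiring; criterion here corresponds to the kriter loss defined inside make_train):
for i in range(n_iters):
    train_out = model(X_train)               # forward pass on the training data
    train_loss = kriter(train_out, Y_train)
    train_loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    eval_model(dataset, X_val, Y_val)        # validation without tracking gradients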

Making predictions with tensorflow

I'm really a beginner with TensorFlow and with this whole field, but I've watched all of Andrej Karpathy's lectures for the CS231n class, so I understand the code.
So this is the code (not mine): https://github.com/nfmcclure/tensorflow_cookbook/tree/master/09_Recurrent_Neural_Networks/02_Implementing_RNN_for_Spam_Prediction
# Implementing an RNN in TensorFlow
# ----------------------------------
#
# We implement an RNN in TensorFlow to predict spam/ham from texts
#
# https://github.com/nfmcclure/tensorflow_cookbook/blob/master/09_Recurrent_Neural_Networks/02_Implementing_RNN_for_Spam_Prediction/02_implementing_rnn.py
import os
import re
import io
import glob
import requests
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from zipfile import ZipFile
from tensorflow.python.framework import ops
ops.reset_default_graph()
# Start a graph
sess = tf.Session()
# Set RNN parameters
epochs = 20
batch_size = 250
max_sequence_length = 25
rnn_size = 10
embedding_size = 50
min_word_frequency = 10
learning_rate = 0.0005
dropout_keep_prob = tf.placeholder(tf.float32)
# Download or open data
data_dir = 'temp'
data_file = 'text_data.txt'
if not os.path.exists(data_dir):
    os.makedirs(data_dir)
if not os.path.isfile(os.path.join(data_dir, data_file)):
    zip_url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/00228/smsspamcollection.zip'
    r = requests.get(zip_url)
    z = ZipFile(io.BytesIO(r.content))
    file = z.read('SMSSpamCollection')
    # Format Data
    text_data = file.decode()
    text_data = text_data.encode('ascii', errors='ignore')
    text_data = text_data.decode().split('\n')
    # Save data to text file
    with open(os.path.join(data_dir, data_file), 'w') as file_conn:
        for text in text_data:
            file_conn.write("{}\n".format(text))
else:
    # Open data from text file
    text_data = []
    with open(os.path.join(data_dir, data_file), 'r') as file_conn:
        for row in file_conn:
            text_data.append(row)
    text_data = text_data[:-1]
text_data = [x.split('\t') for x in text_data if len(x) >= 1]
text_data = [x for x in text_data if len(x) > 1]
print([list(x) for x in zip(*text_data)])
[text_data_target, text_data_train] = [list(x) for x in zip(*text_data)]
# Create a text cleaning function
def clean_text(text_string):
    text_string = re.sub(r'([^\s\w]|_|[0-9])+', '', text_string)
    text_string = " ".join(text_string.split())
    text_string = text_string.lower()
    return text_string
# Clean texts
text_data_train = [clean_text(x) for x in text_data_train]
# Change texts into numeric vectors
vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor(max_sequence_length,
min_frequency=min_word_frequency)
text_processed = np.array(list(vocab_processor.fit_transform(text_data_train)))
# Shuffle and split data
text_processed = np.array(text_processed)
text_data_target = np.array([1 if x == 'ham' else 0 for x in text_data_target])
shuffled_ix = np.random.permutation(np.arange(len(text_data_target)))
x_shuffled = text_processed[shuffled_ix]
y_shuffled = text_data_target[shuffled_ix]
# Split train/test set
ix_cutoff = int(len(y_shuffled) * 0.80)
x_train, x_test = x_shuffled[:ix_cutoff], x_shuffled[ix_cutoff:]
y_train, y_test = y_shuffled[:ix_cutoff], y_shuffled[ix_cutoff:]
vocab_size = len(vocab_processor.vocabulary_)
print("Vocabulary Size: {:d}".format(vocab_size))
print("80-20 Train Test split: {:d} -- {:d}".format(len(y_train), len(y_test)))
# Create placeholders
x_data = tf.placeholder(tf.int32, [None, max_sequence_length])
y_output = tf.placeholder(tf.int32, [None])
# Create embedding
embedding_mat = tf.Variable(tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0))
embedding_output = tf.nn.embedding_lookup(embedding_mat, x_data)
# embedding_output_expanded = tf.expand_dims(embedding_output, -1)
# Define the RNN cell
# tensorflow change >= 1.0, rnn is put into tensorflow.contrib directory. Prior version not test.
if tf.__version__[0] >= '1':
    cell = tf.contrib.rnn.BasicRNNCell(num_units=rnn_size)
else:
    cell = tf.nn.rnn_cell.BasicRNNCell(num_units=rnn_size)
output, state = tf.nn.dynamic_rnn(cell, embedding_output, dtype=tf.float32)
output = tf.nn.dropout(output, dropout_keep_prob)
# Get output of RNN sequence
output = tf.transpose(output, [1, 0, 2])
last = tf.gather(output, int(output.get_shape()[0]) - 1)
weight = tf.Variable(tf.truncated_normal([rnn_size, 2], stddev=0.1))
bias = tf.Variable(tf.constant(0.1, shape=[2]))
logits_out = tf.matmul(last, weight) + bias
# Loss function
losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_out,
labels=y_output) # logits=float32, labels=int32
loss = tf.reduce_mean(losses)
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(logits_out, 1), tf.cast(y_output, tf.int64)), tf.float32))
optimizer = tf.train.RMSPropOptimizer(learning_rate)
train_step = optimizer.minimize(loss)
init = tf.global_variables_initializer()
sess.run(init)
train_loss = []
test_loss = []
train_accuracy = []
test_accuracy = []
# Start training
for epoch in range(epochs):
    # Shuffle training data
    shuffled_ix = np.random.permutation(np.arange(len(x_train)))
    x_train = x_train[shuffled_ix]
    y_train = y_train[shuffled_ix]
    num_batches = int(len(x_train) / batch_size) + 1
    # TO DO CALCULATE GENERATIONS ExACTLY
    for i in range(num_batches):
        # Select train data
        min_ix = i * batch_size
        max_ix = np.min([len(x_train), ((i + 1) * batch_size)])
        x_train_batch = x_train[min_ix:max_ix]
        y_train_batch = y_train[min_ix:max_ix]
        # Run train step
        train_dict = {x_data: x_train_batch, y_output: y_train_batch, dropout_keep_prob: 0.5}
        sess.run(train_step, feed_dict=train_dict)
    # Run loss and accuracy for training
    temp_train_loss, temp_train_acc = sess.run([loss, accuracy], feed_dict=train_dict)
    train_loss.append(temp_train_loss)
    train_accuracy.append(temp_train_acc)
    # Run Eval Step
    test_dict = {x_data: x_test, y_output: y_test, dropout_keep_prob: 1.0}
    temp_test_loss, temp_test_acc = sess.run([loss, accuracy], feed_dict=test_dict)
    test_loss.append(temp_test_loss)
    test_accuracy.append(temp_test_acc)
    print('Epoch: {}, Test Loss: {:.2}, Test Acc: {:.2}'.format(epoch + 1, temp_test_loss, temp_test_acc))
# Plot loss over time
epoch_seq = np.arange(1, epochs + 1)
plt.plot(epoch_seq, train_loss, 'k--', label='Train Set')
plt.plot(epoch_seq, test_loss, 'r-', label='Test Set')
plt.title('Softmax Loss')
plt.xlabel('Epochs')
plt.ylabel('Softmax Loss')
plt.legend(loc='upper left')
plt.show()
# Plot accuracy over time
plt.plot(epoch_seq, train_accuracy, 'k--', label='Train Set')
plt.plot(epoch_seq, test_accuracy, 'r-', label='Test Set')
plt.title('Test Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend(loc='upper left')
plt.show()
def findFiles(path): return glob.glob(path)
pred_array = "words"
pred_num = np.array(list(vocab_processor.fit_transform(pred_array)))
print(pred_num)
pred_output = tf.placeholder(tf.float32,[1,len(pred_array),max_sequence_length])
feed_dict = {pred_output: [pred_num]}
classification = sess.run(losses, feed_dict)
print(classification)
It's an RNN spam classifier, and it's working great (except for the part I wrote at the end, where I'm trying to create the predictions).
I just want to understand how to create a prediction function for this, something that looks like this:
def predict(text):  # text is a string (my mail)
    # Doing prediction stuff
    return top_result  # ham or spam
The last few lines are my latest attempt, and they give me the following error:
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'Placeholder' with dtype float
[[Node: Placeholder = Placeholder[dtype=DT_FLOAT, shape=<unknown>, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
I also tried to do something based on "Making predictions with a TensorFlow model", and I read https://www.tensorflow.org/serving/serving_basic, but everything I've tried has failed...
Since I'm just a beginner, explanations are welcome, but I'm not sure I'll know how to code it, so please post the code answer too.
(Python 3.6 btw)
Thanks!
If you take a look at how the original code does the training and testing steps, specifically how they set up their train_dict and test_dict, you see that they feed values to each of the tensors defined as placeholder in the graph. Basically placeholders need to be given some value if they are going to be used in whatever calculation you are asking your network to do. Since you are looking for predictions from the network, you probably do not need to provide an expected output, but you will need to give it input data x_data, and a value for dropout_keep_prob. This should be dropout_keep_prob=1.0 for prediction.
You also want a prediction, not the loss of the network. The loss is basically a measure of how far your network's output is from what you expect, but since you are trying to predict something for new data you really just want to see what the network says it is. You can do this using the logits_out op directly, or we can add an op that converts your logits into a probability distribution over your classes. Either way you can look at the distribution to get an idea of how likely the network thinks your data falls into each category, or you can take the max value of this vector to just output the network's best guess.
So you might try something like:
prediction = tf.nn.softmax(logits_out)
feed_dict = {x_data: your_input_data, dropout_keep_prob: 1.0}
pred = sess.run(prediction, feed_dict)
best_guess = np.argmax(pred) # highest-rated class
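Putting the pieces together, a predict function could look roughly like this (a sketch, reusing the sess, clean_text, vocab_processor, x_data, dropout_keep_prob and logits_out defined above; transform() is used instead of fit_transform() so the vocabulary stays fixed):
prediction_op = tf.nn.softmax(logits_out)  # probability distribution over the two classes

def predict(text):  # text is a string (the mail)
    cleaned = clean_text(text)
    # transform() expects an iterable of documents and yields padded word-id vectors
    text_num = np.array(list(vocab_processor.transform([cleaned])))
    probs = sess.run(prediction_op, feed_dict={x_data: text_num, dropout_keep_prob: 1.0})
    # above, the labels were mapped 1 -> 'ham', 0 -> 'spam'
    return 'ham' if np.argmax(probs) == 1 else 'spam'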

Why is my simple neural network not learning?

I am new to TensorFlow and neural networks. I am trying to build a neural network that can classify images in the CIFAR-10 dataset.
Here is my code:
import tensorflow as tf
import pickle
import numpy as np
import random
image_size= 32*32*3 # because 3 channels
n_classes = 10
lay1_size = 50
batch_size = 100
def unpickle(filename):
    with open(filename,'rb') as f:
        data = pickle.load(f, encoding='latin1')
    x = data['data']
    y = data['labels']
    # shuffle the data
    z = list(zip(x,y))
    random.shuffle(z)
    x, y = zip(*z)
    x = x[:batch_size]
    y = y[:batch_size]
    # convert decimals to one hot arrays
    y = np.eye(n_classes)[[y]]
    return x, y
# set up network
def add_layer(inputs, in_size, out_size, activation_function=None):
    W = tf.Variable(tf.random_normal([in_size, out_size]), dtype=tf.float32)
    b = tf.Variable(tf.zeros([1,out_size]) + 0.1, dtype=tf.float32)
    Wx_plus_b = tf.matmul(inputs, W) + b
    if activation_function is None:
        output = Wx_plus_b
    else:
        output = activation_function(Wx_plus_b)
    return output
def compute_accuracy(v_xs, v_ys):
    global prediction
    y_pre = sess.run(prediction, feed_dict={xs:v_xs})
    correct_prediction = tf.equal(tf.argmax(y_pre,1), tf.argmax(v_ys, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    result = sess.run(accuracy, feed_dict={xs:v_xs, ys:v_ys})
    return result
xs = tf.placeholder(tf.float32, [None,image_size])
ys = tf.placeholder(tf.float32)
lay1 = add_layer(xs, image_size, lay1_size, activation_function=tf.nn.tanh)
lay2 = add_layer(lay1, lay1_size, lay1_size, activation_function=tf.nn.tanh)
prediction = add_layer(lay2, lay1_size, n_classes, activation_function=tf.nn.softmax)
cross_entropy = tf.reduce_mean(-tf.reduce_sum(ys*tf.log(prediction), reduction_indices=[1]))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
#train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
# run network
sess = tf.Session()
sess.run(tf.initialize_all_variables())
x_test, y_test = unpickle('test_batch')
for i in range(1000):
    x_train, y_train = unpickle('data_batch_1')
    sess.run(train_step, feed_dict={xs:x_train,ys:y_train})
    if i % 50 == 0:
        print(compute_accuracy(x_test, y_test))
sess.close()
I am using two hidden layers with 50 nodes in each layer. I am running 1,000 cycles, where in each cycle I shuffle data in the dataset and pick the first 100 images of that shuffle to train on.
I am consistently getting ~0.1 accuracy; the machine is not learning at all.
When I modify the code to use the MNIST dataset instead of the CIFAR-10 dataset I get ~0.87 accuracy.
I took code from an MNIST tutorial and am trying to modify it to classify CIFAR-10 data.
I can't figure out what's wrong here. How do I get my algorithm to learn?
