Tensorflow netwrok parameters nog updating - python

Iris dataset classification, network parameters not updating
Hey, i tried to build a classifier with a logistic regression netwrok but my parameters are not updating, my weights,bias,output and cost stay the same can somebody help me? I have no idea why my parameters are not updating how can I solve this? Thank you!
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
df = pd.read_csv('/Users/Laurens-Wissels/Desktop/iris.csv')
x = np.array(df[["sepal_length","sepal_width","petal_length","petal_width"]])
scaler_model = MinMaxScaler()
x = scaler_model.fit_transform(x)
y = df["species"]
def yvalue(y):
if y =="setosa":
return [1,0,0]
elif y == "versicolor":
return [0,1,0]
else:
return [0,0,1]
y = y.apply(yvalue)
y = y.reshape(150,1)
x_train, x_test , y_train,y_test = train_test_split(x,y,test_size=0.3)
print(y_train)
n_features = 4
n_species = 1
traing_epochs = 2000
learning_rate = 0.0001
n_samples = 105
display_step = 50
X = tf.placeholder(tf.float32,[105,n_features])
Y = tf.placeholder(tf.float32,[105,1])
W = tf.Variable(tf.random_normal([n_features,n_species]))
b = tf.Variable(tf.random_normal([1]))
_y = tf.add(tf.matmul(X,W),b)
output = tf.nn.softmax(_y)
cost = tf.reduce_mean(tf.pow(Y - output , 2))/(2*n_samples)
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
for i in range(traing_epochs):
sess.run(optimizer, feed_dict={X: x_train, Y: y_train}) # Take a gradient descent step using our inputs and labels
sess.run(output,feed_dict={X: x_train, Y: y_train})
# That's all! The rest of the cell just outputs debug messages.
# Display logs per epoch step
if (i) % display_step == 0:
cc = sess.run(cost, feed_dict={X: x_train, Y:y_train})
print("_y:",_y)
print("output:",output)
print("w:",sess.run(W, feed_dict={X: x_train, Y:y_train}))
print "Training step:", '%04d' % (i), "cost=",sess.run(cost, feed_dict={X: x_train, Y:y_train}) #, \"W=", sess.run(W), "b=", sess.run(b)
print("-------------------------------------")
plotData.append(sess.run(cost, feed_dict={X: x_train, Y:y_train}) )
print "Optimization Finished!"
training_cost = sess.run(cost, feed_dict={X: x_train, Y:y_train})
print "Training cost=", training_cost, "W=", sess.run(W), "b=", sess.run(b), '\n'
plt.plot(plotData)
plt.show()

tf.nn.softmax divides by the sum of the exponentiated elements (see the expression in the docs). If you only have one element in the dimension being summed over (last by default):
print(_y.shape)
(105, 1)
Then you end up with exp(x) / sum(exp(x)), which is a constant 1. So the gradient is 0 and therefore no training.
You could switch to tf.nn.sigmoid.

Related

PyTorch minibatch training very slow

When training my model on the adult income data set and using minibatches training is very slow regardless if I use PyTorch's DataLoader or a basic implementation for minibatch training.
Is there a problem with my code or is there another way to speed up training for the adult income data set? I want to use one-hot encoding and cross-entropy loss + softmax. Do I have to use a different loss function or remove the softmax layer?
import pandas as pd
from pandas import read_csv
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import KBinsDiscretizer
from sklearn.model_selection import train_test_split
import torch
from torch.autograd import Variable
from torch.utils.data import DataLoader, Dataset, TensorDataset
import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
import warnings
warnings.filterwarnings('ignore')
device = torch.device("cpu")
class Model(nn.Module):
def __init__(self, input_dim):
super(Model, self).__init__()
self.layer1 = nn.Linear(input_dim, 12)
self.layer2 = nn.Linear(12, 2)
def forward(self, x):
x = F.sigmoid(self.layer1(x))
x = F.softmax(self.layer2(x)) # To check with the loss function
return x
# load dataset
filename = './datasets/adult-all.csv'
dataframe = read_csv(filename, header=None, na_values='?')
# drop rows with missing
dataframe = dataframe.dropna()
# summarize the class distribution
target = dataframe.values[:, -1]
# split into inputs and outputs
last_ix = len(dataframe.columns) - 1
X_, y = dataframe.drop(last_ix, axis=1), dataframe[last_ix]
# select categorical and numerical features
cat_ix = X_.select_dtypes(include=['object', 'bool']).columns
num_ix = X_.select_dtypes(include=['int64', 'float64']).columns
# label encode the target variable to have the classes 0 and 1
y = LabelEncoder().fit_transform(y)
# one-hot encoding of categorical features
df_cat = pd.get_dummies(X_[cat_ix])
# binning of numerical features
x = X_.drop(columns=cat_ix, axis=1)
est = KBinsDiscretizer(n_bins=3, encode='onehot-dense', strategy='uniform')
df_num = est.fit_transform(x)
X = pd.concat([df_cat.reset_index(drop=True), pd.DataFrame(df_num).reset_index(drop=True)], axis=1)
# split training and test data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
X_tr = Variable(torch.tensor(X_train.values, dtype=torch.float))
X_te = Variable(torch.tensor(X_test.values, dtype=torch.float))
y_tr = Variable(torch.tensor(y_train, dtype=torch.long))
y_te = Variable(torch.tensor(y_test, dtype=torch.long))
def binary_cross_entropy_one_hot(input, target):
return torch.nn.CrossEntropyLoss()(input, target)
def _accuracy(y_pred, y_true):
classes = torch.argmax(y_pred, dim=1)
labels = y_true
accuracy = torch.mean((classes == labels).float())
return accuracy
model = Model(X.shape[1])
learning_rate = 1e-3
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
epochs = 1000
accuracy = 0.0
minibatch = True
# training loop
train_loss = []
for epoch in range(epochs):
if minibatch:
batch_size = 128 # or whatever
permutation = torch.randperm(X_tr.size()[0])
for i in range(0, X_tr.size()[0], batch_size):
optimizer.zero_grad()
indices = permutation[i:i + batch_size]
batch_x, batch_y = X_tr[indices], y_tr[indices]
# in case you wanted a semi-full example
outputs = model.forward(batch_x)
loss = binary_cross_entropy_one_hot(outputs, batch_y)
loss.backward()
optimizer.step()
if epoch % 100 == 0:
print(f'epoch: {epoch:2} loss: {loss:10.8f}')
# train_ds = TensorDataset(X_tr, y_tr)
# train_dl = DataLoader(train_ds, batch_size=256, shuffle=True)
# batch_loss = 0.0
# batch_accuracy = 0.0
# for nb, (x_batch, y_batch) in enumerate(train_dl): # manually set number of batches?
# optimizer.zero_grad()
# y_pred_train = model(x_batch)
# loss = binary_cross_entropy_one_hot(y_pred_train, y_batch)
# loss.backward()
# optimizer.step()
# batch_loss += loss.item()
# batch_accuracy += _accuracy(y_pred_train, y_batch)
# train_loss.append(batch_loss / (nb + 1))
# accuracy = batch_accuracy / (nb + 1)
# if epoch % 100 == 0:
# print(f'epoch: {epoch:2} loss: {train_loss[epoch]:10.8f}')
else:
optimizer.zero_grad()
y_pred = model(X_tr)
# computing the loss function
loss = binary_cross_entropy_one_hot(y_pred, y_tr)
if epoch % 100 == 0:
print(f'epoch: {epoch:2} loss: {loss.item():10.8f}')
loss.backward()
optimizer.step()
accuracy = _accuracy(y_pred, y_tr)
# evaluation on test data
with torch.no_grad():
model.eval()
y_pred = model(X_te)
test_loss = binary_cross_entropy_one_hot(y_pred, y_te)
test_acc = _accuracy(y_pred, y_te)
print("Loss on test data: {:.4}".format(test_loss))
print("Accuracy on test data: {:.4}".format(test_acc))
Time would depend on your input_dim, the size of your dataset, and the number of updates per epoch (// the batch size). From what you've shared with us, I'm not exactly sure what the issue is and if there is actually any bottleneck. However, here are a couple of things I would point out, which might help you (in no particular order):
No need to wrap your data with torch.autograd.Variable. It has been deprecated and is no longer needed, Autograd automatically supports torch.tensors with requires_grad set to True.
If you are using torch.nn.CrossEntropyLoss, you shouldn't use F.softmax on your model's output. That's because CrossEntropyLoss includes nn.LogSoftmax() and nn.NLLLoss(). Also no need to initialize the module each time you want to call it:
criterion = torch.nn.CrossEntropyLoss()
def binary_cross_entropy_one_hot(input, target):
return criterion(input, target)
I see you are redefining your data loader on each epoch. Is that what you really want? If not you can just define it outside the training loop:
train_ds = TensorDataset(X_tr, y_tr)
train_dl = DataLoader(train_ds, batch_size=256, shuffle=True)
for epoch in range(epochs):
for x, y in train_dl:
# ...
I would call .item() on your accuracy (when calling _accuracy) to not keep it attached to the computation graph and release it from memory when it is ready.

Tensorflow not giving any output

I'll keep this short and sweet, i'm trying to use tensorflow for forex learning, and any time I run my code I got from a youtube tutorial, I get 0 output. It just says In again. Can someone help me out? I have code below.
I have tried changing variables, simplifying code, everything.
import tensorflow as tf
import numpy
import pandas as pd
import matplotlib.pyplot as plt
rng = numpy.random
data = pd.read_csv("/Users/adamh/OneDrive/Desktop/data.csv")
server_time = data['server_time'].values
bid = data['bid'].values
ask = data['ask'].values
#hyperparameters
learning_rate = 0.01
training_epochs = 10000
#parameter
display_step = 50
train_X = numpy.asarray(server_time)
train_Y = numpy.asarray(ask)
n_samples = train_X.shape[0]
X = tf.placeholder('float32')
Y = tf.placeholder('float32')
W = tf.Variable(rng.randn(),name = "Weight")
b = tf.Variable(rng.randn(), name = 'bias')
pred = tf.add(tf.multiply(X,W),b)
error = tf.reduce_sum(tf.pow(pred-(Y+Y2),2))/(2*n_samples)
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(error)
init = tf.global_variables_initializer()
# Start training
with tf.Session() as sess:
# Run the initializer
sess.run(init)
# Fit all training data
for epoch in range(training_epochs):
for (x, y) in zip(train_X, train_Y):
sess.run(optimizer, feed_dict={X: x, Y: y})
# Display logs per epoch step
if (epoch+1) % display_step == 0:
c = sess.run(error, feed_dict={X: train_X, Y:train_Y})
print("Epoch:", '%04d' % (epoch+1), "error=", "{:.9f}".format(c), \
"W=", sess.run(W), "b=", sess.run(b))
print("Optimization Finished!")
training_error = sess.run(error, feed_dict={X: train_X, Y: train_Y})
print("Training error=", training_error, "W=", sess.run(W), "b=", sess.run(b), '\n')
# Graphic display
plt.plot(train_X, train_Y, 'ro', label='Original data')
plt.plot(train_X, sess.run(W) * train_X + sess.run(b), label='Fitted line')
plt.legend()
plt.show()
# Testing example, as requested (Issue #2)
test_X = numpy.asarray([2,4,6,8,10])
test_Y = numpy.asarray([25,23,21,19,17])
print("Testing... (Mean square loss Comparison)")
testing_error = sess.run(
tf.reduce_sum(tf.pow(pred - (Y), 2)) / (2 * test_X.shape[0]),
feed_dict={X: test_X, Y: test_Y}) # same function as cost above
print("Testing error=", testing_error)
print("Absolute mean square loss difference:", abs(
training_error - testing_error))
plt.plot(test_X, test_Y, 'bo', label='Testing data')
plt.plot(train_X, sess.run(W) * train_X + sess.run(b), label='Fitted line')
plt.legend()
plt.show()
No output is given.

training accuracy drops in tensorflow

I was trying to create a model for character recognition.
This model was working fine with 28*28 dataset and for characters from 0-9 but it training accuracy is dropping if changed to 64*64 and characters ranges from 0-9, a-z, A-Z.
While iterating through accuracy it goes till 0.3 and then stays there afterwards. I tried to train with different dataset as well but the same thing is happening.
Changing learning rate to 0.001 also does not help.
Can anyone tell what is the issue with this?
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import random as ran
import os
import tensorflow as tf
def TRAIN_SIZE(num):
images = np.load("data/train/images64.npy").reshape([2852,4096])
labels = np.load("data/train/labels.npy")
print ('Total Training Images in Dataset = ' + str(images.shape))
print ('--------------------------------------------------')
x_train = images[:num,:]
print ('x_train Examples Loaded = ' + str(x_train.shape))
y_train = labels[:num,:]
print ('y_train Examples Loaded = ' + str(y_train.shape))
print('')
return x_train, y_train
def TEST_SIZE(num):
images = np.load("data/test/images64.npy").reshape([558,4096])
labels = np.load("data/test/labels.npy")
print ('Total testing Images in Dataset = ' + str(images.shape))
print ('--------------------------------------------------')
x_test = images[:num,:]
print ('x_test Examples Loaded = ' + str(x_test.shape))
y_test = labels[:num,:]
print ('y_test Examples Loaded = ' + str(y_test.shape))
print('')
return x_test, y_test
def display_digit(num):
# print(y_train[num])
label = y_train[num].argmax(axis=0)
image = x_train[num].reshape([64,64])
# plt.axis("off")
plt.title('Example: %d Label: %d' % (num, label))
plt.imshow(image, cmap=plt.get_cmap('gray_r'))
plt.show()
def display_mult_flat(start, stop):
images = x_train[start].reshape([1,4096])
for i in range(start+1,stop):
images = np.concatenate((images, x_train[i].reshape([1,4096])))
plt.imshow(images, cmap=plt.get_cmap('gray_r'))
plt.show()
def get_char(a):
if(a<10):
return a
elif(a>=10 and a<36):
return chr(a+55)
else:
return chr(a+61)
x_train, y_train = TRAIN_SIZE(2850)
x_test, y_test = TRAIN_SIZE(1900)
x = tf.placeholder(tf.float32, shape=[None, 4096])
y_ = tf.placeholder(tf.float32, shape=[None, 62])
W = tf.Variable(tf.zeros([4096,62]))
b = tf.Variable(tf.zeros([62]))
y = tf.nn.softmax(tf.matmul(x,W) + b)
with tf.Session() as sess:
# x_test = x_test[1400:,:]
# y_test = y_test[1400:,:]
x_test, y_test =TEST_SIZE(400)
LEARNING_RATE = 0.2
TRAIN_STEPS = 1000
sess.run(tf.global_variables_initializer())
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
training = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
for i in range(TRAIN_STEPS+1):
sess.run(training, feed_dict={x: x_train, y_: y_train})
if i%100 == 0:
print('Training Step:' + str(i) + ' Accuracy = ' + str(sess.run(accuracy, feed_dict={x: x_test, y_: y_test})) + ' Loss = ' + str(sess.run(cross_entropy, {x: x_train, y_: y_train})))
savedPath = tf.train.Saver().save(sess, "/tmp/model.ckpt")
print("Model saved at: " ,savedPath)
You are trying to classify 62 different numbers and characters, but use a single fully connected layer to do that. Your model simply has not enough parameters for that task. In other words, you are underfitting the data. So either expand your network by adding parameters (layers) and/or use CNNs, which generally have good performance for image classification tasks.
Try different CNN mode. the model you are using like inception v1, v2,v3 alexnet etc..

K-fold cross validation with Tensorflow

How can I implement a K-fold Cross Validation on a model in Tensorflow? I have done it before using scikit learn but not with Tensorflow. For example, let's say I have the following model...
def random_forest(target, data):
# Drop the target label, which we save separately.
X = data.drop([target], axis=1).values
y = data[target].values
# Run Cross Validation on Random Forest Classifier.
clf_tree = ske.RandomForestClassifier(n_estimators=50)
Then I would run clf_tree through a cross validation function...
unique_permutations_cross_val(X, y, clf_tree)
...which is defined as...
def unique_permutations_cross_val(X, y, model):
# Split data 20/80 to be used in a K-Fold Cross Validation with unique permutations.
shuffle_validator = model_selection.ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)
# Calculate the score of the model after Cross Validation has been applied to it.
scores = model_selection.cross_val_score(model, X, y, cv=shuffle_validator)
# Print out the score (mean), as well as the variance.
print("Accuracy: %0.4f (+/- %0.2f)" % (scores.mean(), scores.std()))
This is very easy to do. However, let's say I have a working linear regression model defined as so...
# Drop the target label, which we save separately.
X = data.drop([target], axis=1).values
Y = data[target].values
iterable_X = np.asarray(X)
iterable_Y = np.asarray(Y)
rng = np.random
n_rows = X.shape[0]
X = tf.placeholder("float")
Y = tf.placeholder("float")
W = tf.Variable(rng.randn(), name="weight")
b = tf.Variable(rng.randn(), name="bias")
pred = tf.add(tf.multiply(X, W), b)
cost = tf.reduce_sum(tf.pow(pred-Y, 2)/(2*n_rows))
optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate).minimize(cost)
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
for epoch in range(FLAGS.training_epochs):
avg_cost = 0
for (x, y) in zip(iterable_X, iterable_Y):
_, c = sess.run([optimizer, cost], feed_dict={X:x, Y:y})
avg_cost += c / (n_rows/FLAGS.batch_size)
# display logs per epoch step
if (epoch + 1) % FLAGS.display_step == 0:
print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(avg_cost))
print("Optimization Finished!")
The model works, but how can I implement this model using a K-fold Cross Validation. I want to split my data into train data and test data and then run the model using Cross Validation. How can I do this?

Neural Network with 1500+ features cannot fit in TensorFlow

There are 1875 features in data, which are correlated with loan records of people. Some of them have been used for score card, ks=27.
I wanted to use these features in neural networks to determine whether a person is good or bad. However,it turned out all people were defined bad or good without selectivity, even I employed the method of'#imbalance data' (as shown in the following code).
Is there some problem in my code(activation function?),can someone give some tips? thanks in advance!
from sas7bdat import SAS7BDAT
import pandas as pd
import numpy as np
from sklearn.utils import shuffle
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix
f=SAS7BDAT('data_10212102.sas7bdat')
data=f.to_data_frame()
#drop some time freatures
drop_cols = [col for col in data.columns if 'TIME' in col]
data=data.drop(drop_cols,axis=1)
data.loc[data.TARGET == 0,'Good'] =1
data.loc[data.TARGET == 1,'Good'] =0
data = data.rename(columns={'TARGET':'Bad'})
validation =data.ix[data.SETID==3,:]
train_test_data = data.loc[~data.index.isin(validation.index)]
X_train = train_test_data.ix[train_test_data.SETID==1,:]
X_test = train_test_data.ix[train_test_data.SETID==2,:]
X_train = shuffle(X_train)
X_test = shuffle(X_test)
X_validation = shuffle(validation)
Y_train = X_train.Bad
Y_train = pd.concat([Y_train,X_train.Good],axis=1)
Y_test = X_test.Bad
Y_test = pd.concat([Y_test,X_test.Good],axis=1)
Y_validation = X_validation.Bad
Y_validation = pd.concat([Y_validation,X_validation.Good],axis=1)
ratio = len(X_train)/len(X_train.ix[X_train.Bad==1,:])
X_train = X_train.drop(['Good','Bad'],axis=1)
X_test = X_test.drop(['Good','Bad'],axis=1)
X_validation = X_validation.drop(['Good','Bad'],axis=1)
#imbalance data
Y_train.Bad *= ratio
Y_test.Bad *=ratio
Y_validation.Bad *= ratio
#parameters
learning_rate = 0.001
training_epochs = 2000
batch_size = 512
display_step = 500
n_samples = X_train.shape[0]
n_features = 1845
n_class = 2
x = tf.placeholder(tf.float32, [None, n_features])
y = tf.placeholder(tf.float32, [None, n_class])
n_units =2048
n_layers =7
W={}
b={}
for i in range(n_layers):
if i==0:
W[i] = tf.Variable(tf.random_normal([n_features, n_units]))
b[i] = tf.Variable(tf.random_normal([n_units]))
pred = tf.nn.sigmoid(tf.matmul(x, W[i]) + b[i])
elif 0<i<n_layers-1:
W[i] = tf.Variable(tf.random_normal([n_units, n_units]))
b[i] = tf.Variable(tf.random_normal([n_units]))
pred = tf.nn.sigmoid(tf.matmul(pred, W[i]) + b[i])
else:
W[i] = tf.Variable(tf.random_normal([n_units, n_class]))
b[i] = tf.Variable(tf.random_normal([n_class]))
pred = tf.nn.softmax(tf.matmul(pred, W[i]) + b[i])
cost = -tf.reduce_sum(y * tf.log(pred))
optimizer =
tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
init = tf.global_variables_initializer()
saver = tf.train.Saver()
with tf.Session() as sess:
sess.run(init)
for epoch in range(training_epochs):
avg_cost = 0
total_batch = int(n_samples / batch_size)
for i in range(total_batch):
_, c = sess.run([optimizer, cost],
feed_dict={x: X_train[i * batch_size : (i+1) * batch_size],
y: Y_train[i * batch_size : (i+1) * batch_size]
})
avg_cost = c / total_batch
plt.plot(epoch+1, avg_cost, 'co')
if (epoch+1) % display_step == 0:
print("Epoch:", "%04d" % (epoch+1), "avg_cost=",avg_cost )
print("Training Accuracy:", accuracy.eval({x: X_train, y:Y_train}))
print("Testing Accuracy:", accuracy.eval({x: X_test, y:Y_test}))
print("Validating Accuracy:", accuracy.eval({x: X_validation, y:Y_validation}))
train_prob =pred.eval(feed_dict={x: X_train})
train_predict=tf.argmax(train_prob,1).eval()
print("train:",confusion_matrix(1-Y_train['Good'],train_predict))
test_prob = pred.eval(feed_dict={x: X_test})
test_predict=tf.argmax(test_prob,1).eval()
print("test:",confusion_matrix(1-Y_test['Good'],test_predict))
validation_prob = pred.eval(feed_dict={x: X_validation})
validation_predict=tf.argmax(validation_prob,1).eval()
print("validation:",confusion_matrix(1-Y_validation['Good'],validation_predict))
print("Optimization Finished!")
#Save the variables to disk
save_path = saver.save(sess,"./model_v0.ckpt")
print("Model saved in file: %s" %save_path)
plt.xlabel("Epoch")
plt.ylabel("Cost")
plt.show()

Categories

Resources