How can I implement K-fold cross-validation on a model in TensorFlow? I have done it before using scikit-learn, but not with TensorFlow. For example, let's say I have the following model...
def random_forest(target, data):
    # Drop the target label, which we save separately.
    X = data.drop([target], axis=1).values
    y = data[target].values
    # Run Cross Validation on Random Forest Classifier.
    clf_tree = ske.RandomForestClassifier(n_estimators=50)
Then I would run clf_tree through a cross validation function...
unique_permutations_cross_val(X, y, clf_tree)
...which is defined as...
def unique_permutations_cross_val(X, y, model):
    # Split the data 80/20 (train/test) for a K-fold cross-validation with unique permutations.
    shuffle_validator = model_selection.ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)
    # Calculate the score of the model after cross-validation has been applied to it.
    scores = model_selection.cross_val_score(model, X, y, cv=shuffle_validator)
    # Print out the mean score, as well as the standard deviation.
    print("Accuracy: %0.4f (+/- %0.2f)" % (scores.mean(), scores.std()))
This is very easy to do. However, let's say I have a working linear regression model defined like so...
# Drop the target label, which we save separately.
X = data.drop([target], axis=1).values
Y = data[target].values
iterable_X = np.asarray(X)
iterable_Y = np.asarray(Y)
rng = np.random
n_rows = X.shape[0]
X = tf.placeholder("float")
Y = tf.placeholder("float")
W = tf.Variable(rng.randn(), name="weight")
b = tf.Variable(rng.randn(), name="bias")
pred = tf.add(tf.multiply(X, W), b)
cost = tf.reduce_sum(tf.pow(pred-Y, 2)/(2*n_rows))
optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate).minimize(cost)
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(FLAGS.training_epochs):
        avg_cost = 0
        for (x, y) in zip(iterable_X, iterable_Y):
            _, c = sess.run([optimizer, cost], feed_dict={X: x, Y: y})
            avg_cost += c / (n_rows / FLAGS.batch_size)
        # Display logs per epoch step.
        if (epoch + 1) % FLAGS.display_step == 0:
            print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(avg_cost))
    print("Optimization Finished!")
The model works, but how can I implement K-fold cross-validation with it? I want to split my data into training and test sets and then run the model with cross-validation. How can I do this?
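For reference, here is a minimal sketch of the direction I am considering (assuming the graph above, i.e. X, Y, cost, optimizer, init and FLAGS, is already defined): use scikit-learn's KFold purely to generate index splits, then re-train the TensorFlow graph from scratch on each fold and score it on the held-out split.
from sklearn.model_selection import KFold

kf = KFold(n_splits=10, shuffle=True, random_state=0)
fold_costs = []
for train_idx, test_idx in kf.split(iterable_X):
    X_tr_fold, X_te_fold = iterable_X[train_idx], iterable_X[test_idx]
    Y_tr_fold, Y_te_fold = iterable_Y[train_idx], iterable_Y[test_idx]
    with tf.Session() as sess:
        sess.run(init)  # re-initialize W and b for every fold
        for epoch in range(FLAGS.training_epochs):
            for (x_val, y_val) in zip(X_tr_fold, Y_tr_fold):
                sess.run(optimizer, feed_dict={X: x_val, Y: y_val})
        # Score the fold on its held-out split.
        fold_costs.append(sess.run(cost, feed_dict={X: X_te_fold, Y: Y_te_fold}))
print("Mean held-out cost over folds:", np.mean(fold_costs))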
This is my model evaluation code.
This is the error I am getting. x_train contains only float values, but I still get the error; each entry of x_train has length 262.
# Display model architecture summary
model.summary()
# Calculate pre-training accuracy
score = model.evaluate(x_test, y_test, verbose=1)
accuracy = 100*score[1]
print("Pre-training accuracy: %.4f%%" % accuracy)
I have combined both of my features into a single list, F, this way:
X = np.array(featuresdf.feature_mfcc.tolist() , dtype=object)
# print(type(X))
X2 = np.array(featuresdf.sc.tolist() , dtype=object)
# print(type(X2))
# print(X2)
F = []
for i, val in enumerate(X):
    temp_x = val
    temp_x2 = X2[i]
    # concat = temp_x + temp_x2
    concat = np.hstack((temp_x, temp_x2))
    F.append(concat)
# y is the dependent variable that we will predict.
y = np.array(featuresdf.class_label.tolist())
When training my model on the adult income dataset with minibatches, training is very slow regardless of whether I use PyTorch's DataLoader or a basic manual implementation of minibatch training.
Is there a problem with my code, or is there another way to speed up training on the adult income dataset? I want to use one-hot encoding and cross-entropy loss with softmax. Do I have to use a different loss function or remove the softmax layer?
import pandas as pd
from pandas import read_csv
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import KBinsDiscretizer
from sklearn.model_selection import train_test_split
import torch
from torch.autograd import Variable
from torch.utils.data import DataLoader, Dataset, TensorDataset
import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
import warnings
warnings.filterwarnings('ignore')
device = torch.device("cpu")
class Model(nn.Module):
    def __init__(self, input_dim):
        super(Model, self).__init__()
        self.layer1 = nn.Linear(input_dim, 12)
        self.layer2 = nn.Linear(12, 2)

    def forward(self, x):
        x = F.sigmoid(self.layer1(x))
        x = F.softmax(self.layer2(x))  # To check with the loss function
        return x
# load dataset
filename = './datasets/adult-all.csv'
dataframe = read_csv(filename, header=None, na_values='?')
# drop rows with missing
dataframe = dataframe.dropna()
# summarize the class distribution
target = dataframe.values[:, -1]
# split into inputs and outputs
last_ix = len(dataframe.columns) - 1
X_, y = dataframe.drop(last_ix, axis=1), dataframe[last_ix]
# select categorical and numerical features
cat_ix = X_.select_dtypes(include=['object', 'bool']).columns
num_ix = X_.select_dtypes(include=['int64', 'float64']).columns
# label encode the target variable to have the classes 0 and 1
y = LabelEncoder().fit_transform(y)
# one-hot encoding of categorical features
df_cat = pd.get_dummies(X_[cat_ix])
# binning of numerical features
x = X_.drop(columns=cat_ix, axis=1)
est = KBinsDiscretizer(n_bins=3, encode='onehot-dense', strategy='uniform')
df_num = est.fit_transform(x)
X = pd.concat([df_cat.reset_index(drop=True), pd.DataFrame(df_num).reset_index(drop=True)], axis=1)
# split training and test data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
X_tr = Variable(torch.tensor(X_train.values, dtype=torch.float))
X_te = Variable(torch.tensor(X_test.values, dtype=torch.float))
y_tr = Variable(torch.tensor(y_train, dtype=torch.long))
y_te = Variable(torch.tensor(y_test, dtype=torch.long))
def binary_cross_entropy_one_hot(input, target):
    return torch.nn.CrossEntropyLoss()(input, target)

def _accuracy(y_pred, y_true):
    classes = torch.argmax(y_pred, dim=1)
    labels = y_true
    accuracy = torch.mean((classes == labels).float())
    return accuracy
model = Model(X.shape[1])
learning_rate = 1e-3
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
epochs = 1000
accuracy = 0.0
minibatch = True
# training loop
train_loss = []
for epoch in range(epochs):
    if minibatch:
        batch_size = 128  # or whatever
        permutation = torch.randperm(X_tr.size()[0])
        for i in range(0, X_tr.size()[0], batch_size):
            optimizer.zero_grad()
            indices = permutation[i:i + batch_size]
            batch_x, batch_y = X_tr[indices], y_tr[indices]
            # in case you wanted a semi-full example
            outputs = model.forward(batch_x)
            loss = binary_cross_entropy_one_hot(outputs, batch_y)
            loss.backward()
            optimizer.step()
        if epoch % 100 == 0:
            print(f'epoch: {epoch:2} loss: {loss:10.8f}')
        # train_ds = TensorDataset(X_tr, y_tr)
        # train_dl = DataLoader(train_ds, batch_size=256, shuffle=True)
        # batch_loss = 0.0
        # batch_accuracy = 0.0
        # for nb, (x_batch, y_batch) in enumerate(train_dl):  # manually set number of batches?
        #     optimizer.zero_grad()
        #     y_pred_train = model(x_batch)
        #     loss = binary_cross_entropy_one_hot(y_pred_train, y_batch)
        #     loss.backward()
        #     optimizer.step()
        #     batch_loss += loss.item()
        #     batch_accuracy += _accuracy(y_pred_train, y_batch)
        # train_loss.append(batch_loss / (nb + 1))
        # accuracy = batch_accuracy / (nb + 1)
        # if epoch % 100 == 0:
        #     print(f'epoch: {epoch:2} loss: {train_loss[epoch]:10.8f}')
    else:
        optimizer.zero_grad()
        y_pred = model(X_tr)
        # computing the loss function
        loss = binary_cross_entropy_one_hot(y_pred, y_tr)
        if epoch % 100 == 0:
            print(f'epoch: {epoch:2} loss: {loss.item():10.8f}')
        loss.backward()
        optimizer.step()
        accuracy = _accuracy(y_pred, y_tr)
# evaluation on test data
with torch.no_grad():
    model.eval()
    y_pred = model(X_te)
    test_loss = binary_cross_entropy_one_hot(y_pred, y_te)
    test_acc = _accuracy(y_pred, y_te)
    print("Loss on test data: {:.4}".format(test_loss))
    print("Accuracy on test data: {:.4}".format(test_acc))
Time depends on your input_dim, the size of your dataset, and the number of updates per epoch (dataset size divided by the batch size). From what you've shared, I'm not exactly sure what the issue is or whether there actually is a bottleneck. However, here are a couple of things I would point out, which might help you (in no particular order):
There is no need to wrap your data with torch.autograd.Variable. It has been deprecated and is no longer needed; autograd automatically works with torch.Tensors that have requires_grad set to True.
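For instance, a minimal sketch using the tensors defined in the question:
# Plain tensors are enough; Variable is now a deprecated no-op wrapper.
X_tr = torch.tensor(X_train.values, dtype=torch.float)
y_tr = torch.tensor(y_train, dtype=torch.long)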
If you are using torch.nn.CrossEntropyLoss, you shouldn't use F.softmax on your model's output, because CrossEntropyLoss already combines nn.LogSoftmax() and nn.NLLLoss(). Also, there is no need to instantiate the loss module each time you want to call it:
criterion = torch.nn.CrossEntropyLoss()
def binary_cross_entropy_one_hot(input, target):
    return criterion(input, target)
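With that loss, the forward pass should return raw logits. As a sketch of how the model from the question could look (using torch.sigmoid, since F.sigmoid is deprecated):
class Model(nn.Module):
    def __init__(self, input_dim):
        super(Model, self).__init__()
        self.layer1 = nn.Linear(input_dim, 12)
        self.layer2 = nn.Linear(12, 2)

    def forward(self, x):
        x = torch.sigmoid(self.layer1(x))
        return self.layer2(x)  # raw logits; CrossEntropyLoss applies log-softmax internally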
I see you are redefining your data loader on each epoch. Is that what you really want? If not, you can define it once outside the training loop:
train_ds = TensorDataset(X_tr, y_tr)
train_dl = DataLoader(train_ds, batch_size=256, shuffle=True)
for epoch in range(epochs):
    for x, y in train_dl:
        # ...
I would call .item() on the accuracy (when calling _accuracy) so that the metric is not kept attached to the computation graph and its memory can be released once it is no longer needed.
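For example, in the commented-out DataLoader loop above, that would look like this (sketch):
# .item() detaches the scalar from the autograd graph and returns a plain Python float.
batch_accuracy += _accuracy(y_pred_train, y_batch).item()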
I am trying to classify a new text input as positive, negative, or neutral using a pre-built model, and I am getting a ValueError. I have a new review that needs to be vectorized using CountVectorizer, but even after vectorizing, the shape doesn't change. It works fine on the train and test datasets.
I used logistic regression with a bag-of-words representation (count vectorization).
# Create an object of class CountVectorizer
bow = CountVectorizer()
## X_train.values.shape
# Call the fit_transform method on training data
X_train = bow.fit_transform(X_train_raw.values)
# Call the transform method on the test dataset
X_test = bow.transform(X_test_raw.values)
# Perform column standardization.
std = StandardScaler(with_mean=False)
X_train = std.fit_transform(X_train)
X_test = std.transform(X_test)
start = time.time()
# creating list of C
C_values = np.linspace(0.1,1,10)
cv_scores = [] # empty list that will hold cv scores
# Try each value of C in the loop below.
for c in C_values:
    # Create a LogisticRegression object with balanced class weights.
    clf = LogisticRegression(C=c, class_weight='balanced')
    # Perform 5-fold cross-validation.
    # It returns the CV accuracy for each fold in a list.
    scores = cross_val_score(clf, X_train, y_train, cv=5, scoring='accuracy')
    # Store the mean of the accuracies from all 5 folds.
    cv_scores.append(scores.mean())
# calculate misclassification error from accuracy (error = 1 - accuracy)
cv_error = [1 - x for x in cv_scores]
# optimal (best) C is the one for which error is minimum (or accuracy is maximum)
optimal_C = C_values[cv_error.index(min(cv_error))]
print('\nThe optimal C is', optimal_C)
end = time.time()
print("Total time in minutes = ", (end-start)/60)
clf = LogisticRegression(C = optimal_C)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
acc = accuracy_score(y_test, y_pred) * 100
print("Accuracy =", acc)
confusion_matrix(y_test, y_pred)
def predict_new(X):
    # X1 = [[X]]
    X2 = [X]
    X4 = pd.Series(X2)
    X3, los = data_cleaning(X4)
    # print(X3.values)
    X_t = bow.transform(X3)
    print(X_t.shape)
    # X_t = std.transform(X_t)
    # pred = clf.predict(X_t)
    pred = 0
    if pred == 1:
        print("Positive")
    elif pred == -1:
        print("Negative")
    else:
        print("Neutral")
predict_new('the app is good')
I expected the output to be positive, negative, or neutral.
I was trying to create a model for character recognition.
This model was working fine with a 28x28 dataset and characters 0-9, but its training accuracy drops when I switch to 64x64 images and characters covering 0-9, a-z, and A-Z.
While training, the accuracy climbs to about 0.3 and then stays there. I tried training with a different dataset as well, but the same thing happens.
Changing the learning rate to 0.001 also does not help.
Can anyone tell me what the issue is?
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import random as ran
import os
import tensorflow as tf
def TRAIN_SIZE(num):
    images = np.load("data/train/images64.npy").reshape([2852, 4096])
    labels = np.load("data/train/labels.npy")
    print('Total Training Images in Dataset = ' + str(images.shape))
    print('--------------------------------------------------')
    x_train = images[:num, :]
    print('x_train Examples Loaded = ' + str(x_train.shape))
    y_train = labels[:num, :]
    print('y_train Examples Loaded = ' + str(y_train.shape))
    print('')
    return x_train, y_train

def TEST_SIZE(num):
    images = np.load("data/test/images64.npy").reshape([558, 4096])
    labels = np.load("data/test/labels.npy")
    print('Total testing Images in Dataset = ' + str(images.shape))
    print('--------------------------------------------------')
    x_test = images[:num, :]
    print('x_test Examples Loaded = ' + str(x_test.shape))
    y_test = labels[:num, :]
    print('y_test Examples Loaded = ' + str(y_test.shape))
    print('')
    return x_test, y_test

def display_digit(num):
    # print(y_train[num])
    label = y_train[num].argmax(axis=0)
    image = x_train[num].reshape([64, 64])
    # plt.axis("off")
    plt.title('Example: %d Label: %d' % (num, label))
    plt.imshow(image, cmap=plt.get_cmap('gray_r'))
    plt.show()

def display_mult_flat(start, stop):
    images = x_train[start].reshape([1, 4096])
    for i in range(start + 1, stop):
        images = np.concatenate((images, x_train[i].reshape([1, 4096])))
    plt.imshow(images, cmap=plt.get_cmap('gray_r'))
    plt.show()

def get_char(a):
    if a < 10:
        return a
    elif a >= 10 and a < 36:
        return chr(a + 55)
    else:
        return chr(a + 61)
x_train, y_train = TRAIN_SIZE(2850)
x_test, y_test = TRAIN_SIZE(1900)
x = tf.placeholder(tf.float32, shape=[None, 4096])
y_ = tf.placeholder(tf.float32, shape=[None, 62])
W = tf.Variable(tf.zeros([4096,62]))
b = tf.Variable(tf.zeros([62]))
y = tf.nn.softmax(tf.matmul(x,W) + b)
with tf.Session() as sess:
    # x_test = x_test[1400:,:]
    # y_test = y_test[1400:,:]
    x_test, y_test = TEST_SIZE(400)
    LEARNING_RATE = 0.2
    TRAIN_STEPS = 1000
    sess.run(tf.global_variables_initializer())
    cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
    training = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(cross_entropy)
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    for i in range(TRAIN_STEPS + 1):
        sess.run(training, feed_dict={x: x_train, y_: y_train})
        if i % 100 == 0:
            print('Training Step:' + str(i) + ' Accuracy = ' + str(sess.run(accuracy, feed_dict={x: x_test, y_: y_test})) + ' Loss = ' + str(sess.run(cross_entropy, {x: x_train, y_: y_train})))
    savedPath = tf.train.Saver().save(sess, "/tmp/model.ckpt")
    print("Model saved at: ", savedPath)
You are trying to classify 62 different digits and characters, but you are using a single fully connected layer to do it. Your model simply does not have enough parameters for that task; in other words, you are underfitting the data. So either expand your network by adding parameters (more layers) and/or use CNNs, which generally perform well on image classification tasks.
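As a rough illustration only (not a tuned architecture, and not part of the original answer), a small convolutional network in the same TF1 style could look like this, given the 64x64 inputs and 62 classes from the question:
x = tf.placeholder(tf.float32, shape=[None, 4096])
y_ = tf.placeholder(tf.float32, shape=[None, 62])
x_img = tf.reshape(x, [-1, 64, 64, 1])
conv1 = tf.layers.conv2d(x_img, 32, 5, padding='same', activation=tf.nn.relu)
pool1 = tf.layers.max_pooling2d(conv1, 2, 2)
conv2 = tf.layers.conv2d(pool1, 64, 5, padding='same', activation=tf.nn.relu)
pool2 = tf.layers.max_pooling2d(conv2, 2, 2)
flat = tf.reshape(pool2, [-1, 16 * 16 * 64])
dense = tf.layers.dense(flat, 512, activation=tf.nn.relu)
logits = tf.layers.dense(dense, 62)
# Numerically stable cross-entropy on logits, instead of the manual -sum(y * log(y)).
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_, logits=logits))
training = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
Training in minibatches rather than feeding the whole training set on every step would also likely help.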
Try different CNN models, e.g. Inception v1/v2/v3, AlexNet, etc.
Iris dataset classification, network parameters not updating
Hey, I tried to build a classifier with a logistic regression network, but my parameters are not updating: the weights, bias, output, and cost stay the same. Can somebody help me? I have no idea why the parameters are not updating. How can I solve this? Thank you!
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
df = pd.read_csv('/Users/Laurens-Wissels/Desktop/iris.csv')
x = np.array(df[["sepal_length","sepal_width","petal_length","petal_width"]])
scaler_model = MinMaxScaler()
x = scaler_model.fit_transform(x)
y = df["species"]
def yvalue(y):
    if y == "setosa":
        return [1, 0, 0]
    elif y == "versicolor":
        return [0, 1, 0]
    else:
        return [0, 0, 1]
y = y.apply(yvalue)
y = y.reshape(150,1)
x_train, x_test , y_train,y_test = train_test_split(x,y,test_size=0.3)
print(y_train)
n_features = 4
n_species = 1
training_epochs = 2000
learning_rate = 0.0001
n_samples = 105
display_step = 50
X = tf.placeholder(tf.float32,[105,n_features])
Y = tf.placeholder(tf.float32,[105,1])
W = tf.Variable(tf.random_normal([n_features,n_species]))
b = tf.Variable(tf.random_normal([1]))
_y = tf.add(tf.matmul(X,W),b)
output = tf.nn.softmax(_y)
cost = tf.reduce_mean(tf.pow(Y - output , 2))/(2*n_samples)
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
plotData = []  # collect the cost history for the plot below
for i in range(training_epochs):
    sess.run(optimizer, feed_dict={X: x_train, Y: y_train})  # Take a gradient descent step using our inputs and labels
    sess.run(output, feed_dict={X: x_train, Y: y_train})
    # That's all! The rest of the cell just outputs debug messages.
    # Display logs per epoch step.
    if i % display_step == 0:
        cc = sess.run(cost, feed_dict={X: x_train, Y: y_train})
        print("_y:", _y)
        print("output:", output)
        print("w:", sess.run(W, feed_dict={X: x_train, Y: y_train}))
        print("Training step:", '%04d' % i, "cost=", cc)  # , "W=", sess.run(W), "b=", sess.run(b)
        print("-------------------------------------")
        plotData.append(cc)
print("Optimization Finished!")
training_cost = sess.run(cost, feed_dict={X: x_train, Y: y_train})
print("Training cost=", training_cost, "W=", sess.run(W), "b=", sess.run(b), '\n')
plt.plot(plotData)
plt.show()
tf.nn.softmax divides by the sum of the exponentiated elements (see the expression in the docs). If you only have one element in the dimension being summed over (last by default):
print(_y.shape)
(105, 1)
Then you end up with exp(x) / sum(exp(x)) = exp(x) / exp(x), which is the constant 1. So the gradient is 0 and no training happens.
You could switch to tf.nn.sigmoid.
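Alternatively (a sketch, not from the original answer): since yvalue already produces one-hot labels over the 3 species, you could give the model 3 output units and use softmax cross-entropy, assuming the labels are stacked into an (N, 3) float array instead of being reshaped to (150, 1):
n_species = 3  # one output unit per class
Y = tf.placeholder(tf.float32, [None, n_species])
W = tf.Variable(tf.random_normal([n_features, n_species]))
b = tf.Variable(tf.random_normal([n_species]))
logits = tf.add(tf.matmul(X, W), b)
# Softmax over 3 classes is no longer a constant, and this loss is numerically stable.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y, logits=logits))
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)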