There are 1875 features in data, which are correlated with loan records of people. Some of them have been used for score card, ks=27.
I wanted to use these features in neural networks to determine whether a person is good or bad. However,it turned out all people were defined bad or good without selectivity, even I employed the method of'#imbalance data' (as shown in the following code).
Is there some problem in my code(activation function?),can someone give some tips? thanks in advance!
from sas7bdat import SAS7BDAT
import pandas as pd
import numpy as np
from sklearn.utils import shuffle
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix
f=SAS7BDAT('data_10212102.sas7bdat')
data=f.to_data_frame()
#drop some time freatures
drop_cols = [col for col in data.columns if 'TIME' in col]
data=data.drop(drop_cols,axis=1)
data.loc[data.TARGET == 0,'Good'] =1
data.loc[data.TARGET == 1,'Good'] =0
data = data.rename(columns={'TARGET':'Bad'})
validation =data.ix[data.SETID==3,:]
train_test_data = data.loc[~data.index.isin(validation.index)]
X_train = train_test_data.ix[train_test_data.SETID==1,:]
X_test = train_test_data.ix[train_test_data.SETID==2,:]
X_train = shuffle(X_train)
X_test = shuffle(X_test)
X_validation = shuffle(validation)
Y_train = X_train.Bad
Y_train = pd.concat([Y_train,X_train.Good],axis=1)
Y_test = X_test.Bad
Y_test = pd.concat([Y_test,X_test.Good],axis=1)
Y_validation = X_validation.Bad
Y_validation = pd.concat([Y_validation,X_validation.Good],axis=1)
ratio = len(X_train)/len(X_train.ix[X_train.Bad==1,:])
X_train = X_train.drop(['Good','Bad'],axis=1)
X_test = X_test.drop(['Good','Bad'],axis=1)
X_validation = X_validation.drop(['Good','Bad'],axis=1)
#imbalance data
Y_train.Bad *= ratio
Y_test.Bad *=ratio
Y_validation.Bad *= ratio
#parameters
learning_rate = 0.001
training_epochs = 2000
batch_size = 512
display_step = 500
n_samples = X_train.shape[0]
n_features = 1845
n_class = 2
x = tf.placeholder(tf.float32, [None, n_features])
y = tf.placeholder(tf.float32, [None, n_class])
n_units =2048
n_layers =7
W={}
b={}
for i in range(n_layers):
if i==0:
W[i] = tf.Variable(tf.random_normal([n_features, n_units]))
b[i] = tf.Variable(tf.random_normal([n_units]))
pred = tf.nn.sigmoid(tf.matmul(x, W[i]) + b[i])
elif 0<i<n_layers-1:
W[i] = tf.Variable(tf.random_normal([n_units, n_units]))
b[i] = tf.Variable(tf.random_normal([n_units]))
pred = tf.nn.sigmoid(tf.matmul(pred, W[i]) + b[i])
else:
W[i] = tf.Variable(tf.random_normal([n_units, n_class]))
b[i] = tf.Variable(tf.random_normal([n_class]))
pred = tf.nn.softmax(tf.matmul(pred, W[i]) + b[i])
cost = -tf.reduce_sum(y * tf.log(pred))
optimizer =
tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
init = tf.global_variables_initializer()
saver = tf.train.Saver()
with tf.Session() as sess:
sess.run(init)
for epoch in range(training_epochs):
avg_cost = 0
total_batch = int(n_samples / batch_size)
for i in range(total_batch):
_, c = sess.run([optimizer, cost],
feed_dict={x: X_train[i * batch_size : (i+1) * batch_size],
y: Y_train[i * batch_size : (i+1) * batch_size]
})
avg_cost = c / total_batch
plt.plot(epoch+1, avg_cost, 'co')
if (epoch+1) % display_step == 0:
print("Epoch:", "%04d" % (epoch+1), "avg_cost=",avg_cost )
print("Training Accuracy:", accuracy.eval({x: X_train, y:Y_train}))
print("Testing Accuracy:", accuracy.eval({x: X_test, y:Y_test}))
print("Validating Accuracy:", accuracy.eval({x: X_validation, y:Y_validation}))
train_prob =pred.eval(feed_dict={x: X_train})
train_predict=tf.argmax(train_prob,1).eval()
print("train:",confusion_matrix(1-Y_train['Good'],train_predict))
test_prob = pred.eval(feed_dict={x: X_test})
test_predict=tf.argmax(test_prob,1).eval()
print("test:",confusion_matrix(1-Y_test['Good'],test_predict))
validation_prob = pred.eval(feed_dict={x: X_validation})
validation_predict=tf.argmax(validation_prob,1).eval()
print("validation:",confusion_matrix(1-Y_validation['Good'],validation_predict))
print("Optimization Finished!")
#Save the variables to disk
save_path = saver.save(sess,"./model_v0.ckpt")
print("Model saved in file: %s" %save_path)
plt.xlabel("Epoch")
plt.ylabel("Cost")
plt.show()
Related
When training my model on the adult income data set and using minibatches training is very slow regardless if I use PyTorch's DataLoader or a basic implementation for minibatch training.
Is there a problem with my code or is there another way to speed up training for the adult income data set? I want to use one-hot encoding and cross-entropy loss + softmax. Do I have to use a different loss function or remove the softmax layer?
import pandas as pd
from pandas import read_csv
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import KBinsDiscretizer
from sklearn.model_selection import train_test_split
import torch
from torch.autograd import Variable
from torch.utils.data import DataLoader, Dataset, TensorDataset
import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
import warnings
warnings.filterwarnings('ignore')
device = torch.device("cpu")
class Model(nn.Module):
def __init__(self, input_dim):
super(Model, self).__init__()
self.layer1 = nn.Linear(input_dim, 12)
self.layer2 = nn.Linear(12, 2)
def forward(self, x):
x = F.sigmoid(self.layer1(x))
x = F.softmax(self.layer2(x)) # To check with the loss function
return x
# load dataset
filename = './datasets/adult-all.csv'
dataframe = read_csv(filename, header=None, na_values='?')
# drop rows with missing
dataframe = dataframe.dropna()
# summarize the class distribution
target = dataframe.values[:, -1]
# split into inputs and outputs
last_ix = len(dataframe.columns) - 1
X_, y = dataframe.drop(last_ix, axis=1), dataframe[last_ix]
# select categorical and numerical features
cat_ix = X_.select_dtypes(include=['object', 'bool']).columns
num_ix = X_.select_dtypes(include=['int64', 'float64']).columns
# label encode the target variable to have the classes 0 and 1
y = LabelEncoder().fit_transform(y)
# one-hot encoding of categorical features
df_cat = pd.get_dummies(X_[cat_ix])
# binning of numerical features
x = X_.drop(columns=cat_ix, axis=1)
est = KBinsDiscretizer(n_bins=3, encode='onehot-dense', strategy='uniform')
df_num = est.fit_transform(x)
X = pd.concat([df_cat.reset_index(drop=True), pd.DataFrame(df_num).reset_index(drop=True)], axis=1)
# split training and test data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
X_tr = Variable(torch.tensor(X_train.values, dtype=torch.float))
X_te = Variable(torch.tensor(X_test.values, dtype=torch.float))
y_tr = Variable(torch.tensor(y_train, dtype=torch.long))
y_te = Variable(torch.tensor(y_test, dtype=torch.long))
def binary_cross_entropy_one_hot(input, target):
return torch.nn.CrossEntropyLoss()(input, target)
def _accuracy(y_pred, y_true):
classes = torch.argmax(y_pred, dim=1)
labels = y_true
accuracy = torch.mean((classes == labels).float())
return accuracy
model = Model(X.shape[1])
learning_rate = 1e-3
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
epochs = 1000
accuracy = 0.0
minibatch = True
# training loop
train_loss = []
for epoch in range(epochs):
if minibatch:
batch_size = 128 # or whatever
permutation = torch.randperm(X_tr.size()[0])
for i in range(0, X_tr.size()[0], batch_size):
optimizer.zero_grad()
indices = permutation[i:i + batch_size]
batch_x, batch_y = X_tr[indices], y_tr[indices]
# in case you wanted a semi-full example
outputs = model.forward(batch_x)
loss = binary_cross_entropy_one_hot(outputs, batch_y)
loss.backward()
optimizer.step()
if epoch % 100 == 0:
print(f'epoch: {epoch:2} loss: {loss:10.8f}')
# train_ds = TensorDataset(X_tr, y_tr)
# train_dl = DataLoader(train_ds, batch_size=256, shuffle=True)
# batch_loss = 0.0
# batch_accuracy = 0.0
# for nb, (x_batch, y_batch) in enumerate(train_dl): # manually set number of batches?
# optimizer.zero_grad()
# y_pred_train = model(x_batch)
# loss = binary_cross_entropy_one_hot(y_pred_train, y_batch)
# loss.backward()
# optimizer.step()
# batch_loss += loss.item()
# batch_accuracy += _accuracy(y_pred_train, y_batch)
# train_loss.append(batch_loss / (nb + 1))
# accuracy = batch_accuracy / (nb + 1)
# if epoch % 100 == 0:
# print(f'epoch: {epoch:2} loss: {train_loss[epoch]:10.8f}')
else:
optimizer.zero_grad()
y_pred = model(X_tr)
# computing the loss function
loss = binary_cross_entropy_one_hot(y_pred, y_tr)
if epoch % 100 == 0:
print(f'epoch: {epoch:2} loss: {loss.item():10.8f}')
loss.backward()
optimizer.step()
accuracy = _accuracy(y_pred, y_tr)
# evaluation on test data
with torch.no_grad():
model.eval()
y_pred = model(X_te)
test_loss = binary_cross_entropy_one_hot(y_pred, y_te)
test_acc = _accuracy(y_pred, y_te)
print("Loss on test data: {:.4}".format(test_loss))
print("Accuracy on test data: {:.4}".format(test_acc))
Time would depend on your input_dim, the size of your dataset, and the number of updates per epoch (// the batch size). From what you've shared with us, I'm not exactly sure what the issue is and if there is actually any bottleneck. However, here are a couple of things I would point out, which might help you (in no particular order):
No need to wrap your data with torch.autograd.Variable. It has been deprecated and is no longer needed, Autograd automatically supports torch.tensors with requires_grad set to True.
If you are using torch.nn.CrossEntropyLoss, you shouldn't use F.softmax on your model's output. That's because CrossEntropyLoss includes nn.LogSoftmax() and nn.NLLLoss(). Also no need to initialize the module each time you want to call it:
criterion = torch.nn.CrossEntropyLoss()
def binary_cross_entropy_one_hot(input, target):
return criterion(input, target)
I see you are redefining your data loader on each epoch. Is that what you really want? If not you can just define it outside the training loop:
train_ds = TensorDataset(X_tr, y_tr)
train_dl = DataLoader(train_ds, batch_size=256, shuffle=True)
for epoch in range(epochs):
for x, y in train_dl:
# ...
I would call .item() on your accuracy (when calling _accuracy) to not keep it attached to the computation graph and release it from memory when it is ready.
I'll keep this short and sweet, i'm trying to use tensorflow for forex learning, and any time I run my code I got from a youtube tutorial, I get 0 output. It just says In again. Can someone help me out? I have code below.
I have tried changing variables, simplifying code, everything.
import tensorflow as tf
import numpy
import pandas as pd
import matplotlib.pyplot as plt
rng = numpy.random
data = pd.read_csv("/Users/adamh/OneDrive/Desktop/data.csv")
server_time = data['server_time'].values
bid = data['bid'].values
ask = data['ask'].values
#hyperparameters
learning_rate = 0.01
training_epochs = 10000
#parameter
display_step = 50
train_X = numpy.asarray(server_time)
train_Y = numpy.asarray(ask)
n_samples = train_X.shape[0]
X = tf.placeholder('float32')
Y = tf.placeholder('float32')
W = tf.Variable(rng.randn(),name = "Weight")
b = tf.Variable(rng.randn(), name = 'bias')
pred = tf.add(tf.multiply(X,W),b)
error = tf.reduce_sum(tf.pow(pred-(Y+Y2),2))/(2*n_samples)
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(error)
init = tf.global_variables_initializer()
# Start training
with tf.Session() as sess:
# Run the initializer
sess.run(init)
# Fit all training data
for epoch in range(training_epochs):
for (x, y) in zip(train_X, train_Y):
sess.run(optimizer, feed_dict={X: x, Y: y})
# Display logs per epoch step
if (epoch+1) % display_step == 0:
c = sess.run(error, feed_dict={X: train_X, Y:train_Y})
print("Epoch:", '%04d' % (epoch+1), "error=", "{:.9f}".format(c), \
"W=", sess.run(W), "b=", sess.run(b))
print("Optimization Finished!")
training_error = sess.run(error, feed_dict={X: train_X, Y: train_Y})
print("Training error=", training_error, "W=", sess.run(W), "b=", sess.run(b), '\n')
# Graphic display
plt.plot(train_X, train_Y, 'ro', label='Original data')
plt.plot(train_X, sess.run(W) * train_X + sess.run(b), label='Fitted line')
plt.legend()
plt.show()
# Testing example, as requested (Issue #2)
test_X = numpy.asarray([2,4,6,8,10])
test_Y = numpy.asarray([25,23,21,19,17])
print("Testing... (Mean square loss Comparison)")
testing_error = sess.run(
tf.reduce_sum(tf.pow(pred - (Y), 2)) / (2 * test_X.shape[0]),
feed_dict={X: test_X, Y: test_Y}) # same function as cost above
print("Testing error=", testing_error)
print("Absolute mean square loss difference:", abs(
training_error - testing_error))
plt.plot(test_X, test_Y, 'bo', label='Testing data')
plt.plot(train_X, sess.run(W) * train_X + sess.run(b), label='Fitted line')
plt.legend()
plt.show()
No output is given.
Following the Pytorch Transfer learning tutorial, I am interested in reporting only train and test accuracy as well as confusion matrix (say using sklearn confusionmatrix). How can I do that? The current tutorial only reports train/val accuracy and I am having hard time figuring how to incorporate the sklearn confusionmatrix code there. Link to original tutorial here: https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html
%matplotlib inline
from graphviz import Digraph
import torch
from torch.autograd import Variable
# Author: Sasank Chilamkurthy
from __future__ import print_function, division
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
plt.ion()
data_transforms = {
'train': transforms.Compose([
transforms.RandomResizedCrop(224),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
]),
'val': transforms.Compose([
transforms.Resize(256),
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
]),
}
data_dir = "images"
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
data_transforms[x])
for x in ['train', 'val']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4,
shuffle=True, num_workers=4)
for x in ['train', 'val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
class_names = image_datasets['train'].classes
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
def imshow(inp, title=None):
"""Imshow for Tensor."""
inp = inp.numpy().transpose((1, 2, 0))
mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])
inp = std * inp + mean
inp = np.clip(inp, 0, 1)
plt.imshow(inp)
if title is not None:
plt.title(title)
plt.pause(0.001) # pause a bit so that plots are updated
# Get a batch of training data
inputs, classes = next(iter(dataloaders['train']))
# Make a grid from batch
out = torchvision.utils.make_grid(inputs)
imshow(out, title=[class_names[x] for x in classes])
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
since = time.time()
best_model_wts = copy.deepcopy(model.state_dict())
best_acc = 0.0
for epoch in range(num_epochs):
print('Epoch {}/{}'.format(epoch, num_epochs - 1))
print('-' * 10)
# Each epoch has a training and validation phase
for phase in ['train', 'val']:
if phase == 'train':
scheduler.step()
model.train() # Set model to training mode
else:
model.eval() # Set model to evaluate mode
running_loss = 0.0
running_corrects = 0
# Iterate over data.
for inputs, labels in dataloaders[phase]:
inputs = inputs.to(device)
labels = labels.to(device)
# zero the parameter gradients
optimizer.zero_grad()
# forward
# track history if only in train
with torch.set_grad_enabled(phase == 'train'):
outputs = model(inputs)
_, preds = torch.max(outputs, 1)
loss = criterion(outputs, labels)
# backward + optimize only if in training phase
if phase == 'train':
loss.backward()
optimizer.step()
# statistics
running_loss += loss.item() * inputs.size(0)
running_corrects += torch.sum(preds == labels.data)
epoch_loss = running_loss / dataset_sizes[phase]
epoch_acc = running_corrects.double() / dataset_sizes[phase]
print('{} Loss: {:.4f} Acc: {:.4f}'.format(
phase, epoch_loss, epoch_acc))
# deep copy the model
if phase == 'val' and epoch_acc > best_acc:
best_acc = epoch_acc
best_model_wts = copy.deepcopy(model.state_dict())
print()
time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(
time_elapsed // 60, time_elapsed % 60))
print('Best val Acc: {:4f}'.format(best_acc))
# load best model weights
model.load_state_dict(best_model_wts)
return model
def visualize_model(model, num_images=6):
was_training = model.training
model.eval()
images_so_far = 0
fig = plt.figure()
with torch.no_grad():
for i, (inputs, labels) in enumerate(dataloaders['val']):
inputs = inputs.to(device)
labels = labels.to(device)
outputs = model(inputs)
_, preds = torch.max(outputs, 1)
for j in range(inputs.size()[0]):
images_so_far += 1
ax = plt.subplot(num_images//2, 2, images_so_far)
ax.axis('off')
ax.set_title('predicted: {}'.format(class_names[preds[j]]))
imshow(inputs.cpu().data[j])
if images_so_far == num_images:
model.train(mode=was_training)
return
model.train(mode=was_training)
model_ft = models.resnet18(pretrained=True)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, 9)
model_ft = model_ft.to(device)
criterion = nn.CrossEntropyLoss()
# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)
# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
num_epochs=25)
visualize_model(model_ft)
Answer given by ptrblck of PyTorch community. Thanks a lot!
nb_classes = 9
confusion_matrix = torch.zeros(nb_classes, nb_classes)
with torch.no_grad():
for i, (inputs, classes) in enumerate(dataloaders['val']):
inputs = inputs.to(device)
classes = classes.to(device)
outputs = model_ft(inputs)
_, preds = torch.max(outputs, 1)
for t, p in zip(classes.view(-1), preds.view(-1)):
confusion_matrix[t.long(), p.long()] += 1
print(confusion_matrix)
To get the per-class accuracy:
print(confusion_matrix.diag()/confusion_matrix.sum(1))
Here is a slightly modified(direct) approach using sklearn's confusion_matrix:-
from sklearn.metrics import confusion_matrix
nb_classes = 9
# Initialize the prediction and label lists(tensors)
predlist=torch.zeros(0,dtype=torch.long, device='cpu')
lbllist=torch.zeros(0,dtype=torch.long, device='cpu')
with torch.no_grad():
for i, (inputs, classes) in enumerate(dataloaders['val']):
inputs = inputs.to(device)
classes = classes.to(device)
outputs = model_ft(inputs)
_, preds = torch.max(outputs, 1)
# Append batch prediction results
predlist=torch.cat([predlist,preds.view(-1).cpu()])
lbllist=torch.cat([lbllist,classes.view(-1).cpu()])
# Confusion matrix
conf_mat=confusion_matrix(lbllist.numpy(), predlist.numpy())
print(conf_mat)
# Per-class accuracy
class_accuracy=100*conf_mat.diagonal()/conf_mat.sum(1)
print(class_accuracy)
Follwing the answer above... Here is an answer with some visualization
nb_classes = 9
confusion_matrix = np.zeros((nb_classes, nb_classes))
with torch.no_grad():
for i, (inputs, classes) in enumerate(test_loader):
inputs = inputs.to(DEVICE)
classes = classes.to(DEVICE)
outputs = model(inputs)
_, preds = torch.max(outputs, 1)
for t, p in zip(classes.view(-1), preds.view(-1)):
confusion_matrix[t.long(), p.long()] += 1
plt.figure(figsize=(15,10))
class_names = list(label2class.values())
df_cm = pd.DataFrame(confusion_matrix, index=class_names, columns=class_names).astype(int)
heatmap = sns.heatmap(df_cm, annot=True, fmt="d")
heatmap.yaxis.set_ticklabels(heatmap.yaxis.get_ticklabels(), rotation=0, ha='right',fontsize=15)
heatmap.xaxis.set_ticklabels(heatmap.xaxis.get_ticklabels(), rotation=45, ha='right',fontsize=15)
plt.ylabel('True label')
plt.xlabel('Predicted label')
;
Another simple way to get accuracy is to use sklearns "accuracy_score".
Heres an example:
from sklearn.metrics import accuracy_score
y_pred = y_pred.data.numpy()
accuracy = accuracy_score(labels, np.argmax(y_pred, axis=1))
First you need to get the data from the variable.
"y_pred" is the predictions from your model, and labels are of course your labels.
np.argmax returns the index of the largest value inside the array. We want the largest value as it corresponds to the highest probability class when using softmax for multi-class classification. Accuracy score will return a percentage of matches between the labels and y_pred.
I used the following to convert the torch tensors to an int defining the predicted class.
x = [torch.max(tensor).item() for tensor in x_data]
y = [torch.max(tensor).item() for tensor in y_data]
i hope this helps! i'm still a noob so please be gentle...
I was trying to create a model for character recognition.
This model was working fine with 28*28 dataset and for characters from 0-9 but it training accuracy is dropping if changed to 64*64 and characters ranges from 0-9, a-z, A-Z.
While iterating through accuracy it goes till 0.3 and then stays there afterwards. I tried to train with different dataset as well but the same thing is happening.
Changing learning rate to 0.001 also does not help.
Can anyone tell what is the issue with this?
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import random as ran
import os
import tensorflow as tf
def TRAIN_SIZE(num):
images = np.load("data/train/images64.npy").reshape([2852,4096])
labels = np.load("data/train/labels.npy")
print ('Total Training Images in Dataset = ' + str(images.shape))
print ('--------------------------------------------------')
x_train = images[:num,:]
print ('x_train Examples Loaded = ' + str(x_train.shape))
y_train = labels[:num,:]
print ('y_train Examples Loaded = ' + str(y_train.shape))
print('')
return x_train, y_train
def TEST_SIZE(num):
images = np.load("data/test/images64.npy").reshape([558,4096])
labels = np.load("data/test/labels.npy")
print ('Total testing Images in Dataset = ' + str(images.shape))
print ('--------------------------------------------------')
x_test = images[:num,:]
print ('x_test Examples Loaded = ' + str(x_test.shape))
y_test = labels[:num,:]
print ('y_test Examples Loaded = ' + str(y_test.shape))
print('')
return x_test, y_test
def display_digit(num):
# print(y_train[num])
label = y_train[num].argmax(axis=0)
image = x_train[num].reshape([64,64])
# plt.axis("off")
plt.title('Example: %d Label: %d' % (num, label))
plt.imshow(image, cmap=plt.get_cmap('gray_r'))
plt.show()
def display_mult_flat(start, stop):
images = x_train[start].reshape([1,4096])
for i in range(start+1,stop):
images = np.concatenate((images, x_train[i].reshape([1,4096])))
plt.imshow(images, cmap=plt.get_cmap('gray_r'))
plt.show()
def get_char(a):
if(a<10):
return a
elif(a>=10 and a<36):
return chr(a+55)
else:
return chr(a+61)
x_train, y_train = TRAIN_SIZE(2850)
x_test, y_test = TRAIN_SIZE(1900)
x = tf.placeholder(tf.float32, shape=[None, 4096])
y_ = tf.placeholder(tf.float32, shape=[None, 62])
W = tf.Variable(tf.zeros([4096,62]))
b = tf.Variable(tf.zeros([62]))
y = tf.nn.softmax(tf.matmul(x,W) + b)
with tf.Session() as sess:
# x_test = x_test[1400:,:]
# y_test = y_test[1400:,:]
x_test, y_test =TEST_SIZE(400)
LEARNING_RATE = 0.2
TRAIN_STEPS = 1000
sess.run(tf.global_variables_initializer())
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
training = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
for i in range(TRAIN_STEPS+1):
sess.run(training, feed_dict={x: x_train, y_: y_train})
if i%100 == 0:
print('Training Step:' + str(i) + ' Accuracy = ' + str(sess.run(accuracy, feed_dict={x: x_test, y_: y_test})) + ' Loss = ' + str(sess.run(cross_entropy, {x: x_train, y_: y_train})))
savedPath = tf.train.Saver().save(sess, "/tmp/model.ckpt")
print("Model saved at: " ,savedPath)
You are trying to classify 62 different numbers and characters, but use a single fully connected layer to do that. Your model simply has not enough parameters for that task. In other words, you are underfitting the data. So either expand your network by adding parameters (layers) and/or use CNNs, which generally have good performance for image classification tasks.
Try different CNN mode. the model you are using like inception v1, v2,v3 alexnet etc..
Iris dataset classification, network parameters not updating
Hey, i tried to build a classifier with a logistic regression netwrok but my parameters are not updating, my weights,bias,output and cost stay the same can somebody help me? I have no idea why my parameters are not updating how can I solve this? Thank you!
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
df = pd.read_csv('/Users/Laurens-Wissels/Desktop/iris.csv')
x = np.array(df[["sepal_length","sepal_width","petal_length","petal_width"]])
scaler_model = MinMaxScaler()
x = scaler_model.fit_transform(x)
y = df["species"]
def yvalue(y):
if y =="setosa":
return [1,0,0]
elif y == "versicolor":
return [0,1,0]
else:
return [0,0,1]
y = y.apply(yvalue)
y = y.reshape(150,1)
x_train, x_test , y_train,y_test = train_test_split(x,y,test_size=0.3)
print(y_train)
n_features = 4
n_species = 1
traing_epochs = 2000
learning_rate = 0.0001
n_samples = 105
display_step = 50
X = tf.placeholder(tf.float32,[105,n_features])
Y = tf.placeholder(tf.float32,[105,1])
W = tf.Variable(tf.random_normal([n_features,n_species]))
b = tf.Variable(tf.random_normal([1]))
_y = tf.add(tf.matmul(X,W),b)
output = tf.nn.softmax(_y)
cost = tf.reduce_mean(tf.pow(Y - output , 2))/(2*n_samples)
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
for i in range(traing_epochs):
sess.run(optimizer, feed_dict={X: x_train, Y: y_train}) # Take a gradient descent step using our inputs and labels
sess.run(output,feed_dict={X: x_train, Y: y_train})
# That's all! The rest of the cell just outputs debug messages.
# Display logs per epoch step
if (i) % display_step == 0:
cc = sess.run(cost, feed_dict={X: x_train, Y:y_train})
print("_y:",_y)
print("output:",output)
print("w:",sess.run(W, feed_dict={X: x_train, Y:y_train}))
print "Training step:", '%04d' % (i), "cost=",sess.run(cost, feed_dict={X: x_train, Y:y_train}) #, \"W=", sess.run(W), "b=", sess.run(b)
print("-------------------------------------")
plotData.append(sess.run(cost, feed_dict={X: x_train, Y:y_train}) )
print "Optimization Finished!"
training_cost = sess.run(cost, feed_dict={X: x_train, Y:y_train})
print "Training cost=", training_cost, "W=", sess.run(W), "b=", sess.run(b), '\n'
plt.plot(plotData)
plt.show()
tf.nn.softmax divides by the sum of the exponentiated elements (see the expression in the docs). If you only have one element in the dimension being summed over (last by default):
print(_y.shape)
(105, 1)
Then you end up with exp(x) / sum(exp(x)), which is a constant 1. So the gradient is 0 and therefore no training.
You could switch to tf.nn.sigmoid.