I have Bayesian neural network code to train on the MNIST dataset, like this:
import edward as ed
import tensorflow as tf
from edward.models import Normal
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
n_nodes_hl1 = 500
n_nodes_hl2 = 500
n_nodes_hl3 = 500
n_classes = 10
batch_size = 100
x_ = tf.placeholder('float', [None, 784])
y_ = tf.placeholder('float', shape=(batch_size))
# def neural_network_model(data):
w_h1 = Normal(loc=tf.zeros([784, n_nodes_hl1]), scale=tf.ones([784, n_nodes_hl1]))
w_h2 = Normal(loc=tf.zeros([n_nodes_hl1, n_nodes_hl2]), scale=tf.ones([n_nodes_hl1, n_nodes_hl2]))
w_h3 = Normal(loc=tf.zeros([n_nodes_hl2, n_nodes_hl3]), scale=tf.ones([n_nodes_hl2, n_nodes_hl3]))
w_o = Normal(loc=tf.zeros([n_nodes_hl3, n_classes]), scale=tf.ones([n_nodes_hl3, n_classes]))
b_h1 = Normal(loc=tf.zeros([n_nodes_hl1]), scale=tf.ones([n_nodes_hl1]))
b_h2 = Normal(loc=tf.zeros([n_nodes_hl2]), scale=tf.ones([n_nodes_hl2]))
b_h3 = Normal(loc=tf.zeros([n_nodes_hl3]), scale=tf.ones([n_nodes_hl3]))
b_o = Normal(loc=tf.zeros([n_classes]), scale=tf.ones([n_classes]))
y_pre = Normal(tf.matmul(x_, w_o) + b_o, scale=1.0)
qw_h1 = Normal(loc=tf.Variable(tf.random_normal([784, n_nodes_hl1])),
               scale=tf.Variable(tf.random_normal([784, n_nodes_hl1])))
qw_h2 = Normal(loc=tf.Variable(tf.random_normal([n_nodes_hl1, n_nodes_hl2])),
               scale=tf.Variable(tf.random_normal([n_nodes_hl1, n_nodes_hl2])))
qw_h3 = Normal(loc=tf.Variable(tf.random_normal([n_nodes_hl2, n_nodes_hl3])),
               scale=tf.Variable(tf.random_normal([n_nodes_hl2, n_nodes_hl3])))
qw_o = Normal(loc=tf.Variable(tf.random_normal([n_nodes_hl3, n_classes])),
              scale=tf.Variable(tf.random_normal([n_nodes_hl3, n_classes])))
qb_h1 = Normal(loc=tf.Variable(tf.random_normal([n_nodes_hl1])), scale=tf.Variable(tf.random_normal([n_nodes_hl1])))
qb_h2 = Normal(loc=tf.Variable(tf.random_normal([n_nodes_hl2])), scale=tf.Variable(tf.random_normal([n_nodes_hl2])))
qb_h3 = Normal(loc=tf.Variable(tf.random_normal([n_nodes_hl3])), scale=tf.Variable(tf.random_normal([n_nodes_hl3])))
qb_o = Normal(loc=tf.Variable(tf.random_normal([n_classes])), scale=tf.Variable(tf.random_normal([n_classes])))
y = Normal(tf.matmul(x_, qw_o) + qb_o, scale=1.0)
inference = ed.KLqp({w_h1: qw_h1, b_h1: qb_h1,
                     w_h2: qw_h2, b_h2: qb_h2,
                     w_h3: qw_h3, b_h3: qb_h3,
                     w_o: qw_o, b_o: qb_o}, data={y_pre: y_})
inference.initialize()
sess = tf.Session()
sess.run(tf.global_variables_initializer())
hm_epochs = 10
with sess:
    for epoch in range(hm_epochs):
        epoch_loss = 0
        for _ in range(int(mnist.train.num_examples / batch_size)):
            epoch_x, epoch_y = mnist.train.next_batch(batch_size)
            inference.update(feed_dict={x_: epoch_x, y_: epoch_y})
I made it by combining the neural network from the pythonprogramming.net tutorial with the Bayesian network from this link. But there is an error:
ValueError: Dimensions must be equal, but are 784 and 500 for 'MatMul'
(op: 'MatMul') with input shapes: [?,784], [500,10].
So, what does the error above mean, and how do I solve it?
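The message describes a single tf.matmul whose operands don't fit together: x_ has shape [?, 784] and w_o has shape [500, 10], and matmul requires the inner dimensions (784 and 500) to match. In other words, y_pre multiplies the raw input directly by the output-layer weights and skips the three hidden layers whose weights are defined but never used. A minimal sketch of the presumably intended forward pass (the ReLU activations are an assumption, carried over from the original tutorial's network):

def neural_network(X, w_h1, b_h1, w_h2, b_h2, w_h3, b_h3, w_o, b_o):
    # [?, 784] x [784, 500] -> [?, 500]
    l1 = tf.nn.relu(tf.matmul(X, w_h1) + b_h1)
    # [?, 500] x [500, 500] -> [?, 500]
    l2 = tf.nn.relu(tf.matmul(l1, w_h2) + b_h2)
    l3 = tf.nn.relu(tf.matmul(l2, w_h3) + b_h3)
    # [?, 500] x [500, 10] -> [?, 10]
    return tf.matmul(l3, w_o) + b_o

y_pre = Normal(loc=neural_network(x_, w_h1, b_h1, w_h2, b_h2, w_h3, b_h3, w_o, b_o), scale=1.0)

The same applies to y, which should be built from the q-distributions with the same function rather than from tf.matmul(x_, qw_o) alone.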
After training the model, I save it and load it back to run some tests. But every time I reload the model I get a different accuracy and different results for exactly the same input data. After training, the printed accuracy is always good (0.8 ~ 0.9), but when I reload the model it drops to something like 0.1 ~ 0.5. I don't know if this is related to the problem, but it's weird.
import tensorflow as tf
import numpy as np
import json
n_nodes_hl1 = 1600
n_nodes_hl2 = 800
n_nodes_hl3 = 400
n_nodes_hl4 = 200
n_classes = 4
batch_size = 50
input_lenght = 65
x = tf.placeholder('float', [None, input_lenght])
y = tf.placeholder('float')
def train_network(x):
    prediction = neural_network_model(x)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=prediction, labels=y))
    optimizer = tf.train.AdamOptimizer(learning_rate=0.00001).minimize(cost)
    hm_epochs = 20000
    saver = tf.train.Saver()
    init_op = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init_op)
    epoch = 0
    for epoch in range(hm_epochs):
        epoch_cost = 0
        i = 0
        while i < len(train_x):
            start = i
            end = i + batch_size
            batch_x = np.array(train_x[start:end])
            batch_y = np.array(train_y[start:end])
            _, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y})
            epoch_cost += c
            i += batch_size
    save_path = saver.save(sess, "drive/My Drive/datasets/tensorflow/model")
    correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
    print("accuracy:", accuracy.eval({x: test_x, y: test_y}, session=sess))
    sess.close()
def group_test_train(features_data, labels_data, test_size):
    featureset = []
    for i in range(test_size):
        featureset += [[features_data[i], labels_data[i]]]
    featureset = np.array(featureset)
    np.random.shuffle(featureset)
    train_x = list(featureset[:, 0][:test_size // 2])
    train_y = list(featureset[:, 1][:test_size // 2])
    test_x = list(featureset[:, 0][test_size // 2:])
    test_y = list(featureset[:, 1][test_size // 2:])
    return train_x, train_y, test_x, test_y
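# Aside: np.random.shuffle above produces a different train/test split on every
# run, so the evaluation set itself changes between runs. If the split should
# be repeatable, seed NumPy first (hypothetical fixed seed, for illustration):
# np.random.seed(42)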
def neural_network_model(data):
    hidden1 = {'weights': tf.Variable(tf.random_uniform([input_lenght, n_nodes_hl1], -1, 1)),
               'biases': tf.Variable(tf.random_normal([n_nodes_hl1]))}
    hidden2 = {'weights': tf.Variable(tf.random_uniform([n_nodes_hl1, n_nodes_hl2], -1, 1)),
               'biases': tf.Variable(tf.random_normal([n_nodes_hl2]))}
    hidden3 = {'weights': tf.Variable(tf.random_uniform([n_nodes_hl2, n_nodes_hl3], -1, 1)),
               'biases': tf.Variable(tf.random_normal([n_nodes_hl3]))}
    hidden4 = {'weights': tf.Variable(tf.random_uniform([n_nodes_hl3, n_nodes_hl4], -1, 1)),
               'biases': tf.Variable(tf.random_normal([n_nodes_hl4]))}
    l_output = {'weights': tf.Variable(tf.random_uniform([n_nodes_hl4, n_classes], -1, 1)),
                'biases': tf.Variable(tf.random_normal([n_classes]))}
    l1 = tf.add(tf.matmul(data, hidden1['weights']), hidden1['biases'])
    l1 = tf.nn.relu(l1)
    l2 = tf.add(tf.matmul(l1, hidden2['weights']), hidden2['biases'])
    l2 = tf.nn.relu(l2)
    l3 = tf.add(tf.matmul(l2, hidden3['weights']), hidden3['biases'])
    l3 = tf.nn.relu(l3)
    l4 = tf.add(tf.matmul(l3, hidden4['weights']), hidden4['biases'])
    l4 = tf.nn.relu(l4)
    output = tf.add(tf.matmul(l4, l_output['weights']), l_output['biases'])
    return output
version = 'end'
with open('drive/My Drive/datasets/json/' + 'data-' + version + '.json') as json_file:
    x_, y_ = json.load(json_file)
train_x, train_y, test_x, test_y = group_test_train(x_, y_, len(x_) )
train_network(x)
Every time I run the part below, the accuracy changes, and so does the output.
prediction = neural_network_model(x)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=prediction, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=0.00001).minimize(cost)
init_op = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init_op)
new_saver = tf.train.import_meta_graph('drive/My Drive/datasets/tensorflow/model.meta')
new_saver.restore(sess, tf.train.latest_checkpoint('drive/My Drive/datasets/tensorflow/'))
correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
print("accuracy:", accuracy.eval({x: train_x, y: train_y}, session=sess))
I wrote two versions of code for the MNIST dataset in TensorFlow. The first one is similar to the example code, with input shape [None, 784].
The second one is what I changed: I modified the input shape to [784, None] and changed the corresponding matrix formats.
I am confused by the difference in results. I am a beginner with TensorFlow and would really appreciate your help.
Thanks
import tensorflow as tf
import tensorflow.examples.tutorials.mnist.input_data as input_data
mnist = input_data.read_data_sets(r"C:\Ruigy\NN\minst", one_hot=True)
def hidden_layer(X, sizeOutput, non_linear_name=''):
    sizeInput = X.shape[1]
    W = tf.Variable(tf.zeros([sizeInput, sizeOutput]))
    B = tf.Variable(tf.zeros([sizeOutput]))
    Y = tf.matmul(X, W) + B
    if non_linear_name == '': return Y
    elif non_linear_name == 'softmax': A = tf.nn.softmax(Y)
    elif non_linear_name == 'ReLU': A = tf.nn.relu(Y)
    return A
X = tf.placeholder(tf.float32, [None, 784],name = 'Input')
Y_LABEL = tf.placeholder(tf.float32, [None, 10], name = 'Label')
Y_linear= hidden_layer(X,Y_LABEL.shape[1])
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=Y_LABEL, logits=Y_linear))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()
for _ in range(1000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_step, feed_dict={X: batch_xs, Y_LABEL: batch_ys})
correct_prediction = tf.equal(tf.argmax(Y_linear, 1), tf.argmax(Y_LABEL, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(sess.run(accuracy, feed_dict={X: mnist.test.images, Y_LABEL: mnist.test.labels}))
The result is 0.9188
import tensorflow as tf
import tensorflow.examples.tutorials.mnist.input_data as input_data
mnist = input_data.read_data_sets(r"C:\Ruigy\NN\minst", one_hot=True)
def hidden_layer(X, sizeOutput, non_linear_name=''):
    sizeInput = X.shape[0]
    W = tf.Variable(tf.zeros([sizeInput, sizeOutput]))
    B = tf.Variable(tf.zeros([sizeOutput, 1]))
    Y = tf.matmul(W, X, True) + B
    if non_linear_name == '': return Y
    elif non_linear_name == 'softmax': A = tf.nn.softmax(Y)
    elif non_linear_name == 'ReLU': A = tf.nn.relu(Y)
    return A
X = tf.placeholder(tf.float32, [784,None],name = 'Input')
Y_LABEL = tf.placeholder(tf.float32, [10,None], name = 'Label')
Y_linear= hidden_layer(X,Y_LABEL.shape[0])
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=Y_LABEL, logits=Y_linear))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()
for _ in range(1000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_step, feed_dict={X: batch_xs.T, Y_LABEL: batch_ys.T})
correct_prediction = tf.equal(tf.argmax(Y_linear, 0), tf.argmax(Y_LABEL, 0))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(sess.run(accuracy, feed_dict={X: mnist.test.images.T, Y_LABEL: mnist.test.labels.T}))
The result is 0.6638
I am really confused. Where am I wrong? I just wanted to change the format.
Data should be taken in the same format as given in the dataset. If you want to change the format, apply a tf.transpose() to the input after assigning it to the placeholder.
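A minimal sketch of that suggestion, keeping the [784, None] feed format but transposing back before the math. One relevant detail: tf.nn.softmax_cross_entropy_with_logits applies the softmax along the last axis, so with [10, None] logits it normalizes across the batch instead of across the 10 classes, which is consistent with the accuracy drop.

X = tf.placeholder(tf.float32, [784, None], name='Input')
Y_LABEL = tf.placeholder(tf.float32, [10, None], name='Label')

X_t = tf.transpose(X)          # back to [None, 784]
Y_t = tf.transpose(Y_LABEL)    # back to [None, 10]

Y_linear = hidden_layer(X_t, 10)   # the first version's hidden_layer
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=Y_t, logits=Y_linear))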
I am new to machine learning. I have a final project about prediction using two algorithms, an Artificial Neural Network and a Bayesian Neural Network, and I want to compare the prediction results of the ANN and the BNN. I have finished the ANN program, but I have a problem with the BNN. I tried a tutorial from this link: bayesian neural network tutorial. This is my ANN sample code to train and evaluate the model.
keep_prob = tf.placeholder("float", name="keep_prob")
x = tf.placeholder(tf.float32, [None, n_input], name="x")
y = tf.placeholder(tf.float32, name="y")
training_epochs = 5000
display_step = 1000
batch_size = 5
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=predictions, labels=y), name="cost_function")
optimizer = tf.train.AdamOptimizer(learning_rate=0.0001, name="Adam").minimize(cost)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in tqdm(range(training_epochs)):
        avg_cost = 0.0
        total_batch = int(len(x_train) / batch_size)
        x_batches = np.array_split(x_train, total_batch)
        y_batches = np.array_split(y_train, total_batch)
        for i in range(total_batch):
            batch_x, batch_y = x_batches[i], y_batches[i]
            _, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y, keep_prob: 0.8})
            avg_cost += c / total_batch
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(avg_cost))
    print("Optimization Finished!")
    correct_prediction = tf.equal(tf.argmax(predictions, 1), tf.argmax(y, 1), name="corr_pred")
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"), name="accuracy")
    # print('Accuracy: ', sess.run(accuracy, feed_dict={x: x_test, y: y_test}))
    print("Accuracy:", accuracy.eval({x: x_test, y: y_test, keep_prob: 1.0}))
and this is my BNN code:
# Importing required libraries
from math import floor
import edward as ed
import numpy as np
import pandas as pd
import tensorflow as tf
from edward.models import Normal, NormalWithSoftplusScale
from fancyimpute import KNN
from sklearn import preprocessing
# Read data
features_dummies_nan = pd.read_csv('csv/features_dummies_with_label.csv', sep=',')
# Function: impute missing value by KNN
def impute_missing_values_by_KNN():
    home_data = features_dummies_nan[[col for col in features_dummies_nan.columns if 'hp' in col]]
    away_data = features_dummies_nan[[col for col in features_dummies_nan.columns if 'ap' in col]]
    label_data = features_dummies_nan[[col for col in features_dummies_nan.columns if 'label' in col]]
    home_filled = pd.DataFrame(KNN(3).complete(home_data))
    home_filled.columns = home_data.columns
    home_filled.index = home_data.index
    away_filled = pd.DataFrame(KNN(3).complete(away_data))
    away_filled.columns = away_data.columns
    away_filled.index = away_data.index
    data_frame_out = pd.concat([home_filled, away_filled, label_data], axis=1)
    return data_frame_out
features_dummies = impute_missing_values_by_KNN()
target = features_dummies.loc[:, 'label'].values
data = features_dummies.drop('label', axis=1)
data = data.values
perm = np.random.permutation(len(features_dummies))
data = data[perm]
target = target[perm]
train_size = 0.9
train_cnt = floor(features_dummies.shape[0] * train_size)
x_train = data[0:train_cnt] # data_train
y_train = target[0:train_cnt] # target_train
x_test = data[train_cnt:] # data_test
y_test = target[train_cnt:] # target_test
keep_prob = tf.placeholder("float", name="keep_prob")
n_input = data.shape[1] # D
n_classes = 3
n_hidden_1 = 100 # H0
n_hidden_2 = 100 # H1
n_hidden_3 = 100 # H2
def neural_network(X, W_0, W_1, W_2, W_out, b_0, b_1, b_2, b_out):
    hidden1 = tf.nn.relu(tf.matmul(X, W_0) + b_0)
    hidden2 = tf.nn.relu(tf.matmul(hidden1, W_1) + b_1)
    hidden3 = tf.nn.relu(tf.matmul(hidden2, W_2) + b_2)
    output = tf.matmul(hidden3, W_out) + b_out
    return tf.reshape(output, [-1])
scaler = preprocessing.StandardScaler().fit(x_train)
data_train_scaled = scaler.transform(x_train)
data_test_scaled = scaler.transform(x_test)
W_0 = Normal(loc=tf.zeros([n_input, n_hidden_1]), scale=5.0 * tf.ones([n_input, n_hidden_1]))
W_1 = Normal(loc=tf.zeros([n_hidden_1, n_hidden_2]), scale=5.0 * tf.ones([n_hidden_1, n_hidden_2]))
W_2 = Normal(loc=tf.zeros([n_hidden_2, n_hidden_3]), scale=5.0 * tf.ones([n_hidden_2, n_hidden_3]))
W_out = Normal(loc=tf.zeros([n_hidden_3, 1]), scale=5.0 * tf.ones([n_hidden_3, 1]))
b_0 = Normal(loc=tf.zeros(n_hidden_1), scale=5.0 * tf.ones(n_hidden_1))
b_1 = Normal(loc=tf.zeros(n_hidden_2), scale=5.0 * tf.ones(n_hidden_2))
b_2 = Normal(loc=tf.zeros(n_hidden_3), scale=5.0 * tf.ones(n_hidden_3))
b_out = Normal(loc=tf.zeros(1), scale=5.0 * tf.ones(1))
qW_0 = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([n_input, n_hidden_1])),
                               scale=tf.Variable(tf.random_normal([n_input, n_hidden_1])))
qW_1 = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
                               scale=tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])))
qW_2 = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3])),
                               scale=tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3])))
qW_out = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([n_hidden_3, 1])),
                                 scale=tf.Variable(tf.random_normal([n_hidden_3, 1])))
qb_0 = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([n_hidden_1])),
                               scale=tf.Variable(tf.random_normal([n_hidden_1])))
qb_1 = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([n_hidden_2])),
                               scale=tf.Variable(tf.random_normal([n_hidden_2])))
qb_2 = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([n_hidden_3])),
                               scale=tf.Variable(tf.random_normal([n_hidden_3])))
qb_out = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([1])),
                                 scale=tf.Variable(tf.random_normal([1])))
sigma_y = 1.0
x = tf.placeholder(tf.float32, [None, n_input])
y = Normal(loc=neural_network(x, W_0, W_1, W_2, W_out, b_0, b_1, b_2, b_out), scale=sigma_y)
inference = ed.KLqp({W_0: qW_0, b_0: qb_0,
                     W_1: qW_1, b_1: qb_1,
                     W_2: qW_2, b_2: qb_2,
                     W_out: qW_out, b_out: qb_out}, data={x: x_train, y: y_train})
global_step = tf.Variable(0, trainable=False)
starter_learning_rate = 0.05
learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
                                           1000, 0.3, staircase=True)
optimizer = tf.train.AdamOptimizer(learning_rate)
inference.run(n_iter=5000, optimizer=optimizer, global_step=global_step)
But I want to compare the two algorithms' results, so I want some variables to be the same between the ANN and the BNN, for example the number of epochs. I then want to adapt my ANN code above to this BNN code section.
sigma_y = 1.0
x = tf.placeholder(tf.float32, [None, n_input])
y = Normal(loc=neural_network(x, W_0, W_1, W_2, W_out, b_0, b_1, b_2, b_out), scale=sigma_y)
inference = ed.KLqp({W_0: qW_0, b_0: qb_0,
                     W_1: qW_1, b_1: qb_1,
                     W_2: qW_2, b_2: qb_2,
                     W_out: qW_out, b_out: qb_out}, data={x: x_train, y: y_train})
global_step = tf.Variable(0, trainable=False)
starter_learning_rate = 0.05
learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
                                           1000, 0.3, staircase=True)
optimizer = tf.train.AdamOptimizer(learning_rate)
inference.run(n_iter=5000, optimizer=optimizer, global_step=global_step)
There are several things I don't understand. There is y = tf.placeholder(tf.float32, name="y") in the ANN, but in the BNN it is y = Normal(loc=neural_network(x, W_0, W_1, W_2, W_out, b_0, b_1, b_2, b_out), scale=sigma_y). Also, there is a scale in the BNN but not in the ANN. So, can I adapt my ANN train-and-test sample code to the BNN sample code above? I want to make inference on the BNN run like sess.run() in the ANN so I can compute the BNN's prediction accuracy. Can I do that?
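Yes: Edward's inference.run() is only a convenience wrapper around inference.initialize() plus a loop of inference.update() calls, so the BNN can be trained with the same kind of epoch loop as the ANN. A minimal sketch under these assumptions: x stays a placeholder, the targets are fed through a new placeholder y_ph (a name introduced here) bound to the model output y in the data dict, and the 'loss' entry returned by update() plays the role of the ANN's cost:

x = tf.placeholder(tf.float32, [None, n_input])
y_ph = tf.placeholder(tf.float32, [None])   # hypothetical placeholder for the observed targets
y = Normal(loc=neural_network(x, W_0, W_1, W_2, W_out, b_0, b_1, b_2, b_out), scale=sigma_y)

inference = ed.KLqp({W_0: qW_0, b_0: qb_0,
                     W_1: qW_1, b_1: qb_1,
                     W_2: qW_2, b_2: qb_2,
                     W_out: qW_out, b_out: qb_out}, data={y: y_ph})
inference.initialize(optimizer=optimizer, global_step=global_step, n_iter=training_epochs)

sess = ed.get_session()
tf.global_variables_initializer().run()
for epoch in range(training_epochs):
    info_dict = inference.update(feed_dict={x: data_train_scaled, y_ph: y_train})
    if epoch % display_step == 0:
        print("Epoch:", '%04d' % (epoch + 1), "loss=", info_dict['loss'])

As for scale: it has no ANN counterpart because the BNN's y is a distribution rather than a placeholder; sigma_y is the observation noise the likelihood assumes around the network output.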
My specs:
Windows 10 64-bit,
Python 3.6,
Tensorflow 1.0.1.
I've been trying to train and use a neural network on the MNIST dataset:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data", one_hot=True)
tf.reset_default_graph()
n_nodes_hl1 = 500
n_nodes_hl2 = 500
n_nodes_hl3 = 500
n_classes = 10
batch_size = 100
x = tf.placeholder('float',[None,784])
y = tf.placeholder('float')
current_epoch = tf.Variable(1)
def neural_network_model(data):
    hidden_1_layer = {'weights': tf.Variable(tf.random_normal([784, n_nodes_hl1])),
                      'biases': tf.Variable(tf.random_normal([n_nodes_hl1]))}
    hidden_2_layer = {'weights': tf.Variable(tf.random_normal([n_nodes_hl1, n_nodes_hl2])),
                      'biases': tf.Variable(tf.random_normal([n_nodes_hl2]))}
    hidden_3_layer = {'weights': tf.Variable(tf.random_normal([n_nodes_hl2, n_nodes_hl3])),
                      'biases': tf.Variable(tf.random_normal([n_nodes_hl3]))}
    output_layer = {'weights': tf.Variable(tf.random_normal([n_nodes_hl3, n_classes])),
                    'biases': tf.Variable(tf.random_normal([n_classes]))}
    l1 = tf.add(tf.matmul(data, hidden_1_layer['weights']), hidden_1_layer['biases'])
    l1 = tf.nn.relu(l1)
    l2 = tf.add(tf.matmul(l1, hidden_2_layer['weights']), hidden_2_layer['biases'])
    l2 = tf.nn.relu(l2)
    l3 = tf.add(tf.matmul(l2, hidden_3_layer['weights']), hidden_3_layer['biases'])
    l3 = tf.nn.relu(l3)
    output = tf.matmul(l3, output_layer['weights']) + output_layer['biases']
    return output
saver = tf.train.Saver()
def train_neural_network(x):
    prediction = neural_network_model(x)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
    optimizer = tf.train.AdamOptimizer().minimize(cost)
    hm_epochs = 10
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(hm_epochs):
            epoch_loss = 0
            for _ in range(int(mnist.train.num_examples / batch_size)):
                epoch_x, epoch_y = mnist.train.next_batch(batch_size)
                _, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
                epoch_loss += c
        correct = tf.equal(tf.arg_max(prediction, 1), tf.arg_max(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
        saver.save(sess, "/tmp/model.ckpt")
train_neural_network(x)
After that I passed one of MNIST images through trained model:
import cv2
import numpy as np
mn = cv2.imread('352.png')
mn = cv2.cvtColor(mn,cv2.COLOR_BGR2GRAY)
mn2 = np.array(list(mn.flatten()))
x = tf.placeholder('float')
y = tf.placeholder('float')
with tf.Session() as sess:
    prediction = neural_network_model(x)
    sess.run(tf.global_variables_initializer())
    saver.restore(sess, "/tmp/model.ckpt")
    result = sess.run(tf.argmax(prediction.eval(feed_dict={x: [mn2]}), 1))
    print(prediction.eval(feed_dict={x: [mn2]}))
    print(result)
Unfortunately, with a model accuracy of ca. 95%, I keep getting wrong results; not only are they wrong, they are also inconsistent between runs.
Let's say I pass an image of the number 8, and I get 9, 4, 3...
The model file model.ckpt.data-00000-of-00001 saved to the hard drive is about 10 megabytes. When I try to restore the model while doing the training in every loop (as suggested in the pythonprogramming.net course), the model seems not to update, because the files are only 1 KB.
What am I doing wrong here?
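A probable cause: prediction = neural_network_model(x) in the test snippet creates a second, freshly initialized set of variables, while saver.restore fills the first set from training, so the prediction runs on random weights, which would explain both the wrong and the inconsistent answers. A minimal sketch of one common pattern, assuming the test runs as a fresh process: build the graph once, create the Saver after the variables exist, restore without calling the initializer, and scale the pixels to [0, 1] the way the MNIST reader does, since raw 0-255 values look nothing like the training data.

import cv2
import numpy as np
import tensorflow as tf

tf.reset_default_graph()
x = tf.placeholder('float', [None, 784])
prediction = neural_network_model(x)   # same architecture as in training
saver = tf.train.Saver()               # built over the variables just created

mn = cv2.imread('352.png')
mn = cv2.cvtColor(mn, cv2.COLOR_BGR2GRAY)
mn2 = mn.flatten().astype(np.float32) / 255.0   # input_data feeds pixels in [0, 1]

with tf.Session() as sess:
    saver.restore(sess, "/tmp/model.ckpt")      # no global_variables_initializer here
    print(sess.run(prediction, feed_dict={x: [mn2]}))
    print(sess.run(tf.argmax(prediction, 1), feed_dict={x: [mn2]}))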
I'm trying to write my own MNIST example that can use both GPUs of one machine.
It is a simple multi-layer perceptron.
Here is my code. You can run it directly.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
import tensorflow as tf
learning_rate = 0.001
training_steps = 100000
batch_size = 100
display_step = 100
n_hidden_1 = 256
n_hidden_2 = 256
n_input = 784
n_classes = 10
def _variable_on_cpu(name, shape, initializer):
    with tf.device('/cpu:0'):
        dtype = tf.float32
        var = tf.get_variable(name, shape, initializer=initializer, dtype=dtype)
    return var
def build_model():
    def multilayer_perceptron(x, weights, biases):
        layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
        layer_1 = tf.nn.relu(layer_1)
        layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
        layer_2 = tf.nn.relu(layer_2)
        out_layer = tf.matmul(layer_2, weights['out']) + biases['out']
        return out_layer

    with tf.variable_scope('aaa'):
        weights = {
            'h1': _variable_on_cpu('h1', [n_input, n_hidden_1], tf.constant_initializer(0.0)),
            'h2': _variable_on_cpu('h2', [n_hidden_1, n_hidden_2], tf.constant_initializer(0.0)),
            'out': _variable_on_cpu('out_w', [n_hidden_2, n_classes], tf.constant_initializer(0.0))
        }
        biases = {
            'b1': _variable_on_cpu('b1', [n_hidden_1], tf.constant_initializer(0.0)),
            'b2': _variable_on_cpu('b2', [n_hidden_2], tf.constant_initializer(0.0)),
            'out': _variable_on_cpu('out_b', [n_classes], tf.constant_initializer(0.0))
        }
        pred = multilayer_perceptron(x, weights, biases)
        cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
    return cost
def average_gradients(tower_grads):
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        grads = []
        for g, _ in grad_and_vars:
            expanded_g = tf.expand_dims(g, 0)
            grads.append(expanded_g)
        grad = tf.concat(axis=0, values=grads)
        grad = tf.reduce_mean(grad, 0)
        v = grad_and_vars[0][1]
        grad_and_var = (grad, v)
        average_grads.append(grad_and_var)
    return average_grads
with tf.Graph().as_default(), tf.device('/cpu:0'):
    x = tf.placeholder("float", [None, n_input])
    y = tf.placeholder("float", [None, n_classes])
    tower_grads = []
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    with tf.variable_scope(tf.get_variable_scope()):
        for i in xrange(2):
            with tf.device('/gpu:%d' % i):
                cost = build_model()
                tf.get_variable_scope().reuse_variables()
                grads = optimizer.compute_gradients(cost)
                tower_grads.append(grads)
    grads = average_gradients(tower_grads)
    apply_gradient_op = optimizer.apply_gradients(grads)
    train_op = apply_gradient_op
    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)
    for step in range(training_steps):
        image_batch, label_batch = mnist.train.next_batch(batch_size)
        _, cost_print = sess.run([train_op, cost],
                                 {x: image_batch,
                                  y: label_batch})
        if step % display_step == 0:
            print("step=%04d" % (step + 1) + " cost=" + str(cost_print))
    print("Optimization Finished!")
    sess.close()
The print info looks like:
step=0001 cost=2.30258
step=0101 cost=2.30246
step=0201 cost=2.30128
step=0301 cost=2.30376
step=0401 cost=2.29817
step=0501 cost=2.2992
step=0601 cost=2.3104
step=0701 cost=2.29995
step=0801 cost=2.29802
step=0901 cost=2.30524
step=1001 cost=2.29673
step=1101 cost=2.30016
step=1201 cost=2.31057
step=1301 cost=2.29815
step=1401 cost=2.29669
step=1501 cost=2.30345
step=1601 cost=2.29811
step=1701 cost=2.30867
step=1801 cost=2.30757
step=1901 cost=2.29716
step=2001 cost=2.30394
The loss doesn't decrease, and I don't know how to fix it.
By the way, GPU-Util is about 26% on each GPU. How can I increase the GPU-Util?
The problem is that I should use tf.constant_initializer(0.1) for the weights instead of tf.constant_initializer(0.0).
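For reference, a minimal sketch of that change inside build_model; the truncated-normal alternative in the comment is a common way to break the symmetry of identical weights, not something from the original code:

weights = {
    'h1': _variable_on_cpu('h1', [n_input, n_hidden_1], tf.constant_initializer(0.1)),
    'h2': _variable_on_cpu('h2', [n_hidden_1, n_hidden_2], tf.constant_initializer(0.1)),
    'out': _variable_on_cpu('out_w', [n_hidden_2, n_classes], tf.constant_initializer(0.1))
}
# more conventional: small random values, e.g.
# _variable_on_cpu('h1', [n_input, n_hidden_1], tf.truncated_normal_initializer(stddev=0.1))

With all-zero weights every hidden unit computes the same zero activation, gradients through the ReLUs vanish, and the network cannot learn, which is why the cost sits at ln(10) ≈ 2.3026.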