I am currently studying GANs and using one to generate MNIST images. My environment is Ubuntu 16.04, TensorFlow, and Python 3.
The code runs without any error, but the result shows the network learns nothing: after training, the output image is still noise.
First I designed a generator network: the input is 784-dimensional noise, which passes through a hidden layer with a leaky ReLU and produces a 784-dimensional image.
Then I designed a discriminator network: the inputs are the real image and the fake image, which pass through a hidden layer with a leaky ReLU, and the output is a 1-dimensional logit.
Then I defined the generator loss and discriminator loss and trained the generator and the discriminator. It runs, but the network learns nothing and the losses do not converge.
import tensorflow as tf
import numpy as np
import tensorflow.contrib.slim as slim
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/home/zyw/data/tensor_mnist-master/MNIST_data/",one_hot=True)
batch_size = 100
G_in = tf.placeholder(tf.float32,[None,784])
G_h1 = tf.layers.dense(G_in, 128)
G_h1 = tf.maximum(0.01 * G_h1, G_h1)
G_out = tf.tanh(tf.layers.dense(G_h1, 784))
real = tf.placeholder(tf.float32,[None,784])
Dl0 = tf.layers.dense(G_out, 128)
Dl0 = tf.maximum(0.01 * Dl0, Dl0)
p0 = tf.layers.dense(Dl0, 1)
Dl1 = tf.layers.dense(real, 128)
Dl1 = tf.maximum(0.01 * Dl1, Dl1)
p1 = tf.layers.dense(Dl1, 1)
G_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits =p0,labels=tf.ones_like(p0)*0.9))
D_real_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits =p1,labels=tf.ones_like(p1)*0.9))
D_fake_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits =p0,labels=tf.zeros_like(p0)))
D_total_loss = tf.add(D_fake_loss,D_real_loss)
G_train = tf.train.AdamOptimizer(0.01).minimize(G_loss)
D_train = tf.train.AdamOptimizer(0.01).minimize(D_total_loss)
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    for i in range(1000):
        mnist_data,_ = mnist.train.next_batch(batch_size)
        # noise_org = tf.random_normal([batch_size,784],stddev = 0.1,dtype = tf.float32)
        noise_org = np.random.randn(batch_size, 784)
        a,b,dloss = sess.run([D_real_loss,D_fake_loss,D_total_loss,G_train,D_train],feed_dict={G_in:noise_org,real:mnist_data})[:3]
        if i%100==0:
            print(a,b,dloss)

    # test_generative_image
    noise_org = np.random.randn(1, 784)
    image = sess.run(G_out,feed_dict={G_in:noise_org})
    outimage = tf.reshape(image, [28,28])
    plt.imshow(outimage.eval(),cmap='gray')
    plt.show()
    print('ok')
the result is:
0.80509 0.63548 1.44057
0.33512 0.20223 0.53735
0.332536 0.97737 1.30991
0.328048 0.814452 1.1425
0.326688 0.411907 0.738596
0.325864 0.570807 0.896671
0.325575 0.970406 1.29598
0.325421 1.02487 1.35029
0.325222 1.34089 1.66612
0.325217 0.747129 1.07235
I have added the modified code with comments where I made changes, and I have described the changes below.
import tensorflow as tf
import numpy as np
import tensorflow.contrib.slim as slim
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("/home/zyw/data/tensor_mnist-master/MNIST_data/",one_hot=True)
batch_size = 100

# define the generator function
def generator(input):
    G_h1 = tf.layers.dense(input, 128)
    # G_h1 = tf.maximum(0.01 * G_h1, G_h1)
    G_out = tf.sigmoid(tf.layers.dense(G_h1, 784))  # sigmoid function added
    return G_out

# define the discriminator function
def discriminator(input):
    Dl0 = tf.layers.dense(input, 128)
    # Dl0 = tf.maximum(0.01 * Dl0, Dl0)
    p0 = tf.layers.dense(Dl0, 1)
    return p0

# generator
with tf.variable_scope('G'):
    G_in = tf.placeholder(tf.float32, [None, 784])
    G_out = generator(G_in)

real = tf.placeholder(tf.float32, [None, 784])

# discriminator that takes the real data
with tf.variable_scope('D'):
    D1 = discriminator(real)

# discriminator that takes fake data
with tf.variable_scope('D', reuse=True):  # need to use the same copy of the discriminator
    D2 = discriminator(G_out)

G_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=D2, labels=tf.ones_like(D2)))
D_real_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=D1, labels=tf.ones_like(D1)))
D_fake_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=D2, labels=tf.zeros_like(D2)))
D_total_loss = tf.add(D_fake_loss, D_real_loss)

vars = tf.trainable_variables()  # all trainable variables
d_training_vars = [v for v in vars if v.name.startswith('D/')]  # variables associated with the discriminator
g_training_vars = [v for v in vars if v.name.startswith('G/')]  # variables associated with the generator

G_train = tf.train.AdamOptimizer(0.001).minimize(G_loss, var_list=g_training_vars)  # only train the variables associated with the generator
D_train = tf.train.AdamOptimizer(0.001).minimize(D_total_loss, var_list=d_training_vars)  # only train the variables associated with the discriminator

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    for i in range(1000):
        mnist_data, _ = mnist.train.next_batch(batch_size)
        # noise_org = tf.random_normal([batch_size,784],stddev = 0.1,dtype = tf.float32)
        noise_org = np.random.randn(batch_size, 784)
        a, b, dloss = sess.run([D_real_loss, D_fake_loss, D_total_loss, G_train, D_train], feed_dict={G_in: noise_org, real: mnist_data})[:3]
        if i % 100 == 0:
            print(a, b, dloss)

    # test_generative_image
    noise_org = np.random.randn(1, 784)
    image = sess.run(G_out, feed_dict={G_in: noise_org})
    outimage = tf.reshape(image, [28, 28])
    plt.imshow(outimage.eval(), cmap='gray')
    plt.show()
    print('ok')
A few points you should note when implementing a GAN:
You need to use the same copy of the discriminator (i.e. share the same weights) when computing the discriminator loss; in your case Dl0 and Dl1 should share the same parameters (see the sketch after these points).
The generator's output activation should be sigmoid, not tanh, since the output of the generator should vary only between 0 and 1 (it is an image).
When training the discriminator, you should only train the variables associated with the discriminator. Likewise, when training the generator you should only train the variables associated with the generator.
Sometimes it is important to make sure that the discriminator is more powerful than the generator, since otherwise it would not have sufficient capacity to learn to distinguish accurately between generated and real samples.
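To make the weight-sharing point concrete, here is a minimal sketch (TF 1.x with the tf.layers API used above; the placeholder names are hypothetical) showing that two dense layers built under the same reused variable scope share a single set of parameters:

import tensorflow as tf

inp_a = tf.placeholder(tf.float32, [None, 784])
inp_b = tf.placeholder(tf.float32, [None, 784])

with tf.variable_scope('D'):
    logit_a = tf.layers.dense(inp_a, 1, name='logit')   # creates D/logit/kernel and D/logit/bias
with tf.variable_scope('D', reuse=True):
    logit_b = tf.layers.dense(inp_b, 1, name='logit')   # reuses the same D/logit variables

# Only one kernel/bias pair exists, so both logits come from the same discriminator.
print([v.name for v in tf.trainable_variables() if v.name.startswith('D/')])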
These are only the basics of GANs that you should note. There are many other aspects you should consider when developing a GAN. You can get a good basic idea of GANs by reading the following two articles.
http://blog.aylien.com/introduction-generative-adversarial-networks-code-tensorflow/
http://blog.evjang.com/2016/06/generative-adversarial-nets-in.html
Hope this helps.
I am trying to use a derived class of TensorFlow's FIFOQueue in which I override the enqueue function. It takes in the images and enqueues the output of the final dense layer.
Now I dequeue the output tensor and try to compute the cost function and minimize it with the Adam optimizer.
When I compute the cost and minimize it inside the enqueue function itself, my code works fine. But as soon as I move the loss_op (i.e. my cost) outside the derived class, I get the error: "No gradients provided for any variable, check your graph for ops that do not support gradients".
Import
from tensorflow.python.ops.data_flow_ops import FIFOQueue
import tensorflow as tf
from tensorflow.python.framework import dtypes as _dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import gen_data_flow_ops
Read the data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
n_input = 784 # MNIST data input (img shape: 28*28)
n_classes = 10 # MNIST total classes (0-9 digits)
Y = mnist.train.labels
X = mnist.train.images
Derived Queue
class MyQueue(FIFOQueue):
    def enqueue(self, x, Y, name=None):
        # Reshape
        x = tf.reshape(x, shape=[-1, 28, 28, 1])
        # 1st conv_2d layer
        conv1_mp = tf.layers.conv2d(x, 32, 5, activation=tf.nn.relu, name='Q1_c1')
        # 1st max pool layer
        conv1 = tf.layers.max_pooling2d(conv1_mp, 2, 2, name='Q1_mp1')
        # 2nd conv_2d layer
        conv2 = tf.layers.conv2d(conv1, 64, 3, activation=tf.nn.relu, name='Q1_c2')
        # 2nd max pool layer
        conv2_mp = tf.layers.max_pooling2d(conv2, 2, 2, name='Q1_mp2')
        # Flatten
        flat = tf.contrib.layers.flatten(conv2_mp)
        # Dense 1
        dense_1 = tf.layers.dense(tf.reshape(flat, [-1, 1600]), 1024, name='Q2_D1')
        # Dropout = 0.8
        drop = tf.layers.dropout(dense_1, rate=0.8, training=True, name='Q2_Dp')
        # Output class = 10
        out = tf.layers.dense(drop, n_classes, name='Q2_Op')
        # update vals to put "out" in the queue
        vals = out
        # Rest of the enqueue operation which has not been changed
        with ops.name_scope(name, "%s_enqueue" % self._name,
                            self._scope_vals(vals)) as scope:
            vals = self._check_enqueue_dtypes(vals)
            # NOTE(mrry): Not using a shape function because
            # we need access to the `QueueBase` object.
            for val, shape in zip(vals, self._shapes):
                val.get_shape().assert_is_compatible_with(shape)
            if self._queue_ref.dtype == _dtypes.resource:
                return gen_data_flow_ops.queue_enqueue_v2(
                    self._queue_ref, vals, name=scope)
            else:
                return gen_data_flow_ops.queue_enqueue(
                    self._queue_ref, vals, name=scope)
Main
q_pred = MyQueue(capacity=1, dtypes=tf.float32)
enqueue_op = q_pred.enqueue(X, Y)
data_pred = q_pred.dequeue()

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    sess.run(enqueue_op)
    out = data_pred

    # Calculating Cost
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
        logits=out, labels=Y), name='Q2_loss')
    # Adam optimiser
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
    # Write in the graph
    writer = tf.summary.FileWriter("logs\MyDerivedQueue", sess.graph)

    ####### ERROR LINE ###################
    # Minimising the cost.
    train_op = optimizer.minimize(cost)

    correct_pred = tf.equal(tf.argmax(out, 1), tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
After multiple rounds of trial and error, I came to the conclusion that this won't work, because backpropagation isn't in our control here. When using multiple GPUs, every GPU produces its feed-forward output, and during backpropagation we would not know which weights/parameters should be updated.
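To illustrate why the gradients disappear, here is a small sketch with hypothetical variable names (TF 1.x assumed): a tensor that has been dequeued from a FIFOQueue is no longer connected in the graph to the variables that produced the enqueued value, so tf.gradients (and therefore optimizer.minimize) finds nothing to differentiate.

import tensorflow as tf

w = tf.Variable(2.0)
value = w * 3.0                          # differentiable with respect to w

q = tf.FIFOQueue(capacity=1, dtypes=tf.float32)
enqueue_op = q.enqueue(value)
dequeued = q.dequeue()

print(tf.gradients(value, [w]))          # a real gradient tensor
print(tf.gradients(dequeued, [w]))       # [None]: the queue cuts the gradient path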
I just started using the GPU version of TensorFlow, hoping that it would speed up the training of my feed-forward neural networks. I am able to train on my GPU (GTX 1080 Ti), but unfortunately it is not notably faster than doing the same training on my CPU (i7-8700K) with my current implementation. During training, the GPU appears to barely be utilized at all, which makes me suspect that the bottleneck is how the data is copied from the host to the device using feed_dict.
I've heard that TensorFlow has something called the tf.data pipeline, which is supposed to make it easier and faster to feed data to GPUs. However, I have not been able to find any simple example where this concept is used in multilayer perceptron training as a replacement for feed_dict.
Is anyone aware of such an example and can point me to it? Preferably as simple as possible, since I'm new to TensorFlow in general. Or is there something else I should change in my current implementation to make it more efficient? I'm pasting the code I have here:
import tensorflow as tf
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
tf.reset_default_graph()
import time
# Function for iris dataset.
def get_iris_data():
    iris = datasets.load_iris()
    data = iris["data"]
    target = iris["target"]
    # Convert to one-hot vectors
    num_labels = len(np.unique(target))
    all_Y = np.eye(num_labels)[target]
    return train_test_split(data, all_Y, test_size=0.33, random_state=89)
# Function which initializes tensorflow weights & biases for feed-forward NN.
def InitWeights(LayerSizes):
    with tf.device('/gpu:0'):
        # Make tf placeholders for network inputs and outputs.
        X = tf.placeholder(shape=(None, LayerSizes[0]),
                           dtype=tf.float32,
                           name='InputData')
        y = tf.placeholder(shape=(None, LayerSizes[-1]),
                           dtype=tf.float32,
                           name='OutputData')
        # Initialize weights and biases.
        W = {}; b = {};
        for ii in range(len(LayerSizes)-1):
            layername = f'layer%s' % ii
            with tf.variable_scope(layername):
                ny = LayerSizes[ii]
                nx = LayerSizes[ii+1]
                # Weights (initialized with xavier initialization).
                W['Weights_'+layername] = tf.get_variable(
                    name='Weights_'+layername,
                    shape=(ny, nx),
                    initializer=tf.contrib.layers.xavier_initializer(),
                    dtype=tf.float32
                )
                # Bias (initialized with xavier initialization).
                b['Bias_'+layername] = tf.get_variable(
                    name='Bias_'+layername,
                    shape=(nx),
                    initializer=tf.contrib.layers.xavier_initializer(),
                    dtype=tf.float32
                )
    return W, b, X, y
# Function for forward propagation of NN.
def FeedForward(X, W, b):
    with tf.device('/gpu:0'):
        # Initialize 'a' of first layer to the placeholder of the network input.
        a = X
        # Loop over all layers of the network.
        for ii in range(len(W)):
            # Use name of each layer as index.
            layername = f'layer%s' % ii
            ## Weighted sum: z = input*W + b
            z = tf.add(tf.matmul(a, W['Weights_'+layername], name='WeightedSum_z_'+layername), b['Bias_'+layername])
            ## Passed through activation fcn: a = h(z)
            if ii == len(W)-1:
                a = z
            else:
                a = tf.nn.relu(z, name='activation_a_'+layername)
    return a
if __name__ == "__main__":
    # Import data
    train_X, test_X, train_y, test_y = get_iris_data()
    # Define network size [ninputs-by-256-by-outputs]
    LayerSizes = [4, 256, 3]
    # Initialize weights and biases.
    W, b, X, y = InitWeights(LayerSizes)
    # Define loss function to optimize.
    yhat = FeedForward(X, W, b)
    loss = tf.reduce_sum(tf.square(y - yhat), reduction_indices=[0])
    # Define optimizer to use when minimizing loss function.
    all_variables = tf.trainable_variables()
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.0001)
    train_op = optimizer.minimize(loss, var_list=all_variables)
    # Start tf session and initialize variables.
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    # Train 10000 minibatches and time how long it takes.
    t0 = time.time()
    for i in range(10000):
        ObservationsToUse = np.random.choice(len(train_X), 32)
        X_minibatch = train_X[ObservationsToUse,:]
        y_minibatch = train_y[ObservationsToUse,:]
        sess.run(train_op, feed_dict={X: X_minibatch, y: y_minibatch})
    t1 = time.time()
    print('Training took %0.2f seconds' % (t1-t0))
    sess.close()
The speed might be low because you are creating placeholders: with numpy you insert the data into the placeholders, and only then is it converted into tensors of the graph.
By using tf.data.Dataset, you can create a direct pipeline that makes the data flow straight into the graph without the need for placeholders. It is fast, scalable, and has a number of functions to play around with.
with np.load("/var/data/training_data.npy") as data:
    features = data["features"]
    labels = data["labels"]

# Assume that each row of `features` corresponds to the same row as `labels`.
assert features.shape[0] == labels.shape[0]
dataset = tf.data.Dataset.from_tensor_slices((features, labels))
Some useful functions :
dataset = dataset.shuffle(buffer_size=10000)
dataset = dataset.batch(32) # Creating batches
dataset = dataset.repeat(num_epochs) # repeat the dataset 'N' times
iterator = dataset.make_one_shot_iterator() # Create a iterator to retrieve batches of data
X, Y = iterator.get_next()
Here, 32 is the batch size.
In your case:
dataset = tf.data.Dataset.from_tensor_slices((data, targets))
Hence, there is no need for placeholders. Directly run:
session.run(train_op)  # no feed_dict!
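Applied to the code in the question, a minimal sketch could look like the following. It assumes the get_iris_data, InitWeights and FeedForward helpers defined above and TF 1.x; it is only an outline, not a tuned implementation:

import tensorflow as tf
import numpy as np

train_X, test_X, train_y, test_y = get_iris_data()

# Build an input pipeline that replaces the placeholders/feed_dict.
dataset = tf.data.Dataset.from_tensor_slices(
    (train_X.astype(np.float32), train_y.astype(np.float32)))
dataset = dataset.shuffle(buffer_size=len(train_X)).repeat().batch(32)
iterator = dataset.make_one_shot_iterator()
X_batch, y_batch = iterator.get_next()           # tensors, not placeholders

# Reuse the question's weight/bias setup, but build the network on the
# iterator output instead of the placeholders it also returns.
W, b, _, _ = InitWeights([4, 256, 3])
yhat = FeedForward(X_batch, W, b)
loss = tf.reduce_sum(tf.square(y_batch - yhat), reduction_indices=[0])
train_op = tf.train.GradientDescentOptimizer(0.0001).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(10000):
        sess.run(train_op)                       # no feed_dict needed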
I'm stuck working on a TensorFlow convolutional neural network for a university project and I hope somebody can help me.
It is supposed to output a picture for a picture input; the left image is the input, the right one the output. Both are in .jpeg format.
input and output
The weights look like this: the left image shows the weights before learning, the right one after a few epochs, and they do not change at all with further training.
The net does not seem to learn anything useful and I have a feeling I forgot something basic.
The accuracy peaks around 5% during learning.
weights
Here is what it looks like when I save the input image x.
I don't know if I make a mistake loading or saving the image.
And this is what the output y of the net looks like.
I based the code on the TensorFlow MNIST tutorial.
Here is my code, which I have shortened to make it more readable:
import tensorflow as tf
from PIL import Image
import numpy as np
def weight_variable(dim, stddev=0.35):
    init = tf.random_normal(dim, stddev=stddev)
    return tf.Variable(init)

def bias_variable(dim, val=0.1):
    init = tf.constant(val, shape=dim)
    return tf.Variable(init)

def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1,1,1,1], padding='SAME')

def max_pool2x2(x):
    return tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')

def output_pics(pic):  # for weights
    # 1 color (dimension) array cast to uint8 and output as jpeg to file

def output_pics_color(pic):
    # 3 colors (dimensions) array cast to uint8 and output as jpeg to file

def show_pic(pic):
    # 3 colors (dimensions) array cast to uint8 and shown in window
filesX = [...] # filenames of inputs for training
filesY = [...] # filenames of outputsfor training
test_filesX = [...]# filenames of inputs for testing
test_filesY = [...]# filenames of outputs for testing
px_size = 128 # size of images 128x128 (resized)
filename_queueX = tf.train.string_input_producer(filesX)
filename_queueY = tf.train.string_input_producer(filesY)
filename_testX = tf.train.string_input_producer(test_filesY)
filename_testY = tf.train.string_input_producer(test_filesY)
image_reader = tf.WholeFileReader()
img_name, img_dataX = image_reader.read(filename_queueX)
imageX = tf.image.decode_jpeg(img_dataX)
imageX = tf.image.resize_images(imageX, [px_size,px_size])
imageX.set_shape((px_size,px_size,3))
imageX=tf.cast(imageX, tf.float32)
...
same for imageY, test_imageX, test_imageY
trainX = []
trainY = []
testX = []
testY = []
j=1
with tf.name_scope('model'):
    x = tf.placeholder(tf.float32, [None, px_size, px_size, 3])
    prob = tf.placeholder(tf.float32)
    init_op = tf.global_variables_initializer()
# load images into lists
with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    for i in range(1, 65):
        trainX.append(imageX.eval())
        trainY.append(imageY.eval())
    for i in range(1, 10):
        testX.append(test_imageX.eval())
        testY.append(test_imageY.eval())
    coord.request_stop()
    coord.join(threads)
# layer 1
x_img = tf.reshape(x,[-1,px_size,px_size, 3])
W1 = weight_variable([20,20,3,3])
b1 = bias_variable([3])
y1 = tf.nn.softmax(conv2d(x_img,W1)+b1)
# layer 2
W2 = weight_variable([30,30,3,3])
b2 = bias_variable([3])
y2=tf.nn.softmax(conv2d(y1, W2)+b2)
# layer 3
W3 = weight_variable([40,40,3,3])
b3 = bias_variable([3])
y3=tf.nn.softmax(conv2d(y2, W3)+b3)
y = y3
with tf.name_scope('train'):
    y_ = tf.placeholder(tf.float32, [None, px_size, px_size, 3])
    cross_entropy = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=y_, logits=y))
    opt = tf.train.MomentumOptimizer(learning_rate=0.5, momentum=0.1).minimize(cross_entropy)

with tf.name_scope('eval'):
    correct = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
nEpochs = 1000
batchSize = 10
res = 0
with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)
    trAccs = []
    for i in range(nEpochs):
        if i % 100 == 0:
            train_accuracy = sess.run(accuracy, feed_dict={x: trainX, y_: trainY, prob: 1.0})
            print(train_accuracy)
            output_pics(W1)        # output weights of layer 1 to file
            output_pics_color(x)   # save input image
            output_pics_color(y)   # save net output
        sess.run(opt, feed_dict={x: trainX, y_: trainY, prob: 0.5})
This is an image generation problem.
The model you selected is a very poor fit for image generation tasks. Normal CNNs are used for image recognition and object detection tasks. The MNIST tutorial is an image classification problem, not an image generation problem, and it is very important to select an appropriate model type for a particular problem.
Clearly, with this model there is no chance of achieving the output you have described. I also do not understand how you are calculating the accuracy, because this is an unsupervised learning problem.
You have used softmax after every layer, which is a bad idea; the TensorFlow MNIST tutorial does not even contain such code. Softmax should only be used in the last layer, and in the hidden layers leaky ReLU or plain ReLU should be used (see the sketch below).
I would suggest you look for a more appropriate deep-learning model, specifically a combination of a variational autoencoder and a generative adversarial network (VAE-GAN), or a plain generative adversarial network.
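As a concrete illustration of the activation advice above, here is a minimal sketch that reuses the conv2d helper and the x_img, W1/b1, W2/b2, W3/b3 names from the question; it only changes the activations and is not an endorsement of this model for image generation:

# ReLU in the hidden layers, no per-layer softmax.
y1 = tf.nn.relu(conv2d(x_img, W1) + b1)
y2 = tf.nn.relu(conv2d(y1, W2) + b2)
y3 = conv2d(y2, W3) + b3  # raw logits; sigmoid_cross_entropy_with_logits applies its own nonlinearity
y = y3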
I'm trying to build a softmax regression model for CIFAR classification. At first, when I tried to pass my images and labels into the feed dictionary, I got an error saying that feed dictionaries do not accept Tensors. I then converted them into numpy arrays using .eval(), but the program hangs at the .eval() line and does not continue any further. How can I pass this data into the feed_dict?
CIFARIMAGELOADING.PY
import tensorflow as tf
import os
import tensorflow.models.image.cifar10 as cf
IMAGE_SIZE = 24
BATCH_SIZE = 128
def loadimagesandlabels(size):
    # Load the images from the CIFAR data directory
    FLAGS = tf.app.flags.FLAGS
    data_dir = os.path.join(FLAGS.data_dir, 'cifar-10-batches-bin')
    filenames = [os.path.join(data_dir, 'data_batch_%d.bin' % i) for i in xrange(1, 6)]
    filename_queue = tf.train.string_input_producer(filenames)
    read_input = cf.cifar10_input.read_cifar10(filename_queue)

    # Reshape and crop the image
    height = IMAGE_SIZE
    width = IMAGE_SIZE
    reshaped_image = tf.cast(read_input.uint8image, tf.float32)
    cropped_image = tf.random_crop(reshaped_image, [height, width, 3])

    # Generate a batch of images and labels by building up a queue of examples
    print('Filling queue with CIFAR images')
    num_preprocess_threads = 16
    min_fraction_of_examples_in_queue = 0.4
    min_queue_examples = int(BATCH_SIZE * min_fraction_of_examples_in_queue)

    images, label_batch = tf.train.batch([cropped_image, read_input.label], batch_size=BATCH_SIZE, num_threads=num_preprocess_threads, capacity=min_queue_examples + 3*BATCH_SIZE)
    print(images)
    print(label_batch)
    return images, tf.reshape(label_batch, [BATCH_SIZE])
CIFAR.PY
#Set up placeholder vectors for image and labels
x = tf.placeholder(tf.float32, shape = [None, 1728])
y_ = tf.placeholder(tf.float32, shape = [None,10])
W = tf.Variable(tf.zeros([1728,10]))
b = tf.Variable(tf.zeros([10]))
#Implement regression model. Multiply input images x by weight matrix W, add the bias b
#Compute the softmax probabilities that are assigned to each class
y = tf.nn.softmax(tf.matmul(x,W) + b)
#Define cross entropy
#tf.reduce sum sums across all classes and tf.reduce_mean takes the average over these sums
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_*tf.log(y), reduction_indices = [1]))
#Train the model
#Each training iteration we load 128 training examples. We then run the train_step operation
#using feed_dict to replace the placeholder tensors x and y_ with the training examples
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
#Open up a Session
init = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init)
for i in range(1000):
    images, labels = CIFARImageLoading.loadimagesandlabels(size=BATCH_SIZE)
    unrolled_images = tf.reshape(images, (1728, BATCH_SIZE))
    # convert labels to their one_hot representations
    # should produce [[1,0,0,...],[0,1,0...],...]
    one_hot_labels = tf.one_hot(indices=labels, depth=NUM_CLASSES, on_value=1.0, off_value=0.0, axis=-1)
    print(unrolled_images)
    print(one_hot_labels)
    images_numpy, labels_numpy = unrolled_images.eval(session=sess), one_hot_labels.eval(session=sess)
    sess.run(train_step, feed_dict={x: images_numpy, y_: labels_numpy})
#Evaluate the model
#.equal returns a tensor of booleans, we want to cast these as floats and then take their mean
#to get percent correctness (accuracy)
print("evaluating")
test_images, test_labels = CIFARImageLoading.loadimagesandlabels(TEST_SIZE)
test_images_unrolled = tf.reshape(test_images, (1728, TEST_SIZE))
test_images_one_hot = tf.one_hot(indices= test_labels, depth=NUM_CLASSES, on_value=1.0, off_value= 0.0, axis=-1)
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(accuracy.eval(feed_dict = {x: unrolled_images.eval(), y_ : test_images_one_hot.eval()}))
There are a couple of things that you are not understanding really well. Throughout your graph you will work with Tensors. You define Tensors either with tf.placeholder, feeding them in session.run(feed_dict={}), or with tf.Variable, initializing it with session.run(tf.initialize_all_variables()). You must feed your input this way, and it should be numpy arrays with the same shape as you expect in the placeholders. Here's a simple example:
images = tf.placeholder(type, [1728, BATCH_SIZE])
labels = tf.placeholder(type, [size])
'''
Build your network here so you have the variable: Output
'''
images_feed, labels_feed = CIFARImageLoading.loadimagesandlabels(size=BATCH_SIZE)
# here you can see your output
print sess.run(Output, feed_dict = {x: images_feed, y_:labels_feed})
You do not feed tf.functions with numpy arrays; you always feed them with Tensors. And the feed_dict is always fed with numpy arrays. The thing is: you will never have to convert tensors to numpy arrays for the input, that does not make sense. Your input must be numpy arrays: if it's a list, you can use np.asarray(list); if it's a tensor, you are doing it wrong.
I do not know what CIFARImageLoading.loadimagesandlabels returns to you, but I imagine it's not a Tensor, it's probably a numpy array already, so just get rid of this .eval().
I am relatively new to machine learning and currently have almost no experience developing with it.
So my question is: after training and evaluating the CIFAR10 dataset from the TensorFlow tutorial, how could one test it with sample images?
I was able to train and evaluate the ImageNet tutorial from the Caffe machine-learning framework, and it was relatively easy to use the trained model in custom applications through the Python API.
Any help would be very appreciated!
This isn't 100% the answer to the question, but it's a similar way of solving it, based on an MNIST NN training example suggested in the comments to the question.
Based on the TensorFlow beginner MNIST tutorial, and thanks to this tutorial, this is a way of training and using your neural network with custom data.
Please note that something similar should be done for tutorials such as CIFAR10, as @Yaroslav Bulatov mentioned in the comments.
import input_data
import datetime
import numpy as np
import tensorflow as tf
import cv2
from matplotlib import pyplot as plt
import matplotlib.image as mpimg
from random import randint
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
x = tf.placeholder("float", [None, 784])
W = tf.Variable(tf.zeros([784,10]))
b = tf.Variable(tf.zeros([10]))
y = tf.nn.softmax(tf.matmul(x,W) + b)
y_ = tf.placeholder("float", [None,10])
cross_entropy = -tf.reduce_sum(y_*tf.log(y))
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)
init = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init)
#Train our model
iter = 1000
for i in range(iter):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
#Evaluating our model:
correct_prediction=tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
accuracy=tf.reduce_mean(tf.cast(correct_prediction,"float"))
print "Accuracy: ", sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels})
#1: Using our model to classify a random MNIST image from the original test set:
num = randint(0, mnist.test.images.shape[0])
img = mnist.test.images[num]
classification = sess.run(tf.argmax(y, 1), feed_dict={x: [img]})
'''
#Uncomment this part if you want to plot the classified image.
plt.imshow(img.reshape(28, 28), cmap=plt.cm.binary)
plt.show()
'''
print 'Neural Network predicted', classification[0]
print 'Real label is:', np.argmax(mnist.test.labels[num])
#2: Using our model to classify MNIST digit from a custom image:
# create an an array where we can store 1 picture
images = np.zeros((1,784))
# and the correct values
correct_vals = np.zeros((1,10))
# read the image
gray = cv2.imread("my_digit.png", 0 ) #0=cv2.CV_LOAD_IMAGE_GRAYSCALE #must be .png!
# rescale it
gray = cv2.resize(255-gray, (28, 28))
# save the processed images
cv2.imwrite("my_grayscale_digit.png", gray)
"""
all images in the training set have an range from 0-1
and not from 0-255 so we divide our flatten images
(a one dimensional vector with our 784 pixels)
to use the same 0-1 based range
"""
flatten = gray.flatten() / 255.0
"""
we need to store the flatten image and generate
the correct_vals array
correct_val for a digit (9) would be
[0,0,0,0,0,0,0,0,0,1]
"""
images[0] = flatten
my_classification = sess.run(tf.argmax(y, 1), feed_dict={x: [images[0]]})
"""
we want to run the prediction and the accuracy function
using our generated arrays (images and correct_vals)
"""
print 'Neural Network predicted', my_classification[0], "for your digit"
For further image conditioning (digits should be completely dark on a white background) and better NN training (accuracy > 91%), please check the Advanced MNIST tutorial from TensorFlow or the second tutorial I've mentioned.
The example below is not from the MNIST tutorial but a simple XOR example. Note the train() and test() methods. All that we declare and keep globally are the weights, biases, and session. In the test method we redefine the shape of the input and reuse the same weights and biases (and session) that we refined in training.
import tensorflow as tf
#parameters for the net
w1 = tf.Variable(tf.random_uniform(shape=[2,2], minval=-1, maxval=1, name='weights1'))
w2 = tf.Variable(tf.random_uniform(shape=[2,1], minval=-1, maxval=1, name='weights2'))
#biases
b1 = tf.Variable(tf.zeros([2]), name='bias1')
b2 = tf.Variable(tf.zeros([1]), name='bias2')
#tensorflow session
sess = tf.Session()
def train():
    # placeholders for the training inputs (4 inputs with 2 features each) and outputs (4 outputs which have a value of 0 or 1)
    x = tf.placeholder(tf.float32, [4, 2], name='x-inputs')
    y = tf.placeholder(tf.float32, [4, 1], name='y-inputs')

    # set up the model calculations
    temp = tf.sigmoid(tf.matmul(x, w1) + b1)
    output = tf.sigmoid(tf.matmul(temp, w2) + b2)

    # cost function is avg error over training samples
    cost = tf.reduce_mean(((y * tf.log(output)) + ((1 - y) * tf.log(1.0 - output))) * -1)

    # training step is gradient descent
    train_step = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(cost)

    # declare training data
    training_x = [[0,1], [0,0], [1,0], [1,1]]
    training_y = [[1], [0], [1], [0]]

    # init session
    init = tf.initialize_all_variables()
    sess.run(init)

    # training
    for i in range(100000):
        sess.run(train_step, feed_dict={x: training_x, y: training_y})
        if i % 1000 == 0:
            print (i, sess.run(cost, feed_dict={x: training_x, y: training_y}))

    print '\ntraining done\n'
def test(inputs):
    # redefine the shape of the input to a single unit with 2 features
    xtest = tf.placeholder(tf.float32, [1, 2], name='x-inputs')

    # redefine the model in terms of that new input shape
    temp = tf.sigmoid(tf.matmul(xtest, w1) + b1)
    output = tf.sigmoid(tf.matmul(temp, w2) + b2)

    print (inputs, sess.run(output, feed_dict={xtest: [inputs]})[0, 0] >= 0.5)
train()
test([0,1])
test([0,0])
test([1,1])
test([1,0])
I recommend taking a look at the basic MNIST tutorial on the TensorFlow website. It looks like you define some function that generates the type of output that you want, and then run your session, passing it this evaluation function (correct_prediction below), and a dictionary containing whatever arguments you require (x and y_ below).
If you have defined and trained some network that takes an input x, generates a response y based on your inputs, and you know your expected responses for your testing set y_, you may be able to print out every response to your testing set with something like:
correct_prediction = tf.equal(y, y_)  # Check whether your prediction is correct
print(sess.run(correct_prediction, feed_dict={x: test_images, y_: test_labels}))
This is just a modification of what is done in the tutorial, where instead of trying to print each response, they determine the percentage of correct responses. Also note that the tutorial uses one-hot vectors for the prediction y and actual value y_, so in order to return the associated numeral, they have to find which index of these vectors is equal to one with tf.argmax(y, 1).
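For example, here is a small sketch (assuming the tutorial-style names x and y, an active session sess, and a numpy array test_images as used in the snippet above) that prints the predicted numeral for each test image instead of a correctness vector:

predicted_digits = tf.argmax(y, 1)  # index of the largest softmax output per image
print(sess.run(predicted_digits, feed_dict={x: test_images}))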
Edit
In general, if you define something in your graph, you can output it later when you run your graph. Say you define something that determines the result of the softmax function on your output logits as:
graph = tf.Graph()
with graph.as_default():
    ...
    prediction = tf.nn.softmax(logits)
    ...
then you can output this at run time with:
with tf.Session(graph=graph) as sess:
    ...
    feed_dict = { ... }  # define your feed dictionary
    pred = sess.run([prediction], feed_dict=feed_dict)
    # do stuff with your prediction vector