I wrote a CNN program with TensorFlow, but it does not learn well.
The dataset is CIFAR-10, and the task is to classify color images into 10 classes.
This is the code:
from __future__ import print_function
import tensorflow as tf
import os
import numpy as np
import cv2
import random
NUM_CLASSES = 10
IMG_SIZE = 32
STEPS = 5000
BATCH_SIZE = 20
train_img_dirs = ["airplane","automobile","bird","cat","deer","dog","frog","horse","ship","truck"]
train_image = []
train_label = []
config = tf.ConfigProto(
    gpu_options=tf.GPUOptions(
        per_process_gpu_memory_fraction=0.1
    )
)
for i, d in enumerate(train_img_dirs):
    files = os.listdir('./' + d)
    for f in files:
        img = cv2.imread('./' + d + '/' + f)
        img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
        img = img.flatten().astype(np.float32)/255.0
        train_image.append(img)
        tmp = np.zeros(NUM_CLASSES)
        tmp[i] = 1
        train_label.append(tmp)
train_image = np.asarray(train_image)
train_label = np.asarray(train_label)
def weight_variable(shape, name):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial, name=name)
def bias_variable(shape, name):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial, name=name)
def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1], padding='SAME')
# Input layer
x = tf.placeholder(tf.float32, [None, 32*32*3], name='x')
y_ = tf.placeholder(tf.float32, [None, 10], name='y_')
x_image = tf.reshape(x, [-1, 32, 32, 3])
# Convolutional layer 1
W_conv1 = weight_variable([5, 5, 3, 32],"W_conv1")
b_conv1 = bias_variable([32],"b_conv1")
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
# Convolutional layer 2
W_conv2 = weight_variable([5, 5, 32, 64],"W_conv2")
b_conv2 = bias_variable([64],"b_conv2")
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
# Convolutional layer 3
W_conv3 = weight_variable([5, 5, 64, 128],"W_conv3")
b_conv3 = bias_variable([128],"b_conv3")
h_conv3 = tf.nn.relu(conv2d(h_pool2, W_conv3) + b_conv3)
h_pool3 = max_pool_2x2(h_conv3)
# Fully connected layer 1
h_pool3_flat = tf.reshape(h_pool3, [-1, 4*4*128])
W_fc1 = weight_variable([4 * 4 * 128, 1024],"W_fc1")
b_fc1 = bias_variable([1024],"b_fc1")
h_fc1 = tf.nn.relu(tf.matmul(h_pool3_flat, W_fc1) + b_fc1)
# Dropout
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
# Fully connected layer 2 (Output layer)
W_fc2 = weight_variable([1024, 10],"W_fc2")
b_fc2 = bias_variable([10],"b_fc2")
y = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2, name='y')
# Evaluation functions
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')
# Training algorithm
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
# Training steps
with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver({'W_conv1': W_conv1, 'b_conv1': b_conv1,
                            'W_conv2': W_conv2, 'b_conv2': b_conv2,
                            'W_conv3': W_conv3, 'b_conv3': b_conv3})
    max_steps = 1000
    for i in range(STEPS):
        random_seq = list(range(len(train_image)))
        random.shuffle(random_seq)
        for j in range(len(train_image)//BATCH_SIZE):
            batch = BATCH_SIZE * j
            train_image_batch = []
            train_label_batch = []
            for k in range(BATCH_SIZE):
                train_image_batch.append(train_image[random_seq[batch + k]])
                train_label_batch.append(train_label[random_seq[batch + k]])
            train_step.run(feed_dict={x: train_image_batch, y_: train_label_batch, keep_prob: 0.5})
        train_accuracy = accuracy.eval(feed_dict={
            x: train_image_batch, y_: train_label_batch, keep_prob: 1.0})
        print("step %d, training accuracy %g" % (i, train_accuracy))
    saver.save(sess, "model.ckpt")
The training accuracy stays at about 0.1 until the end of training.
Is this a bug, or is the structure of the CNN bad?
The Python version is 2.7, and the TensorFlow version is 1.4.0.
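For reference, one numerically safer way to write this loss in TensorFlow 1.4 computes the cross-entropy from the pre-softmax logits instead of taking tf.log(y) directly, since log(0) produces NaN. A sketch against the graph above, not a confirmed fix for this question:
logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2  # pre-softmax scores
y = tf.nn.softmax(logits, name='y')
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))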
I created a TFRecord file with the MFCCs of music segments and their labels. But I am running into a problem with the tensor shape. I'm sure that the shape of each MFCC is [256, 12] before writing to the TFRecord file. It seems that the requested shape is half of the tensor read from the file. I can't locate the mistake and don't know the reason. Could you please give me some advice? Thanks.
def read_tfRecord(file_tfRecord):
    queue = tf.train.string_input_producer([file_tfRecord])
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(queue)
    features = tf.parse_single_example(
        serialized_example,
        features={'micsegment': tf.FixedLenFeature([], tf.string),
                  'label': tf.FixedLenFeature([], tf.int64)})
    mic = tf.decode_raw(features['micsegment'], tf.float32)
    mic = tf.reshape(mic, [256, 12, 1])
    label = tf.cast(features['label'], tf.int64)
    return mic, label
[mic,label]=read_tfRecord('D:/360MoveData/training.tfrecords')
[testmic,testlabel]=read_tfRecord('D:/360MoveData/test1.tfrecords')
sess = tf.InteractiveSession()
x=tf.placeholder(tf.float32, [256*12])
y_actual = tf.placeholder(tf.float32, [None, num_classes])
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)
def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)
def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool(x, ksize):
    return tf.nn.max_pool(x, ksize, strides=[1, 1, 1, 1], padding='SAME')
x_mic = tf.reshape(x, [-1, 256, 12, 1])
W_conv1 = weight_variable([3, 3, 1, 16])
b_conv1 = bias_variable([16])
h_conv1 = tf.nn.relu(conv2d(x_mic, W_conv1) + b_conv1)  # first convolutional layer
h_pool1 = max_pool(h_conv1, ksize=[1, 2, 2, 1])
W_conv2 = weight_variable([3, 3, 16, 32])
b_conv2 = bias_variable([32])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool(h_conv2,ksize=[1,2,3,1])
W_conv3 = weight_variable([3, 3, 32, 32])
b_conv3 = bias_variable([32])
h_conv3 = tf.nn.relu(conv2d(h_pool2, W_conv3) + b_conv3)
h_pool3 = max_pool(h_conv3,ksize=[1,2,2,1])
W_fc1 = weight_variable([32*1*32, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool3, [-1, 32*1*32])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
keep_prob = tf.placeholder("float")
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
y_predict = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)  # softmax
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_predict * tf.log(y_actual), reduction_indices=[1]))
train_step = tf.train.GradientDescentOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(y_predict, y_actual)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
sess.run(tf.global_variables_initializer())
# start the threads used for reading files
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
# start training
nSteps = 1000
for i in range(nSteps):
    batch_xs, batch_ys = sess.run([mic_batches, label_batches])
    # run the training step with feed of images
    train_step.run(feed_dict={x: batch_xs, y_actual: batch_ys, keep_prob: 0.5})
    if (i+1) % 100 == 0:  # then perform validation
        # get a validation batch
        vbatch_xs, vbatch_ys = sess.run([mictest_batches, labeltest_batches])
        train_accuracy = accuracy.eval(feed_dict={x: vbatch_xs, y_actual: vbatch_ys, keep_prob: 1.0})
        print("step %d, training accuracy %g" % (i+1, train_accuracy))
# finalise
coord.request_stop()
coord.join(threads)
sess.close()
And this is my code for computing the MFCCs and writing the TFRecord file:
def load_file(example_list_file):
    lines = np.genfromtxt(example_list_file, delimiter="*", dtype=[('col1', 'S200'), ('col2', 'i8')])
    examples = []
    labels = []
    for example, label in lines:
        examples.append(example)
        labels.append(label)
    return np.asarray(examples), np.asarray(labels), len(lines)
_examples, _labels, examples_num = load_file(train_file)
filename = name + '.tfrecords'
writer = tf.python_io.TFRecordWriter(filename)
for i, [example, label] in enumerate(zip(_examples, _labels)):
    micseg = compute_spectrograms(example)
    micsegment = micseg.tostring()
    example = tf.train.Example(features=tf.train.Features(feature={
        'micsegment': _bytes_feature(micsegment),
        'label': _int64_feature(label)}))
    writer.write(example.SerializeToString())
writer.close()
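One byte-level check worth doing here (an assumed cause, not confirmed in the post): a [256, 12] float32 array is 256 * 12 * 4 = 12288 bytes, while the same array saved as float64 is twice that, and decoding float64 bytes with tf.decode_raw(..., tf.float32) yields exactly twice as many elements as the requested shape - the factor-of-two mismatch described above. A hypothetical guard in the writer:
micseg = compute_spectrograms(example)
# numpy defaults to float64; a float64 buffer read back as float32 doubles the element count
micsegment = micseg.astype(np.float32).tostring()  # force 4-byte floats before writing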
CODE:
image_row = 640
image_col = 480
num_labels = 17
num_channels = 3  # RGB
import numpy as np
import scipy.misc
import tensorflow as tf
from IPython import embed  # embed() is used below for interactive debugging
import load_file  # local helper module
# Load data
train_dataset, train_labels = load_file.load_data()
test_dataset = scipy.misc.imread("1501005004.548261985.png")
test_labels = np.loadtxt("1501005004.493062654.txt", comments="#", delimiter=",", unpack=False)
batch_labels = train_labels
print('Training set', train_dataset.shape, train_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)
def reformat(dataset, labels):
    dataset = dataset.reshape((-1, image_row, image_col, num_channels)).astype(np.float32)
    #labels = (np.arange(num_labels) == labels[:,None]).astype(np.float32)
    labels = labels.reshape((-1, num_labels)).astype(np.float32)
    return dataset, labels
train_dataset, train_labels = reformat(train_dataset, train_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
print('Training set', train_dataset.shape, train_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)
def accuracy(labels, predictions):
    return 100.0 * tf.reduce_sum(tf.pow(predictions - labels, 2))
batch_size = 1
kernel_size = patch_size = 5
depth = 16
num_hidden1 = 64
num_hidden2 = 32
graph = tf.Graph()
with graph.as_default():
    # Input data
    tf_train_dataset = tf.placeholder(tf.float32, shape=([batch_size, image_row, image_col, num_channels]))
    tf_train_labels = tf.placeholder(tf.float32, shape=([batch_size, num_labels]))
    tf_test_dataset = tf.constant(test_dataset)
    # Variables.
    layer1_weights = tf.Variable(tf.truncated_normal([patch_size, patch_size, num_channels, depth], stddev=0.1))
    layer1_biases = tf.Variable(tf.zeros([depth]))
    # dropout
    keep_prob = tf.placeholder("float")
    layer2_weights = tf.Variable(tf.truncated_normal([patch_size, patch_size, depth, depth], stddev=0.1))
    layer2_biases = tf.Variable(tf.constant(1.0, shape=[depth]))
    layer3_weights = tf.Variable(tf.truncated_normal([image_row // 4 * image_col // 4 * depth, num_hidden1], stddev=0.1))
    layer3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden1]))
    layer4_weights = tf.Variable(tf.truncated_normal([num_hidden1, num_hidden2], stddev=0.1))
    layer4_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden2]))
    layer5_weights = tf.Variable(tf.truncated_normal([num_hidden2, num_labels], stddev=0.1))
    layer5_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))

    def model(data):
        conv = tf.nn.conv2d(data, layer1_weights, [1, 1, 1, 1], padding='SAME')
        hidden = tf.nn.relu(conv + layer1_biases)
        # pooling
        pool1 = tf.nn.max_pool(hidden, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool1')
        norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1')
        # layer2
        conv = tf.nn.conv2d(norm1, layer2_weights, [1, 1, 1, 1], padding='SAME')
        hidden = tf.nn.relu(conv + layer2_biases)
        # pooling2
        pool2 = tf.nn.max_pool(hidden, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool2')
        norm2 = tf.nn.lrn(pool2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm2')
        # layer3
        conv = tf.nn.conv2d(norm2, layer2_weights, [1, 1, 1, 1], padding='SAME')
        hidden = tf.nn.relu(conv + layer2_biases)
        shape = hidden.get_shape().as_list()
        reshape = tf.reshape(hidden, [shape[0], shape[1] * shape[2] * shape[3]])
        # RELU - 1e-9
        hidden = tf.nn.relu(tf.matmul(reshape, layer3_weights) + layer3_biases)
        hidden = tf.matmul(hidden, layer4_weights) + layer4_biases
        # # add a dropout
        # hidden = tf.nn.dropout(hidden, keep_prob)
        result = tf.matmul(hidden, layer5_weights) + layer5_biases
        return result

    logits = model(tf_train_dataset)
    print('AFTER LOGITS')
    embed()
    loss = tf.reduce_sum(tf.pow(logits - tf_train_labels, 2)) / (2 * batch_size)
    #loss = tf.reduce_sum(tf.pow(logits - batch_labels, 2)) / (2 * batch_size)
    global_step = tf.Variable(0, trainable=False)
    start_learning_rate = 0.001
    learning_rate = tf.train.exponential_decay(start_learning_rate, global_step, 100000, 0.96, staircase=True)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
    # Prediction
    train_prediction = logits
    test_prediction = tf_test_dataset
num_steps = 10000001
with tf.Session(graph=graph) as session:
    tf.initialize_all_variables().run()
    print('----------------INITIALIZED-----------------')
    for step in range(num_steps):
        print(step)
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        print('after offset')
        embed()
        batch_data = train_dataset[offset: (offset + batch_size), :, :, :]
        batch_labels = train_labels[offset: (offset + batch_size), :]
        feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels, keep_prob: 1.0}
        _, l, prediction = session.run([optimizer, loss, train_prediction], feed_dict=feed_dict)
        print('after _,l,prediction')
        embed()
        if (step % 50 == 0):
            print("Minibatch loss %d: %f" % (step, l))
            print('Minibatch accuracy: %s' % accuracy(prediction, batch_labels))
In the above code, I am getting lots of Inf values in the last hidden layer, whose output is pasted below:
In [93]: session.run(hidden)
Out[93]:
array([[ 9.99999972e-10, 9.99999972e-10, 9.99999972e-10,
inf, 9.99999972e-10, 5.50044295e+28,
9.99999972e-10, 9.99999972e-10, 3.21215463e+28,
9.99999972e-10, 1.24344986e+28, 9.99999972e-10,
9.99999972e-10, 2.52180816e+28, 9.99999972e-10,
9.99999972e-10, 9.99999972e-10, 9.99999972e-10,
1.41978562e+28, inf, 9.99999972e-10,
How do I avoid these Inf values? I am a beginner in deep learning and TensorFlow, so I am not sure how to go about this.
I tried adding a small constant inside the ReLU layer: hidden = tf.nn.relu(tf.matmul(reshape, layer3_weights) + layer3_biases + 1e-9), but it doesn't help.
How can I go about it?
In general, this problem shows up with exploding gradients; you need to clip the gradients.
# Replace this line:
#   optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
# with the following:
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
grads_vars = optimizer.compute_gradients(loss, tf.trainable_variables())
grads_vars = clip_grad_norms(grads_vars, max_norm=10)
train_op = optimizer.apply_gradients(grads_vars)
# Finally, replace:
#   _, l, prediction = session.run([optimizer, loss, train_prediction], feed_dict=feed_dict)
# with:
_, l, prediction = session.run([train_op, loss, train_prediction], feed_dict=feed_dict)
The clip_grad_norms function is here: https://github.com/n3011/tefla/blob/master/tefla/core/base.py#L253
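For reference, a minimal sketch of what a clip_grad_norms-style helper can look like (an assumption based on the name; see the linked tefla source for the actual implementation):
def clip_grad_norms(grads_and_vars, max_norm=10):
    # Clip the norm of each gradient before it is applied.
    clipped = []
    for grad, var in grads_and_vars:
        if grad is not None:
            grad = tf.clip_by_norm(grad, max_norm)
        clipped.append((grad, var))
    return clipped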
From your code above, it looks like you don't have an activation function between layers 4 and 5:
hidden = tf.nn.relu(tf.matmul(reshape, layer3_weights) + layer3_biases)
hidden = tf.matmul(hidden, layer4_weights) + layer4_biases
result = tf.matmul(hidden, layer5_weights) + layer5_biases
return result
Depending on your initialization of the weights and biases, this can be a reason for over- or underflowing activations.
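A sketch of one possible fix, reusing the names above (choosing ReLU as the non-linearity here is an assumption):
hidden = tf.nn.relu(tf.matmul(reshape, layer3_weights) + layer3_biases)
hidden = tf.nn.relu(tf.matmul(hidden, layer4_weights) + layer4_biases)  # added activation
result = tf.matmul(hidden, layer5_weights) + layer5_biases
return result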
I've adjusted the data loading of the TensorFlow MNIST example to use the original MNIST data. The original example gets > 0.80 accuracy after 100 epochs. My adjusted example (set use_original = False to use it) gets only about 0.09 - 0.10 accuracy (which is just random). Could you please explain why?
#!/usr/bin/env python
"""MNIST with Tensorflow."""
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
from struct import unpack
import gzip
from numpy import zeros, uint8
from sklearn.preprocessing import OneHotEncoder
use_original = True
def get_labeled_data(imagefile, labelfile):
    """
    Read input-vector (image) and target class (label, 0-9).

    Returns
    -------
    tuple of lists
    """
    # Open the images with gzip in read binary mode
    images = gzip.open(imagefile, 'rb')
    labels = gzip.open(labelfile, 'rb')
    # Read the binary data
    # We have to get big endian unsigned int. So we need '>I'
    # Get metadata for images
    images.read(4)  # skip the magic_number
    number_of_images = images.read(4)
    number_of_images = unpack('>I', number_of_images)[0]
    rows = images.read(4)
    rows = unpack('>I', rows)[0]
    cols = images.read(4)
    cols = unpack('>I', cols)[0]
    # Get metadata for labels
    labels.read(4)  # skip the magic_number
    N = labels.read(4)
    N = unpack('>I', N)[0]
    if number_of_images != N:
        raise Exception('number of labels did not match the number of images')
    # Get the data
    x = zeros((N, rows * cols), dtype=uint8)  # Initialize numpy array
    y = zeros((N, 1), dtype=uint8)  # Initialize numpy array
    for i in range(N):
        if i % 1000 == 0:
            print("%s: %i" % (imagefile, i))
        j = 0
        for row in range(rows):
            for col in range(cols):
                tmp_pixel = images.read(1)  # Just a single byte
                tmp_pixel = unpack('>B', tmp_pixel)[0]
                x[i][j] = tmp_pixel
                j += 1
        tmp_label = labels.read(1)
        y[i] = unpack('>B', tmp_label)[0]
    enc = OneHotEncoder()
    enc.fit(y)
    y = enc.transform(y).toarray()
    return (x, y)
epochs = 20000
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)
def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)
def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1], padding='SAME')
def eval_network(dataset, correct_prediction):
    correct_sum = 0
    total_test = 0
    for i in range(dataset.labels.shape[0] / 1000):
        feed_dict = {x: dataset.images[i * 1000:(i + 1) * 1000],
                     y_: dataset.labels[i * 1000:(i + 1) * 1000],
                     keep_prob: 1.0}
        test_correct = correct_prediction.eval(feed_dict=feed_dict)
        correct_sum += sum(test_correct)
        total_test += len(test_correct)
    return float(correct_sum) / total_test

def add_score(filename, mnist, scoring, epoch, other=''):
    with open(filename, "a") as myfile:
        train = eval_network(mnist.train, scoring)
        test = eval_network(mnist.test, scoring)
        myfile.write("%i;%0.6f;%0.6f;%s\n" % (epoch, train, test, other))
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
sess = tf.InteractiveSession()
x = tf.placeholder(tf.float32, shape=[None, 784])
y_ = tf.placeholder(tf.float32, shape=[None, 10])
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
sess.run(tf.initialize_all_variables())
y = tf.nn.softmax(tf.matmul(x, W) + b)
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
x_image = tf.reshape(x, [-1, 28, 28, 1])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y_conv),
                                              reduction_indices=[1]))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
sess.run(tf.initialize_all_variables())
# Data loading
if use_original:
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
else:
    mnist = lambda: None
    setattr(mnist, 'train', lambda: None)
    setattr(mnist, 'test', lambda: None)
    setattr(mnist.train, 'images', lambda: None)
    setattr(mnist.train, 'labels', lambda: None)
    setattr(mnist.test, 'images', lambda: None)
    setattr(mnist.test, 'labels', lambda: None)
    xs, ys = get_labeled_data('mnist/train-images-idx3-ubyte.gz',
                              'mnist/train-labels-idx1-ubyte.gz')
    mnist.train.images = xs
    mnist.train.labels = ys
    xst, yst = get_labeled_data('mnist/t10k-images-idx3-ubyte.gz',
                                'mnist/t10k-labels-idx1-ubyte.gz')
    mnist.test.images = xst
    mnist.test.labels = yst
for i in range(epochs):
    if use_original:
        batch = mnist.train.next_batch(50)  # This works
    else:
        # This doesn't work
        batch = (xs[i * 50:(i + 1) * 50], ys[i * 50:(i + 1) * 50])
    if i % 100 == 0:
        add_score('accuracy.csv',
                  mnist,
                  correct_prediction,
                  i)
    train_step.run(feed_dict={x: batch[0],
                              y_: batch[1],
                              keep_prob: 0.5})
add_score('accuracy.csv', mnist, correct_prediction, epochs)
I found the problem: the data needs to be loaded as numpy.float32 (and normalized by dividing it by 255).
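In code, the fix amounts to something like this (a sketch using the loader above):
xs, ys = get_labeled_data('mnist/train-images-idx3-ubyte.gz',
                          'mnist/train-labels-idx1-ubyte.gz')
xs = xs.astype('float32') / 255.0  # uint8 pixels -> floats in [0, 1]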
I'm trying to build an image-recognition CNN following tutorials from TensorFlow's website. I converted a directory of RGB 256x256 images (24 classes) to TFRecords using this solution, so now I have two files - 'training' and 'validation'. I'm sure that all the images are the same size, but when I try to train my network the only thing I achieve is this error:
Invalid argument: Shape mismatch in tuple component 0. Expected [65536], got [37191]
I tried to find a solution but I didn't succeed - there was only a hint that this error can be caused by images with a size different from the one declared in the network, but as I said - I'm sure the images are good.
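For scale: 65536 is exactly 256 * 256, i.e. one channel of raw pixels (the code below sets IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE), while raw RGB would be 256 * 256 * 3 = 196608 values; 37191 matches neither, which is the size you would expect if the records hold compressed (e.g. JPEG-encoded) bytes rather than raw pixels. That is an assumption, not something confirmed here, but decoding would then look like this sketch instead of tf.decode_raw:
# Hypothetical: if 'image/buffer' holds JPEG-encoded bytes
image = tf.image.decode_jpeg(features['image/buffer'], channels=3)
image = tf.cast(image, tf.float32) * (1. / 255) - 0.5
image = tf.reshape(image, [IMAGE_SIZE * IMAGE_SIZE * 3])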
Here is my code:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
import os
NUM_CLASSES = 24
IMAGE_SIZE = 256
IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE
flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_float('learning_rate', 0.01, 'Initial learning rate.')
flags.DEFINE_integer('num_epochs', 2, 'Number of epochs to run trainer.')
flags.DEFINE_integer('batch_size', 100, 'Batch size.')
flags.DEFINE_string('train_dir', 'HERE_IS_MY_DATA_DIRECTORY', 'Directory with the training data.')
TRAIN_FILE = 'train'
VALIDATION_FILE = 'validation'
sess = tf.InteractiveSession()
def read_and_decode(filename_queue):
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        features={
            'image/buffer': tf.FixedLenFeature([], tf.string),
            'image/class/label': tf.FixedLenFeature([], tf.int64),
        })
    image = tf.decode_raw(features['image/buffer'], tf.uint8)
    image.set_shape([IMAGE_PIXELS])
    image = tf.cast(image, tf.float32)
    image = tf.cast(image, tf.float32) * (1. / 255) - 0.5
    label = tf.cast(features['image/class/label'], tf.int32)
    return image, label

def input_pipeline(train, batch_size, num_epochs):
    if not num_epochs:
        num_epochs = None
    filename = os.path.join(FLAGS.train_dir, TRAIN_FILE if train else VALIDATION_FILE)
    with tf.name_scope('input'):
        filename_queue = tf.train.string_input_producer([filename], num_epochs=num_epochs)
        image, label = read_and_decode(filename_queue)
        min_after_dequeue = 10000
        capacity = min_after_dequeue + 3 * batch_size
        image_batch, label_batch = tf.train.shuffle_batch(
            [image, label], batch_size=batch_size, capacity=capacity,
            min_after_dequeue=min_after_dequeue)
    return image_batch, label_batch
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)
def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)
def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1], padding='SAME')
# Variables
x, y_ = input_pipeline(train=True, batch_size=FLAGS.batch_size, num_epochs=FLAGS.num_epochs)
#Layer 1
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
x_image = tf.reshape(x, [-1, 256, 256, 1])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
#Layer 2
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
#layer3
W_conv3 = weight_variable([5, 5, 64, 128])
b_conv3 = bias_variable([128])
h_conv3 = tf.nn.relu(conv2d(h_pool2, W_conv3) + b_conv3)
h_pool3 = max_pool_2x2(h_conv3)
#layer4
W_conv4 = weight_variable([5, 5, 128, 256])
b_conv4 = bias_variable([256])
h_conv4 = tf.nn.relu(conv2d(h_pool3, W_conv4) + b_conv4)
h_pool4 =max_pool_2x2(h_conv4)
#Layer5
W_conv5 = weight_variable([5, 5, 256, 512])
b_conv5 = bias_variable([512])
h_conv5 = tf.nn.relu(conv2d(h_pool4, W_conv5) + b_conv5)
h_pool5 =max_pool_2x2(h_conv5)
#layer6
W_conv6 = weight_variable([5, 5, 512, 1024])
b_conv6 = bias_variable([1024])
h_conv6 = tf.nn.relu(conv2d(h_pool5, W_conv6) + b_conv6)
h_pool6 =max_pool_2x2(h_conv6)
#Densely Connected Layer
W_fc1 = weight_variable([16 * 16 * 64, 1024])
b_fc1 = bias_variable([1024])
h_pool6_flat = tf.reshape(h_pool6, [-1, 16 * 16 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool6_flat, W_fc1) + b_fc1)
# Dropout - reduces overfitting
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
#Readout layer
W_fc2 = weight_variable([1024, 36])
b_fc2 = bias_variable([36])
y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
#Train and evaluate
cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(y_conv, y_))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
sess.run(tf.initialize_all_variables())
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
for i in range(100):
    if i % 10 == 0:
        train_accuracy = accuracy.eval(feed_dict={keep_prob: 1.0})
        print("step %d, training accuracy %g" % (i, train_accuracy))
    train_step.run(feed_dict={keep_prob: 0.5})
x, y_ = input_pipeline(train=True, batch_size=FLAGS.batch_size)
print("test accuracy %g" % accuracy.eval(feed_dict={keep_prob: 1.0}))
coord.join(threads)
sess.close()
Is there something wrong in my code, or do I get the error because the images are RGB (in all the tutorials people use grayscale images)? If so, how do I handle RGB images correctly?
I will be grateful for any help.
EDIT:
I solved the problem by creating the TFRecord files with this script. Thanks for all the help.
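For reference, a minimal sketch of writing fixed-size raw RGB pixels to a TFRecord, so that tf.decode_raw on the reader side sees a constant-length buffer (an illustration, not the linked script):
writer = tf.python_io.TFRecordWriter('train.tfrecords')  # hypothetical path
# image: np.uint8 array of shape (256, 256, 3); label: int
example = tf.train.Example(features=tf.train.Features(feature={
    'image/buffer': tf.train.Feature(bytes_list=tf.train.BytesList(value=[image.tobytes()])),
    'image/class/label': tf.train.Feature(int64_list=tf.train.Int64List(value=[label])),
}))
writer.write(example.SerializeToString())
writer.close()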
I'm stuck on a CNN model in TensorFlow.
My code is below.
Libraries
# -*- coding: utf-8 -*-
import tensorflow as tf
import time
import json
import numpy as np
import matplotlib.pyplot as plt
import random
import multiprocessing as mp
import glob
import os
Model
def inference(images_placeholder, keep_prob):
    def weight_variable(shape):
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)
    def bias_variable(shape):
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial)
    # convolution
    def conv2d(x, W):
        return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
    # X2 pooling
    def max_pool_2x128(x):
        return tf.nn.max_pool(x, ksize=[1, 2, 1, 1], strides=[1, 2, 1, 1], padding='VALID')
    # X4 pooling
    def max_pool_4x128(x):
        return tf.nn.max_pool(x, ksize=[1, 4, 1, 1], strides=[1, 4, 1, 1], padding='VALID')
    x_image = tf.reshape(images_placeholder, [-1, 599, 1, 128])
    # 1st conv
    with tf.name_scope('conv1') as scope:
        W_conv1 = weight_variable([4, 1, 128, 256])
        b_conv1 = bias_variable([256])
        print "shape after reshaping the image"
        print tf.Tensor.get_shape(x_image)
        print "shape of conv1"
        print tf.Tensor.get_shape(conv2d(x_image, W_conv1))
        h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    # 1st pooling X4
    with tf.name_scope('pool1') as scope:
        h_pool1 = max_pool_4x128(h_conv1)
        print "shape of h_pool1"
        print tf.Tensor.get_shape(h_pool1)
    # 2nd conv
    with tf.name_scope('conv2') as scope:
        W_conv2 = weight_variable([4, 1, 256, 256])
        b_conv2 = bias_variable([256])
        h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    # 2nd pooling X2
    with tf.name_scope('pool2') as scope:
        h_pool2 = max_pool_2x128(h_conv2)
        print "shape of h_pool2"
        print tf.Tensor.get_shape(h_pool2)
    # 3rd conv
    with tf.name_scope('conv3') as scope:
        W_conv3 = weight_variable([4, 1, 256, 512])
        b_conv3 = bias_variable([512])
        h_conv3 = tf.nn.relu(conv2d(h_pool2, W_conv3) + b_conv3)
    # 3rd pooling X2
    with tf.name_scope('pool3') as scope:
        h_pool3 = max_pool_2x128(h_conv3)
        print "shape of h_pool3"
        print tf.Tensor.get_shape(h_pool3)
    # flatten + 1st fully connected
    with tf.name_scope('fc1') as scope:
        W_fc1 = weight_variable([37 * 1 * 512, 2048])
        b_fc1 = bias_variable([2048])
        h_pool3_flat = tf.reshape(h_pool3, [-1, 37 * 1 * 512])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool3_flat, W_fc1) + b_fc1)
        # dropout layer setup
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
    # 2nd fully connected
    with tf.name_scope('fc2') as scope:
        W_fc2 = weight_variable([2048, NUM_CLASSES])
        b_fc2 = bias_variable([NUM_CLASSES])
    # softmax output
    with tf.name_scope('softmax') as scope:
        y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
    return y_conv
Loss
def loss(logits, labels):
    # cross entropy
    cross_entropy = -tf.reduce_sum(labels * tf.log(tf.clip_by_value(logits, 1e-10, 1.0)))
    # TensorBoard
    tf.scalar_summary("cross_entropy", cross_entropy)
    return cross_entropy
Training
def training(loss, learning_rate):
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
    return train_step
Accuracy
def accuracy(logits, labels):
    correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    tf.scalar_summary("accuracy", accuracy)
    return accuracy
Main
if __name__ == '__main__':
    flags = tf.app.flags
    FLAGS = flags.FLAGS
    flags.DEFINE_string('train_dir', '/tmp/data', 'Directory to put the training data.')
    flags.DEFINE_integer('max_steps', 10, 'Number of steps to run trainer.')
    flags.DEFINE_integer('batch_size', 10, 'Batch size. '
                         'Must divide evenly into the dataset sizes.')
    flags.DEFINE_float('learning_rate', 1e-4, 'Initial learning rate.')
    # num output
    NUM_CLASSES = 5
    # num frame
    IMAGE_SIZE = 599
    # tensor shape
    IMAGE_PIXELS = IMAGE_SIZE*1*128
    ##################
    # modify the data #
    ##################
    # number of training data
    train_num = 70
    # loading data limit
    data_limit = 100
    flatten_data = []
    flatten_label = []
    # shaping the data
    filenames = glob.glob(os.path.join('/Users/kosukefukui/Qosmo/WASABEAT/song_features/*.json'))
    filenames = filenames[0:data_limit]
    print "----loading data---"
    for file_path in filenames:
        data = json.load(open(file_path))
        data = np.array(data)
        for_flat = np.array(data)
        assert for_flat.flatten().shape == (IMAGE_PIXELS,)
        flatten_data.append(for_flat.flatten().tolist())
    # shaping the labels
    f2 = open("id_information.txt")
    print "---loading labels----"
    for line in f2:
        line = line.rstrip()
        l = line.split(",")
        tmp = np.zeros(NUM_CLASSES)
        tmp[int(l[4])] = 1
        flatten_label.append(tmp)
    flatten_label = flatten_label[0:data_limit]
    print "number of data samples: %s" % len(flatten_data)
    print "number of labels: %s" % len(flatten_label)
    # train data
    train_image = np.asarray(flatten_data[0:train_num], dtype=np.float32)
    train_label = np.asarray(flatten_label[0:train_num], dtype=np.float32)
    print "number of training samples: %s" % len(train_image)
    # test data
    test_image = np.asarray(flatten_data[train_num:data_limit], dtype=np.float32)
    test_label = np.asarray(flatten_label[train_num:data_limit], dtype=np.float32)
    print "number of test samples: %s" % len(test_image)
    print "599 x 128 = "
    print len(train_image[0])
    f2.close()
    if 1 == 1:
        # Image Tensor
        images_placeholder = tf.placeholder("float", shape=(None, IMAGE_PIXELS))
        # Label Tensor
        labels_placeholder = tf.placeholder("float", shape=(None, NUM_CLASSES))
        # dropout Tensor
        keep_prob = tf.placeholder("float")
        # construct model
        logits = inference(images_placeholder, keep_prob)
        # calculate loss
        loss_value = loss(logits, labels_placeholder)
        # training
        train_op = training(loss_value, FLAGS.learning_rate)
        # accuracy
        acc = accuracy(logits, labels_placeholder)
        saver = tf.train.Saver()
        sess = tf.Session()
        sess.run(tf.initialize_all_variables())
        # for TensorBoard
        summary_op = tf.merge_all_summaries()
        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph_def)
        # Training
        for step in range(FLAGS.max_steps):
            for i in range(len(train_image)/FLAGS.batch_size):
                # train for batch_size
                batch = FLAGS.batch_size*i
                sess.run(train_op, feed_dict={
                    images_placeholder: train_image[batch:batch+FLAGS.batch_size],
                    labels_placeholder: train_label[batch:batch+FLAGS.batch_size],
                    keep_prob: 0.5})
            # calculate accuracy at each step
            train_accuracy = sess.run(acc, feed_dict={
                images_placeholder: train_image,
                labels_placeholder: train_label,
                keep_prob: 1.0})
            print "step %d, training accuracy %g" % (step, train_accuracy)
            # add value for TensorBoard at each step
            summary_str = sess.run(summary_op, feed_dict={
                images_placeholder: train_image,
                labels_placeholder: train_label,
                keep_prob: 1.0})
            summary_writer.add_summary(summary_str, step)
        # show accuracy for test data
        print "test accuracy %g" % sess.run(acc, feed_dict={
            images_placeholder: test_image,
            labels_placeholder: test_label,
            keep_prob: 1.0})
        # save the last model
        save_path = saver.save(sess, "model.ckpt")
However, I get the same training accuracy at every step. How can I fix this problem?
step 0, training accuracy 0.142857
step 1, training accuracy 0.142857
step 2, training accuracy 0.142857
step 3, training accuracy 0.142857
step 4, training accuracy 0.142857
step 5, training accuracy 0.142857
step 6, training accuracy 0.142857
step 7, training accuracy 0.142857
step 8, training accuracy 0.142857
step 9, training accuracy 0.142857
test accuracy 0.133333
I referred to the following model, and my TensorBoard graph is as below.
Could it be that you are not minimizing the right tensor?
You are minimizing cross_entropy, but should be minimizing cross_entropy_mean (accuracy in your code).
Basically with the following logic:
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
    logits=logits, labels=ground_truth_placeholder)
cross_entropy_mean = tf.reduce_mean(cross_entropy)
train_step = tf.train.GradientDescentOptimizer(FLAGS.learning_rate).minimize(
    cross_entropy_mean)
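Applied to the code above, a sketch of what this looks like (an assumption: inference() would have to return the pre-softmax values, since softmax_cross_entropy_with_logits applies softmax internally):
logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2  # return this from inference() instead of the softmax
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
    logits=logits, labels=labels_placeholder)
cross_entropy_mean = tf.reduce_mean(cross_entropy)
train_op = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(cross_entropy_mean)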