I'm stuck on CNN model on Tensorflow.
My code as below.
Libraries
# -*- coding: utf-8 -*-
import tensorflow as tf
import time
import json
import numpy as np
import matplotlib.pyplot as plt
import random
import multiprocessing as mp
import glob
import os
Model
def inference(images_placeholder, keep_prob):
def weight_variable(shape):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
# convolution
def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
# X2 pooling
def max_pool_2x128(x):
return tf.nn.max_pool(x, ksize=[1, 2, 1, 1],strides=[1, 2, 1, 1], padding='VALID')
# X4 pooling
def max_pool_4x128(x):
return tf.nn.max_pool(x, ksize=[1, 4, 1, 1],strides=[1, 4, 1, 1], padding='VALID')
x_image = tf.reshape(images_placeholder, [-1,599,1,128])
#1st conv
with tf.name_scope('conv1') as scope:
W_conv1 = weight_variable([4, 1, 128, 256])
b_conv1 = bias_variable([256])
print "image変形後のshape"
print tf.Tensor.get_shape(x_image)
print "conv1の形"
print tf.Tensor.get_shape(conv2d(x_image, W_conv1))
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
#1st pooling X4
with tf.name_scope('pool1') as scope:
h_pool1 = max_pool_4x128(h_conv1)
print "h_pool1の形"
print tf.Tensor.get_shape(h_pool1)
#2nd conv
with tf.name_scope('conv2') as scope:
W_conv2 = weight_variable([4, 1, 256, 256])
b_conv2 = bias_variable([256])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
#2nd pooling X2
with tf.name_scope('pool2') as scope:
h_pool2 = max_pool_2x128(h_conv2)
print "h_pool2の形"
print tf.Tensor.get_shape(h_pool2)
#3rd conv
with tf.name_scope('conv3') as scope:
W_conv3 = weight_variable([4, 1, 256, 512])
b_conv3 = bias_variable([512])
h_conv3 = tf.nn.relu(conv2d(h_pool2, W_conv3) + b_conv3)
#3rd pooling X2
with tf.name_scope('pool3') as scope:
h_pool3 = max_pool_2x128(h_conv3)
print "h_pool3の形"
print tf.Tensor.get_shape(h_pool3)
#flatten + 1st fully connected
with tf.name_scope('fc1') as scope:
W_fc1 = weight_variable([37 * 1 * 512, 2048])
b_fc1 = bias_variable([2048])
h_pool3_flat = tf.reshape(h_pool3, [-1, 37 * 1 * 512])
h_fc1 = tf.nn.relu(tf.matmul(h_pool3_flat, W_fc1) + b_fc1)
#ドロップ層の設定
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
#2nd fully connected
with tf.name_scope('fc2') as scope:
W_fc2 = weight_variable([2048, NUM_CLASSES])
b_fc2 = bias_variable([NUM_CLASSES])
#softmax output
with tf.name_scope('softmax') as scope:
y_conv=tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
return y_conv
Loss
def loss(logits, labels):
# cross entropy
cross_entropy = -tf.reduce_sum(labels*tf.log(tf.clip_by_value(logits,1e-10,1.0)))
# TensorBoard
tf.scalar_summary("cross_entropy", cross_entropy)
return cross_entropy
Training
def training(loss, learning_rate):
train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
return train_step
Accuracy
def accuracy(logits, labels):
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
tf.scalar_summary("accuracy", accuracy)
return accuracy
Main
if __name__ == '__main__':
flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_string('train_dir', '/tmp/data', 'Directory to put the training data.')
flags.DEFINE_integer('max_steps', , 'Number of steps to run trainer.')
flags.DEFINE_integer('batch_size', 10, 'Batch size'
'Must divide evenly into the dataset sizes.')
flags.DEFINE_float('learning_rate', 1e-4, 'Initial learning rate.')
#num output
NUM_CLASSES = 5
#num frame
IMAGE_SIZE = 599
#tensor shape
IMAGE_PIXELS = IMAGE_SIZE*1*128
##################
#modify the data #
##################
#number of training data
train_num = 70
#loading data limit
data_limit = 100
flatten_data = []
flatten_label = []
# データの整形
filenames = glob.glob(os.path.join('/Users/kosukefukui/Qosmo/WASABEAT/song_features/*.json'))
filenames = filenames[0:data_limit]
print "----loading data---"
for file_path in filenames:
data = json.load(open(file_path))
data = np.array(data)
for_flat = np.array(data)
assert for_flat.flatten().shape == (IMAGE_PIXELS,)
flatten_data.append(for_flat.flatten().tolist())
# ラベルの整形
f2 = open("id_information.txt")
print "---loading labels----"
for line in f2:
line = line.rstrip()
l = line.split(",")
tmp = np.zeros(NUM_CLASSES)
tmp[int(l[4])] = 1
flatten_label.append(tmp)
flatten_label = flatten_label[0:data_limit]
print "データ数 %s" % len(flatten_data)
print "ラベルデータ数 %s" % len(flatten_label)
#train data
train_image = np.asarray(flatten_data[0:train_num], dtype=np.float32)
train_label = np.asarray(flatten_label[0:train_num],dtype=np.float32)
print "訓練データ数 %s" % len(train_image)
#test data
test_image = np.asarray(flatten_data[train_num:data_limit], dtype=np.float32)
test_label = np.asarray(flatten_label[train_num:data_limit],dtype=np.float32)
print "テストデータ数 %s" % len(test_image)
print "599×128 = "
print len(train_image[0])
f2.close()
if 1==1:
# Image Tensor
images_placeholder = tf.placeholder("float", shape=(None, IMAGE_PIXELS))
# Label Tensor
labels_placeholder = tf.placeholder("float", shape=(None, NUM_CLASSES))
# dropout Tensor
keep_prob = tf.placeholder("float")
# construct model
logits = inference(images_placeholder, keep_prob)
# calculate loss
loss_value = loss(logits, labels_placeholder)
# training
train_op = training(loss_value, FLAGS.learning_rate)
# accuracy
acc = accuracy(logits, labels_placeholder)
saver = tf.train.Saver()
sess = tf.Session()
sess.run(tf.initialize_all_variables())
# for TensorBoard
summary_op = tf.merge_all_summaries()
summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph_def)
# Training
for step in range(FLAGS.max_steps):
for i in range(len(train_image)/FLAGS.batch_size):
# train for batch_size
batch = FLAGS.batch_size*i
sess.run(train_op, feed_dict={
images_placeholder: train_image[batch:batch+FLAGS.batch_size],
labels_placeholder: train_label[batch:batch+FLAGS.batch_size],
keep_prob: 0.5})
# calculate accuracy at each step
train_accuracy = sess.run(acc, feed_dict={
images_placeholder: train_image,
labels_placeholder: train_label,
keep_prob: 1.0})
print "step %d, training accuracy %g"%(step, train_accuracy)
# add value for Tensorboard at each step
summary_str = sess.run(summary_op, feed_dict={
images_placeholder: train_image,
labels_placeholder: train_label,
keep_prob:1.0})
summary_writer.add_summary(summary_str, step)
# show accuracy for test data
print "test accuracy %g"%sess.run(acc, feed_dict={
images_placeholder: test_image,
labels_placeholder: test_label,
keep_prob: 1.0})
# save the last model
save_path = saver.save(sess, "model.ckpt")
However, I got the same training accuracy. How to fix this problem?
step 0, training accuracy 0.142857
step 1, training accuracy 0.142857
step 2, training accuracy 0.142857
step 3, training accuracy 0.142857
step 4, training accuracy 0.142857
step 5, training accuracy 0.142857
step 6, training accuracy 0.142857
step 7, training accuracy 0.142857
step 8, training accuracy 0.142857
step 9, training accuracy 0.142857
test accuracy 0.133333
I referred the following model and my tensorboard is as below.
Could it be that you are not minimizing the right tensor?
You are minimizing cross_entropy, but should be cross_entropy_mean (accuracy in your code).
Basically with the following logic:
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
logits, ground_truth_placeholder)
cross_entropy_mean = tf.reduce_mean(cross_entropy)
train_step = tf.train.GradientDescentOptimizer(FLAGS.learning_rate).minimize(
cross_entropy_mean)
Related
Working on cifar-10 dataset to build CNN and evaluate loss and accuracy. What I am trying to do is split the dataset into training and test data using keras and the train the model.
But on the very last step, it is giving me dimension error and nothing I could do to fix it. Please help!
Here is the code:
import numpy as np
import pickle
import tensorflow as tf
import os
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import sklearn
path ='cifar-10-batches-py'
def load_cfar10_batch(path):
with open(path + '/data_batch_1', mode='rb') as file:
batch = pickle.load(file, encoding='latin1')
features = batch['data']
labels = batch['labels']
return features, labels
x = features.reshape((len(features), 3, 32, 32)).transpose(0, 2, 3, 1)
x.shape
y = labels
def one_hot_encode(y):
encoded = np.zeros((len(y), 10))
for index, val in enumerate(y):
encoded[index][val] = 1
return encoded
def normalize(x):
x_norm = x/255
return x_norm
from sklearn import preprocessing
scaler = preprocessing.StandardScaler()
scaled_df = scaler.fit_transform(features)
scaled_df = scaled_df.reshape(10000,3,32,32).transpose(0,2,3,1)
plt.imshow(scaled_df[9999])
def _preprocess_and_save(normalize_and_standardize, one_hot_encode, features, labels, filename):
features = normalize(x)
labels = one_hot_encode(y)
pickle.dump((features, labels), open(filename, 'wb'))
features, labels = load_cfar10_batch(path)
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(features, labels, test_size=0.2)
def preprocess_and_save_data(path, normalize, one_hot_encode):
#check where the code for _preprocess_and_save is
_preprocess_and_save(normalize, one_hot_encode, np.array(x_test), np.array(y_test), 'preprocess_test.p')
_preprocess_and_save(normalize, one_hot_encode, np.array(x_train), np.array(y_train), 'preprocess_training.p')
preprocess_and_save_data(path, normalize, one_hot_encode)
x_test, y_test = pickle.load(open('preprocess_test.p', mode='rb'))
y_train, y_train = pickle.load(open('preprocess_training.p', mode='rb'))
def tf_reset():
try:
sess.close()
except:
pass
tf.reset_default_graph()
return tf.Session()
sess = tf_reset()
x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3), name='input_x')
y = tf.placeholder(tf.float32, shape=(None, 10), name='output_y')
keep_prob = tf.placeholder(tf.float32, name='keep_prob')
def conv_net(x, keep_prob):
#x = tf.reshape(x,[-1,32,32,3])
conv1_filter = tf.Variable(tf.truncated_normal(shape=[3, 3, 3, 64], mean=0, stddev=0.08))
conv2_filter = tf.Variable(tf.truncated_normal(shape=[3, 3, 64, 128], mean=0, stddev=0.08))
conv3_filter = tf.Variable(tf.truncated_normal(shape=[5, 5, 128, 256], mean=0, stddev=0.08))
conv4_filter = tf.Variable(tf.truncated_normal(shape=[5, 5, 256, 512], mean=0, stddev=0.08))
#Layer1
conv1 = tf.nn.conv2d(x, conv1_filter, strides=[1,1,1,1], padding='SAME')
conv1 = tf.nn.relu(conv1)
conv1_pool = tf.nn.max_pool(conv1, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
conv1_bn = tf.layers.batch_normalization(conv1_pool)
#Layer2
conv2 = tf.nn.conv2d(conv1_bn, conv2_filter, strides=[1,1,1,1], padding='SAME')
conv2 = tf.nn.relu(conv2)
conv2_pool = tf.nn.max_pool(conv2, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
conv2_bn = tf.layers.batch_normalization(conv2_pool)
#Layer 3
conv3 = tf.nn.conv2d(conv2_bn, conv3_filter, strides=[1,1,1,1], padding='SAME')
conv3 = tf.nn.relu(conv3)
conv3_pool = tf.nn.max_pool(conv3, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
conv3_bn = tf.layers.batch_normalization(conv3_pool)
#Layer 4
conv4 = tf.nn.conv2d(conv3_bn, conv4_filter, strides=[1,1,1,1], padding='SAME')
conv4 = tf.nn.relu(conv4)
conv4_pool = tf.nn.max_pool(conv4, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
conv4_bn = tf.layers.batch_normalization(conv4_pool)
flat = tf.contrib.layers.flatten(conv4_bn)
full1 = tf.contrib.layers.fully_connected(inputs=flat, num_outputs=128, activation_fn=tf.nn.relu)
full1 = tf.nn.dropout(full1, keep_prob)
full1 = tf.layers.batch_normalization(full1)
full2 = tf.contrib.layers.fully_connected(inputs=full1, num_outputs=256, activation_fn=tf.nn.relu)
full2 = tf.nn.dropout(full2, keep_prob)
full2 = tf.layers.batch_normalization(full2)
full3 = tf.contrib.layers.fully_connected(inputs=full2, num_outputs=512, activation_fn=tf.nn.relu)
full3 = tf.nn.dropout(full3, keep_prob)
full3 = tf.layers.batch_normalization(full3)
full4 = tf.contrib.layers.fully_connected(inputs=full3, num_outputs=1024, activation_fn=tf.nn.relu)
full4 = tf.nn.dropout(full4, keep_prob)
full4 = tf.layers.batch_normalization(full4)
out = tf.contrib.layers.fully_connected(inputs=full3, num_outputs=10, activation_fn=None)
return out
iterations = 101
batch_size = 128
keep_probability = 0.7
learning_rate = 0.001
logits = conv_net(x, keep_prob)
# Loss and Optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Accuracy
correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32), name='accuracy')
def train_neural_network(session, optimizer, keep_probability, feature_batch, label_batch):
session.run(optimizer,
feed_dict={
x: feature_batch,
y: label_batch,
keep_prob: keep_probability
})
def print_stats(sess, feature_batch, label_batch, cost, accuracy):
loss = sess.run(cost,
feed_dict={
x: feature_batch,
y: label_batch,
keep_prob: 1.
})
valid_acc = sess.run(accuracy,
feed_dict={
x: x_train,
y: y_train,
keep_prob: 1.
})
print('Loss: {:>10.4f} Validation Accuracy: {:.6f}'.format(loss, valid_acc))
def batch_features_labels(features, labels, batch_size):
"""
Split features and labels
"""
for start in range(0, len(features), batch_size):
end = min(start + batch_size, len(features))
yield features[start:end], labels[start:end]
def load_preprocess_training(batch_size):
"""
Load the Preprocessed Training data and return them in batches of <batch_size> or less
"""
features = features.reshape((len(features), 3, 32, 32)).transpose(0, 2, 3, 1)
filename = 'preprocess_training.p'
features, labels = pickle.load(open(filename, mode='rb'))
# Return the training data in batches of size <batch_size> or less
return batch_features_labels(features, labels, batch_size)
print('Training...')
with tf.Session() as sess:
# Initializing the variables
sess.run(tf.global_variables_initializer())
# Training cycle
for i in range(iterations):
for batch_features, batch_labels in load_preprocess_training(batch_size):
train_neural_network(sess, optimizer, keep_probability, batch_features, batch_labels)
if i % 10 == 0:
print('Iterations {}, CIFAR-10 Batch {}: '.format(i, 1), end='')
print_stats(sess, batch_features, batch_labels, cost, accuracy)
ValueError: Cannot feed value of shape (8000, 3072) for Tensor 'input_x:0', which has shape '(?, 32, 32, 3)'
the problem is located in here:
def batch_features_labels(features, labels, batch_size):
"""
Split features and labels
"""
for start in range(0, len(features), batch_size):
end = min(start + batch_size, len(features))
yield features[start:end], labels[start:end]
You should reshape the item in features from 3072 to [32,32,3]
Good luck
I created TFrecord file with mfccs of music segment and theirs labels. But I am running into a problem of tenor shape. I'm sure that the shape of mfcc is [256,12] before writing to TFrecord file. It seems that the requested shape is half of the tensor read from the file. I can't locate the mistake and don't know the reason. Could you please give me some advice? thanks
enter code here
def read_tfRecord(file_tfRecord):
queue = tf.train.string_input_producer([file_tfRecord])
reader = tf.TFRecordReader()
_,serialized_example = reader.read(queue)
features = tf.parse_single_example(serialized_example,features={'micsegment': tf.FixedLenFeature([], tf.string), 'label': tf.FixedLenFeature([], tf.int64)})
mic=tf.decode_raw(features['micsegment'],tf.float32)
mic=tf.reshape(mic,[256,12,1])
label=tf.cast(features['label'], tf.int64)
return mic,label
[mic,label]=read_tfRecord('D:/360MoveData/training.tfrecords')
[testmic,testlabel]=read_tfRecord('D:/360MoveData/test1.tfrecords')
sess = tf.InteractiveSession()
x=tf.placeholder(tf.float32, [256*12])
y_actual = tf.placeholder(tf.float32, [None, num_classes])
def weight_variable(shape):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool(x,ksize):
return tf.nn.max_pool(x, ksize,strides=[1, 1, 1, 1], padding='SAME')
x_mic=tf.reshape(x,[-1,256,12,1])
W_conv1 = weight_variable([3, 3, 1, 16])
b_conv1 = bias_variable([16])
h_conv1 = tf.nn.relu(conv2d(x_mic, W_conv1) + b_conv1) #第一个卷积层
h_pool1 = max_pool(h_conv1,ksize=[1,2,2,1])
W_conv2 = weight_variable([3, 3, 16, 32])
b_conv2 = bias_variable([32])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool(h_conv2,ksize=[1,2,3,1])
W_conv3 = weight_variable([3, 3, 32, 32])
b_conv3 = bias_variable([32])
h_conv3 = tf.nn.relu(conv2d(h_pool2, W_conv3) + b_conv3)
h_pool3 = max_pool(h_conv3,ksize=[1,2,2,1])
W_fc1 = weight_variable([32*1*32, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool3, [-1, 32*1*32])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
keep_prob = tf.placeholder("float")
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
y_predict=tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2) #softmax
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_predict* tf.log(y_actual), reduction_indices=[1]))
train_step = tf.train.GradientDescentOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(y_predict, y_actual)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
sess.run(tf.global_variables_initializer())
# start the threads used for reading files
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess,coord=coord)
# start training
nSteps=1000
for i in range(nSteps):
batch_xs, batch_ys=sess.run([mic_batches,label_batches])
# run the training step with feed of images
train_step.run(feed_dict={x: batch_xs, y_actual: batch_ys, keep_prob: 0.5})
if (i+1)%100==0: # then perform validation
# get a validation batch
vbatch_xs, vbatch_ys = sess.run([mictest_batches,labeltest_batches])
train_accuracy = accuracy.eval(feed_dict={x:vbatch_xs, y_actual: vbatch_ys, keep_prob: 1.0})
print("step %d, training accuracy %g"%(i+1, train_accuracy))
# finalise
coord.request_stop()
coord.join(threads)
sess.close()
and this is my code for compute mfccs and write Tfrecord file
enter code here
def load_file(example_list_file):
lines = np.genfromtxt(example_list_file,delimiter="*",dtype=[('col1','S200'), ('col2', 'i8')])
examples = []
labels = []
for example,label in lines:
examples.append(example)
labels.append(label)
return np.asarray(examples),np.asarray(labels),len(lines)
enter code here
_examples,_labels,examples_num = load_file(train_file)
filename = name + '.tfrecords'
writer= tf.python_io.TFRecordWriter(filename)
for i,[example,label] in enumerate(zip(_examples,_labels)):
micseg = compute_spectrograms(example)
micsegment=micseg.tostring()
example = tf.train.Example(features=tf.train.Features(feature={ 'micsegment':_bytes_feature(micsegment), 'label': _int64_feature(label)}))
writer.write(example.SerializeToString())
writer.close()
I am trying to make a character recognition classifier for bangla alphabets. The images are size of 50x50. There are in total of 50 classes. Using the below CNN model to train but I am encountering this error: "ValueError: Validation size should be between 0 and 0. Received: 5000."
How do I resolve this?
MODEL
# Python 3.6.0
# tensorflow 1.1.0
import os
import os.path as path
import tensorflow as tf
from tensorflow.python.tools import freeze_graph
from tensorflow.python.tools import optimize_for_inference_lib
from tensorflow.examples.tutorials.mnist import input_data
MODEL_NAME = 'mnist_convnet'
NUM_STEPS = 3000
BATCH_SIZE = 16
def model_input(input_node_name, keep_prob_node_name):
x = tf.placeholder(tf.float32, shape=[None, 50*50], name=input_node_name)
keep_prob = tf.placeholder(tf.float32, name=keep_prob_node_name)
y_ = tf.placeholder(tf.float32, shape=[None, 50])
return x, keep_prob, y_
def build_model(x, keep_prob, y_, output_node_name):
x_image = tf.reshape(x, [-1, 50, 50, 1])
# 50*50*1
conv1 = tf.layers.conv2d(x_image, 64, 3, 1, 'same', activation=tf.nn.relu)
# 50*50*64
pool1 = tf.layers.max_pooling2d(conv1, 2, 2, 'same')
# 14*14*64
conv2 = tf.layers.conv2d(pool1, 128, 3, 1, 'same', activation=tf.nn.relu)
# 14*14*128
pool2 = tf.layers.max_pooling2d(conv2, 2, 2, 'same')
# 7*7*128
conv3 = tf.layers.conv2d(pool2, 256, 3, 1, 'same', activation=tf.nn.relu)
# 7*7*256
pool3 = tf.layers.max_pooling2d(conv3, 2, 2, 'same')
# 4*4*256
flatten = tf.reshape(pool3, [-1, 4*4*256])
fc = tf.layers.dense(flatten, 1024, activation=tf.nn.relu)
dropout = tf.nn.dropout(fc, keep_prob)
logits = tf.layers.dense(dropout, 50)
outputs = tf.nn.softmax(logits, name=output_node_name)
# loss
loss = tf.reduce_mean(
tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))
# train step
train_step = tf.train.AdamOptimizer(1e-4).minimize(loss)
# accuracy
correct_prediction = tf.equal(tf.argmax(outputs, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
tf.summary.scalar("loss", loss)
tf.summary.scalar("accuracy", accuracy)
merged_summary_op = tf.summary.merge_all()
return train_step, loss, accuracy, merged_summary_op
def train(x, keep_prob, y_, train_step, loss, accuracy,
merged_summary_op, saver):
print("training start...")
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
init_op = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init_op)
tf.train.write_graph(sess.graph_def, 'out',
MODEL_NAME + '.pbtxt', True)
# op to write logs to Tensorboard
summary_writer = tf.summary.FileWriter('logs/',
graph=tf.get_default_graph())
for step in range(NUM_STEPS):
batch = mnist.train.next_batch(BATCH_SIZE)
if step % 100 == 0:
train_accuracy = accuracy.eval(feed_dict={
x: batch[0], y_: batch[1], keep_prob: 1.0})
print('step %d, training accuracy %f' % (step, train_accuracy))
_, summary = sess.run([train_step, merged_summary_op],
feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
summary_writer.add_summary(summary, step)
saver.save(sess, 'out/' + MODEL_NAME + '.chkp')
test_accuracy = accuracy.eval(feed_dict={x: mnist.test.images,
y_: mnist.test.labels,
keep_prob: 1.0})
print('test accuracy %g' % test_accuracy)
print("training finished!")
def export_model(input_node_names, output_node_name):
freeze_graph.freeze_graph('out/' + MODEL_NAME + '.pbtxt', None, False,
'out/' + MODEL_NAME + '.chkp', output_node_name, "save/restore_all",
"save/Const:0", 'out/frozen_' + MODEL_NAME + '.pb', True, "")
input_graph_def = tf.GraphDef()
with tf.gfile.Open('out/frozen_' + MODEL_NAME + '.pb', "rb") as f:
input_graph_def.ParseFromString(f.read())
output_graph_def = optimize_for_inference_lib.optimize_for_inference(
input_graph_def, input_node_names, [output_node_name],
tf.float32.as_datatype_enum)
with tf.gfile.FastGFile('out/opt_' + MODEL_NAME + '.pb', "wb") as f:
f.write(output_graph_def.SerializeToString())
print("graph saved!")
def main():
if not path.exists('out'):
os.mkdir('out')
input_node_name = 'input'
keep_prob_node_name = 'keep_prob'
output_node_name = 'output'
x, keep_prob, y_ = model_input(input_node_name, keep_prob_node_name)
train_step, loss, accuracy, merged_summary_op = build_model(x, keep_prob, y_, output_node_name)
saver = tf.train.Saver()
train(x, keep_prob, y_, train_step, loss, accuracy, merged_summary_op, saver)
export_model([input_node_name, keep_prob_node_name], output_node_name)
if __name__ == '__main__':
main()
ERROR
ValueError Traceback (most recent call last)
<ipython-input-2-2015e0ea466d> in <module>()
136
137 if __name__ == '__main__':
--> 138 main()
<ipython-input-2-2015e0ea466d> in main()
131 saver = tf.train.Saver()
132
--> 133 train(x, keep_prob, y_, train_step, loss, accuracy, merged_summary_op, saver)
134
135 export_model([input_node_name, keep_prob_node_name], output_node_name)
<ipython-input-2-2015e0ea466d> in train(x, keep_prob, y_, train_step, loss, accuracy, merged_summary_op, saver)
67 print("training start...")
68
---> 69 mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
70
71 init_op = tf.global_variables_initializer()
/anaconda3/envs/nlpTFnltk/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/datasets/mnist.py in read_data_sets(train_dir, fake_data, one_hot, dtype, reshape, validation_size)
247 raise ValueError(
248 'Validation size should be between 0 and {}. Received: {}.'
--> 249 .format(len(train_images), validation_size))
250
251 validation_images = train_images[:validation_size]
ValueError: Validation size should be between 0 and 0. Received: 5000.
You're using the MNIST tutorial code, which is calling read_data_sets from here; note that validation_size of 5000 comes from that function's default parameters. It's expecting to get data from the following files:
TRAIN_IMAGES = 'train-images-idx3-ubyte.gz'
TRAIN_LABELS = 'train-labels-idx1-ubyte.gz'
TEST_IMAGES = 't10k-images-idx3-ubyte.gz'
TEST_LABELS = 't10k-labels-idx1-ubyte.gz'
Normally it would try to download those files if it doesn't find them, but the fact that you're getting a validation_size of 0 suggests it isn't doing so. This wouldn't help you anyway, since you don't want to use the MNIST data.
Even if you rename your train and test files to match the above filenames, your code won't work because the MNIST code is also calling extract_labels, which has a default parameter num_classes=10 while you want this to be 50. Your best bet is probably to get rid of the MNIST import completely and read about how to set up an input pipeline; it's not difficult compared to the stuff you've done already.
I wrote cnn program with tensorflow, but it can not learn well.
The data set is cifar-10, and the task of classifying color images into 10 classes.
This is the code.
from __future__ import print_function
import tensorflow as tf
import os
import numpy as np
import cv2
import random
NUM_CLASSES = 10
IMG_SIZE = 32
STEPS = 5000
BATCH_SIZE=20
train_img_dirs = ["airplane","automobile","bird","cat","deer","dog","frog","horse","ship","truck"]
train_image = []
train_label = []
config = tf.ConfigProto(
gpu_options=tf.GPUOptions(
per_process_gpu_memory_fraction=0.1
)
)
for i, d in enumerate(train_img_dirs):
files = os.listdir('./' + d)
for f in files:
img = cv2.imread('./' + d + '/' + f)
img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
img = img.flatten().astype(np.float32)/255.0
train_image.append(img)
tmp = np.zeros(NUM_CLASSES)
tmp[i] = 1
train_label.append(tmp)
train_image = np.asarray(train_image)
train_label = np.asarray(train_label)
def weight_variable(shape,name):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial,name=name)
def bias_variable(shape,name):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial,name=name)
def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool_2x2(x):
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1], padding='SAME')
# Input layer
x = tf.placeholder(tf.float32, [None, 32*32*3], name='x')
y_ = tf.placeholder(tf.float32, [None, 10], name='y_')
x_image = tf.reshape(x, [-1, 32, 32, 3])
# Convolutional layer 1
W_conv1 = weight_variable([5, 5, 3, 32],"W_conv1")
b_conv1 = bias_variable([32],"b_conv1")
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
# Convolutional layer 2
W_conv2 = weight_variable([5, 5, 32, 64],"W_conv2")
b_conv2 = bias_variable([64],"b_conv2")
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
# Convolutional layer 3
W_conv3 = weight_variable([5, 5, 64, 128],"W_conv3")
b_conv3 = bias_variable([128],"b_conv3")
h_conv3 = tf.nn.relu(conv2d(h_pool2, W_conv3) + b_conv3)
h_pool3 = max_pool_2x2(h_conv3)
# Fully connected layer 1
h_pool3_flat = tf.reshape(h_pool3, [-1, 4*4*128])
W_fc1 = weight_variable([4 * 4 * 128, 1024],"W_fc1")
b_fc1 = bias_variable([1024],"b_fc1")
h_fc1 = tf.nn.relu(tf.matmul(h_pool3_flat, W_fc1) + b_fc1)
# Dropout
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
# Fully connected layer 2 (Output layer)
W_fc2 = weight_variable([1024, 10],"W_fc2")
b_fc2 = bias_variable([10],"b_fc2")
y = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2, name='y')
# Evaluation functions
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')
# Training algorithm
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
# Training steps
with tf.Session(config=config) as sess:
sess.run(tf.global_variables_initializer())
saver = tf.train.Saver({'W_conv1': W_conv1, 'b_conv1': b_conv1, 'W_conv2': W_conv2, 'b_conv2': b_conv2 ,'W_conv3': W_conv3, 'b_conv3': b_conv3})
max_steps = 1000
for i in range(STEPS):
random_seq = list(range(len(train_image)))
random.shuffle(random_seq)
for j in range(len(train_image)//BATCH_SIZE):
batch = BATCH_SIZE * j
train_image_batch = []
train_label_batch = []
for k in range(BATCH_SIZE):
train_image_batch.append(train_image[random_seq[batch + k]])
train_label_batch.append(train_label[random_seq[batch + k]])
train_step.run(feed_dict={x: train_image_batch, y_: train_label_batch, keep_prob: 0.5})
train_accuracy = accuracy.eval(feed_dict={
x:train_image_batch, y_: train_label_batch, keep_prob: 1.0})
print("step %d, training accuracy %g"%(i, train_accuracy))
saver.save(sess, "model.ckpt")
The training accuracy keeps figuring about 0.1 until the end of learning.
Is this a bug? Or is the structure of CNN bad?
The python version is 2.7, and the tensorflow version is 1.4.0.
I'm trying to build an image recognition CNN following tutorials from TensorFlow's website. I converted a directory of RGB 256x256 images (24 classes) to TFRecords using this solution so now I have two files - 'training' and 'validation'. I'm sure that all images are the same size, but when I try to train my network the only thing that I achive is this error:
Invalid argument: Shape mismatch in tuple component 0. Expected [65536], got [37191]
I tried to find a solution but I didn't succed - there was only a hint that this error can be caused by images with size different to the one declarated in the network but as I said - I'm sure the images are good.
Here is my code:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
import os
NUM_CLASSES = 24
IMAGE_SIZE = 256
IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE
flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_float('learning_rate', 0.01, 'Initial learning rate.')
flags.DEFINE_integer('num_epochs', 2, 'Number of epochs to run trainer.')
flags.DEFINE_integer('batch_size', 100, 'Batch size.')
flags.DEFINE_string('train_dir', 'HERE_IS_MY_DATA_DIRECTORY', 'Directory with the training data.')
TRAIN_FILE = 'train'
VALIDATION_FILE = 'validation'
sess = tf.InteractiveSession()
def read_and_decode(filename_queue):
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
features = tf.parse_single_example(
serialized_example,
features={
'image/buffer': tf.FixedLenFeature([], tf.string),
'image/class/label': tf.FixedLenFeature([], tf.int64),
})
image = tf.decode_raw(features['image/buffer'], tf.uint8)
image.set_shape([IMAGE_PIXELS])
image = tf.cast(image, tf.float32)
image = tf.cast(image, tf.float32) * (1. / 255) - 0.5
label = tf.cast(features['image/class/label'], tf.int32)
return image, label
def input_pipeline(train, batch_size, num_epochs):
if not num_epochs: num_epochs = None
filename = os.path.join(FLAGS.train_dir,TRAIN_FILE if train else VALIDATION_FILE)
with tf.name_scope('input'):
filename_queue = tf.train.string_input_producer([filename], num_epochs=num_epochs)
image, label = read_and_decode(filename_queue)
min_after_dequeue = 10000
capacity = min_after_dequeue + 3 * batch_size
image_batch, label_batch = tf.train.shuffle_batch(
[image, label], batch_size=batch_size, capacity=capacity, min_after_dequeue=min_after_dequeue)
return image_batch, label_batch
def weight_variable(shape):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool_2x2(x):
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1], padding='SAME')
#Variaveis
x, y_ = input_pipeline(train=True, batch_size=FLAGS.batch_size, num_epochs=FLAGS.num_epochs)
#Layer 1
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
x_image = tf.reshape(x, [-1, 256, 256, 1])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
#Layer 2
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
#layer3
W_conv3 = weight_variable([5, 5, 64, 128])
b_conv3 = bias_variable([128])
h_conv3 = tf.nn.relu(conv2d(h_pool2, W_conv3) + b_conv3)
h_pool3 = max_pool_2x2(h_conv3)
#layer4
W_conv4 = weight_variable([5, 5, 128, 256])
b_conv4 = bias_variable([256])
h_conv4 = tf.nn.relu(conv2d(h_pool3, W_conv4) + b_conv4)
h_pool4 =max_pool_2x2(h_conv4)
#Layer5
W_conv5 = weight_variable([5, 5, 256, 512])
b_conv5 = bias_variable([512])
h_conv5 = tf.nn.relu(conv2d(h_pool4, W_conv5) + b_conv5)
h_pool5 =max_pool_2x2(h_conv5)
#layer6
W_conv6 = weight_variable([5, 5, 512, 1024])
b_conv6 = bias_variable([1024])
h_conv6 = tf.nn.relu(conv2d(h_pool5, W_conv6) + b_conv6)
h_pool6 =max_pool_2x2(h_conv6)
#Densely Connected Layer
W_fc1 = weight_variable([16 * 16 * 64, 1024])
b_fc1 = bias_variable([1024])
h_pool6_flat = tf.reshape(h_pool6, [-1, 16 * 16 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool6_flat, W_fc1) + b_fc1)
#Dropout - reduz overfitting
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
#Readout layer
W_fc2 = weight_variable([1024, 36])
b_fc2 = bias_variable([36])
y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
#Train and evaluate
cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(y_conv, y_))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
sess.run(tf.initialize_all_variables())
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
for i in range(100):
if i%10 == 0:
train_accuracy = accuracy.eval(feed_dict={keep_prob: 1.0})
print("step %d, training accuracy %g"%(i, train_accuracy))
train_step.run(feed_dict={keep_prob: 0.5})
x, y_ = input_pipeline(train=True, batch_size=FLAGS.batch_size)
print("test accuracy %g"%accuracy.eval(feed_dict={keep_prob: 1.0}))
coord.join(threads)
sess.close()
Is there something wrong in my code or maybe I get the error because the images are RGB (in all tutorials people are using grayscaled images)? If so, how do I handle RGB images correctly?
I will be grateful for any help.
EDIT:
I solved the problem by creating TFRecord files with this script, thanks for all the help.