MobileNet always predicts the same label - Python

I trained a mobilenet_v2 model to classify an 8-class image dataset. I used Google's pretrained parameters to fine-tune and retrain the net on my dataset; accuracy improved quickly during training and reached 98% by the end.
But when I use this trained model to predict test images, it always outputs the same label.
I have tried vgg16 too, and it works well.
Here is my code.
inference
def inference(inputs, is_training=True):
    with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope(is_training=is_training)):
        net, endpoints = mobilenet_v2.mobilenet(input_tensor=inputs, num_classes=n_class, conv_defs=V2_18_DEF)
    print('mobilenet output', net.get_shape().as_list())
    return net
loss
def loss(logit, label):
    with tf.name_scope('LOSS'):
        class_loss = tf.nn.softmax_cross_entropy_with_logits(labels=label, logits=logit)
        class_loss = tf.reduce_mean(class_loss, axis=0)
        tf.summary.scalar('class_loss', class_loss)
        regularization_loss = tf.reduce_sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        total_loss = class_loss + regularization_loss
        return total_loss
train_step
def train_op(loss):
    train_step = tf.train.AdamOptimizer(1e-4).minimize(loss)
    return train_step
test
def test():
    inputs = tf.placeholder(name='inputs', shape=[None, 224, 224, 3], dtype=tf.float32)
    label_holder = tf.placeholder(name='label_holder', shape=[None, n_class], dtype=tf.float32)
    if net_type == 'mobile_v2':
        test_logits = inference(inputs, is_training=False)
    elif net_type == 'mobile_v1':
        test_logits = inference_mobile_v1(inputs, is_training=False)
    elif net_type == 'vgg16':
        test_logits = inference_vgg(inputs, is_training=False)
    predict = tf.nn.softmax(test_logits)
    predict_result = tf.argmax(predict, axis=1, output_type=tf.int32)
    true_result = tf.argmax(label_holder, axis=1, output_type=tf.int32)
    correct_predict = tf.equal(predict_result, true_result)
    accuracy = tf.reduce_mean(tf.cast(correct_predict, tf.float32))
    pos = 0
    acc = 0
    count = 0
    sess = tf.Session()
    ckpt_path = ''
    try:
        ckpt_path = sys.argv[3]
    except:
        ckpt = tf.train.get_checkpoint_state(output_path)
        if ckpt and ckpt.model_checkpoint_path:
            ckpt_path = ckpt.model_checkpoint_path
    sess.run(tf.global_variables_initializer())
    variables_to_restore = slim.get_variables_to_restore()
    for var in variables_to_restore:
        print(var.name)
    saver = tf.train.Saver(variables_to_restore)
    saver.restore(sess, ckpt_path)
    dataset = Dataset(classes, 0.8)
    test_data, test_label = dataset.get_test_data()
    while pos < len(test_data):
        start = pos
        end = min(pos + batch_size, len(test_data))
        batch_img = test_data[start:end]
        batch_label = test_label[start:end]
        batch_data = {inputs: batch_img, label_holder: batch_label}
        batch_acc, p_result, t_result = sess.run([accuracy, predict_result, true_result], feed_dict=batch_data)
        print('batch_acc', batch_acc)
        print(p_result)
        print(t_result)
        acc += batch_acc
        pos = end
        count += 1
    acc = acc / count
    print('test acc', acc)
    sess.close()
train function
def train():
    inputs = tf.placeholder(name='inputs', shape=[None, 224, 224, 3], dtype=tf.float32)
    label_holder = tf.placeholder(name='label_holder', shape=[None, n_class], dtype=tf.float32)
    if net_type == 'mobile_v2':
        train_logits = inference(inputs)
    elif net_type == 'mobile_v1':
        train_logits = inference_mobile_v1(inputs, is_training=True)
    elif net_type == 'vgg16':
        train_logits = inference_vgg(inputs, is_training=True)
    loss_op = loss(train_logits, label_holder)
    predict = tf.nn.softmax(train_logits)
    print('predict shape', predict.get_shape().as_list())
    predict_result = tf.argmax(predict, axis=1, output_type=tf.int32)
    correct_predict = tf.equal(predict_result, tf.argmax(label_holder, axis=1, output_type=tf.int32))
    accuracy = tf.reduce_mean(tf.cast(correct_predict, tf.float32))
    for var in tf.trainable_variables():
        tf.summary.histogram(var.name, var)
    train_step = train_op(loss_op)
    # pretrain restore
    all_variable = tf.trainable_variables()
    pretrain_vals = []
    reader = pywrap_tensorflow.NewCheckpointReader(pretrain_model_path)
    var_to_shape_map = reader.get_variable_to_shape_map()
    for var in all_variable:
        print(var.name)
        if 'Logits' in var.name:
            continue
        if var.name.split(':')[0] in var_to_shape_map:
            print('restore', var.name)
            pretrain_vals.append(var)
    pretrain_saver = tf.train.Saver(pretrain_vals)
    variables_to_restore = slim.get_variables_to_restore()
    train_saver = tf.train.Saver(variables_to_restore)
    # tensorboard
    sess = tf.Session()
    merge_op = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter('./tensorboard/train', sess.graph)
    # init
    print('init & restore')
    sess.run(tf.global_variables_initializer())
    pretrain_saver.restore(sess, pretrain_model_path)
    dataset = Dataset(classes, 0.8)
    data_amount = dataset.data_amount
    each_epoch = int(data_amount / batch_size)
    total_step = int(each_epoch * EPOCH)
    global_step = 0
    test_data, test_label = dataset.next_batch(80)
    for epoch in range(EPOCH):
        # evaluate on the held-out batch
        pos = 0
        count = 0
        acc = 0
        while pos < len(test_data):
            start = pos
            end = min(pos + batch_size, len(test_data))
            batch_img = test_data[start:end]
            batch_label = test_label[start:end]
            batch_data = {inputs: batch_img, label_holder: batch_label}
            batch_acc, t_result, loss_val = sess.run([accuracy, predict_result, loss_op], feed_dict=batch_data)
            print('batch_acc', batch_acc, loss_val)
            print(t_result)
            acc += batch_acc
            pos = end
            count += 1
        acc = acc / count
        print('=====test_acc===', acc)
        # train for one epoch
        for epoch_step in range(each_epoch):
            batch_img, batch_label = dataset.next_batch(batch_size)
            batch_data = {inputs: batch_img, label_holder: batch_label}
            merge_str, loss_val, acc, p_result = sess.run([merge_op, loss_op, accuracy, predict_result], feed_dict=batch_data)
            sess.run(train_step, feed_dict=batch_data)
            print('loss %f, acc %f, global step %d, epoch %d, epoch_step %d' % (loss_val, acc, global_step, epoch, epoch_step))
            print(p_result)
            summary_writer.add_summary(merge_str, global_step=global_step)
            summary_writer.flush()
            global_step += 1
        save_path = os.path.join(output_path, 'model.ckpt')
        train_saver.save(sess, save_path, global_step)
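One thing worth checking here (an assumption on my part, not something the post confirms): with slim-built MobileNet, batch-norm moving averages are only updated if the train op depends on tf.GraphKeys.UPDATE_OPS. If they are never updated, training accuracy (computed with is_training=True batch statistics) can look excellent while inference with is_training=False collapses to a single label. A minimal sketch of the wiring:

# Sketch: make the optimizer step depend on batch-norm updates.
# Assumes slim's batch norm registers its moving-average update ops
# in tf.GraphKeys.UPDATE_OPS (the tf.contrib.slim default).
def train_op(loss):
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_step = tf.train.AdamOptimizer(1e-4).minimize(loss)
    return train_step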


For loop sending wrong data to a list

Below is the code. I am running a for loop to train on different training sizes. The first loop works correctly: when training begins, the training and validation accuracies are appended to a list, then put into a DataFrame, and finally written to a CSV. But on subsequent loops, a data generator is sent to the list instead. Can anyone see where the issue is? I can't find it.
Also, if you have a better way of doing this (compiling data for analysis), I'm all ears.
The first block is the code snippet; the second block is the full code. The for loop starts about halfway down.
for i in range(1, 6):
    training_loader, validation_loader, training_ones, training_zeros, validation_ones, validation_zeros = switcher().sets(case)
    train_accuracy = []
    val_accuracy = []
    start_time = time.time()
    for epoch in tqdm(range(1, epochs + 1), total=epochs):
        train()
        train_acc = test(training_loader)
        train_accuracy.append(train_acc)
        val_acc = test(validation_loader)
        val_accuracy.append(val_acc)
    accuracy = pd.DataFrame()
    accuracy['train_acc'] = train_accuracy
    accuracy['val_acc'] = val_accuracy
    accuracy.to_csv(f'C:\\Users\\Anthony Sirico\\Documents\\GitHub\\PyGeo_Circuit_exp\\PyGeo_Circuit_exp\\imbalance_exp\\csv files\\accuracy_{i}.csv')
import sys
sys.path.insert(0, 'C:\\Users\\user\\Desktop\\imbalance_exp\\imbalance_exp\\imbalance_exp')
import torch
from torch_geometric.loader import DataLoader
import imb_dataset as imb
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GraphConv
from torch_geometric.nn import global_mean_pool
import neptune.new as neptune
import pandas as pd
from sklearn.metrics import confusion_matrix, matthews_corrcoef
import seaborn as sns
from neptune.new.types import File
from tqdm import tqdm
import time

known = imb.ImbalanceDataset(root='imb_50v2', set='known', split=0.5)
unknown = imb.ImbalanceDataset(root='imb_50v2', set='unknown', split=0.5)
all_data = imb.ImbalanceDataset(root='imb_50v2', set='All', split=None)
torch.manual_seed(12345)
known = known.shuffle()
lr = 0.001
training_perc = 0.9
N = len(known)
mini_batch_size = 32
epochs = 600
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
case = 2
class switcher:
    def sets(self, case):
        default = known
        return getattr(self, 'case_' + str(case), lambda: default)()

    def case_1(self):
        training_set = known[:int(training_perc * len(known))]
        validation_set = known[int(training_perc * len(known)):]
        training_loader = DataLoader(training_set, batch_size=mini_batch_size, shuffle=True)
        validation_loader = DataLoader(validation_set, batch_size=mini_batch_size, shuffle=False)
        training_ones = []
        training_zeros = []
        validation_ones = []
        validation_zeros = []
        for i in range(len(training_set)):
            if training_set[i].y == 1:
                training_ones.append(training_set[i])
            else:
                training_zeros.append(training_set[i])
        for i in range(len(validation_set)):
            if validation_set[i].y == 1:
                validation_ones.append(validation_set[i])
            else:
                validation_zeros.append(validation_set[i])
        return training_loader, validation_loader, training_ones, training_zeros, validation_ones, validation_zeros

    def case_2(self):
        one_index = round(len(known) * 0.25)
        known_ones = known[:one_index].copy()
        known_ones.shuffle()
        known_zeros = known[one_index:].copy()
        known_zeros.shuffle()
        training_ones = known_ones[:int(training_perc * len(known_ones))]
        training_zeros = known_zeros[:len(training_ones)]
        training_set = torch.utils.data.ConcatDataset([training_ones, training_zeros])
        validation_ones = known_ones[int(training_perc * len(known_ones)):]
        validation_zeros = known_zeros[len(training_ones):]
        validation_set = torch.utils.data.ConcatDataset([validation_ones, validation_zeros])
        training_loader = DataLoader(training_set, batch_size=mini_batch_size, shuffle=True)
        validation_loader = DataLoader(validation_set, batch_size=mini_batch_size, shuffle=False)
        training_ones = []
        training_zeros = []
        validation_ones = []
        validation_zeros = []
        for i in range(len(training_set)):
            if training_set[i].y == 1:
                training_ones.append(training_set[i])
            else:
                training_zeros.append(training_set[i])
        for i in range(len(validation_set)):
            if validation_set[i].y == 1:
                validation_ones.append(validation_set[i])
            else:
                validation_zeros.append(validation_set[i])
        return training_loader, validation_loader, training_ones, training_zeros, validation_ones, validation_zeros
class GCN(torch.nn.Module):
    def __init__(self, hidden_channels):
        super(GCN, self).__init__()
        torch.manual_seed(12345)
        self.conv1 = GraphConv(known.num_node_features, hidden_channels)
        self.conv2 = GraphConv(hidden_channels, hidden_channels)
        self.conv3 = GraphConv(hidden_channels, hidden_channels)
        self.lin = Linear(hidden_channels, known.num_classes)

    def forward(self, x, edge_index, batch):
        # 1. Obtain node embeddings
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = self.conv2(x, edge_index)
        x = x.relu()
        x = self.conv3(x, edge_index)
        # 2. Readout layer
        x = global_mean_pool(x, batch)  # [batch_size, hidden_channels]
        # 3. Apply a final classifier
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin(x)
        return x

model = GCN(hidden_channels=64).to(device)
print(model)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
criterion = torch.nn.CrossEntropyLoss()

def train():
    model.train()
    total_loss = 0
    for data in training_loader:  # Iterate in batches over the training dataset.
        data = data.to(device)
        out = model(data.x, data.edge_index, data.batch)  # Perform a single forward pass.
        loss = criterion(out, data.y)  # Compute the loss.
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.
        optimizer.zero_grad()  # Clear gradients.

def test(loader):
    model.eval()
    correct = 0
    for data in loader:  # Iterate in batches over the training/test dataset.
        data = data.to(device)
        out = model(data.x, data.edge_index, data.batch)
        pred = out.argmax(dim=1)  # Use the class with highest probability.
        correct += int((pred == data.y).sum())  # Check against ground-truth labels.
    return correct / len(loader.dataset)  # Derive ratio of correct predictions.
output_frame = pd.DataFrame(columns=['epoch', 'lr', 'known', 'unknown', 'train_ones', 'train_zeros', 'val_ones', 'val_zeros', 'tn_all', 'fp_all', 'fn_all', 'tp_all', 'tn_known', 'fp_known', 'fn_known', 'tp_known', 'precision_all', 'recall_all', 'f1_all', 'accuracy_all', 'mcc_all', 'precision_known', 'recall_known', 'f1_known', 'accuracy_known', 'mcc_known', 'time_elapsed'])
for i in range(1, 6):
    training_loader, validation_loader, training_ones, training_zeros, validation_ones, validation_zeros = switcher().sets(case)
    train_accuracy = []
    val_accuracy = []
    start_time = time.time()
    for epoch in tqdm(range(1, epochs + 1), total=epochs):
        train()
        train_acc = test(training_loader)
        train_accuracy.append(train_acc)
        val_acc = test(validation_loader)
        val_accuracy.append(val_acc)
    accuracy = pd.DataFrame()
    accuracy['train_acc'] = train_accuracy
    accuracy['val_acc'] = val_accuracy
    accuracy.to_csv(f'C:\\Users\\Anthony Sirico\\Documents\\GitHub\\PyGeo_Circuit_exp\\PyGeo_Circuit_exp\\imbalance_exp\\csv files\\accuracy_{i}.csv')
    unknown_loader = DataLoader(unknown, batch_size=1, shuffle=False)
    predictions = []
    all_correct = 0
    known_correct = 0
    for test in unknown_loader:
        test = test.to(device)
        out = model(test.x, test.edge_index, test.batch)
        pred = out.argmax(dim=1)
        predictions.append(pred)
        all_correct += int((pred == test.y_all).sum())
        known_correct += int((pred == test.y_known).sum())
    pred_df = pd.DataFrame()
    pred_df['y_all_true'] = [i.item() for i in unknown.data.y_all]
    pred_df['y_known_true'] = [i.item() for i in unknown.data.y_known]
    pred_df['y_pred'] = [i.item() for i in predictions]
    pred_df.to_csv(f'C:\\Users\\Anthony Sirico\\Documents\\GitHub\\PyGeo_Circuit_exp\\PyGeo_Circuit_exp\\imbalance_exp\\csv files\\pred_df_{i}.csv')
    cf_matrix_all = confusion_matrix(pred_df['y_all_true'], pred_df['y_pred'])
    ax = sns.heatmap(cf_matrix_all, annot=True, fmt='g', cmap='Blues')
    ax.title.set_text('Confusion Matrix based on all data')
    tn_all, fp_all, fn_all, tp_all = cf_matrix_all.ravel()
    end_time = time.time()
    time_elapsed = end_time - start_time
    precision_all = tp_all / (tp_all + fp_all)
    recall_all = tp_all / (tp_all + fn_all)
    f1_all = 2 * (precision_all * recall_all) / (precision_all + recall_all)
    accuracy_all = (tp_all + tn_all) / (tp_all + tn_all + fp_all + fn_all)
    mcc_all = matthews_corrcoef(pred_df['y_all_true'], pred_df['y_pred'])
    cf_matrix_known = confusion_matrix(pred_df['y_known_true'], pred_df['y_pred'])
    ax = sns.heatmap(cf_matrix_known, annot=True, fmt='g', cmap='Blues')
    ax.title.set_text('Confusion Matrix based on known data')
    tn_known, fp_known, fn_known, tp_known = cf_matrix_known.ravel()
    precision_known = tp_known / (tp_known + fp_known)
    recall_known = tp_known / (tp_known + fn_known)
    f1_known = 2 * (precision_known * recall_known) / (precision_known + recall_known)
    accuracy_known = (tp_known + tn_known) / (tp_known + tn_known + fp_known + fn_known)
    mcc_known = matthews_corrcoef(pred_df['y_known_true'], pred_df['y_pred'])
    # 'epoch', 'lr', 'known', 'unknown', 'train_ones', 'train_zeros', 'val_ones', 'val_zeros', 'tn_all', 'fp_all', 'fn_all', 'tp_all', 'tn_known', 'fp_known', 'fn_known', 'tp_known
    output_frame.loc[i] = [epochs, lr, len(known), len(unknown), len(training_ones), len(training_zeros), len(validation_ones), len(validation_zeros), tn_all, fp_all, fn_all, tp_all, tn_known, fp_known, fn_known, tp_known, precision_all, recall_all, f1_all, accuracy_all, mcc_all, precision_known, recall_known, f1_known, accuracy_known, mcc_known, time_elapsed]
    output_frame.to_csv('C:\\Users\\Anthony Sirico\\Documents\\GitHub\\PyGeo_Circuit_exp\\PyGeo_Circuit_exp\\imbalance_exp\\csv files\\final_output.csv')
    training_perc -= 0.2
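One possible culprit, based only on reading the code above (so treat this as a guess): the evaluation loop `for test in unknown_loader:` rebinds the module-level name `test`, which until then referred to the `test(loader)` function. On every outer iteration after the first, `test(training_loader)` is therefore no longer calling the original function. A minimal sketch of the shadowing hazard, with hypothetical data:

def test(loader):
    # stand-in for the real evaluation function
    return sum(loader) / len(loader)

print(test([1, 2, 3]))      # works: 'test' is still the function

for test in [[4, 5, 6]]:    # rebinds the global name 'test'
    pass

print(test)                 # now a list, not the function
# test([1, 2, 3])           # would raise TypeError: 'list' object is not callable

Renaming the loop variable (e.g., for batch in unknown_loader:) would avoid the collision.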

ValueError: operands could not be broadcast together with shapes (1,1,1500) (1,512)

For my project I am trying to compare LSTM and GRU for video captioning. I started with an open-source LSTM code, and after making the changes for the GRU the model trains perfectly. But I get the mentioned ValueError when I try to test. I am attaching the testing code below.
###Testing Block###
# class to perform inference on all test files and save as test_output.txt
class Video2Text(object):
    ''' Initialize the parameters for the model '''
    def __init__(self):
        self.latent_dim = 512
        self.num_encoder_tokens = 4096
        self.num_decoder_tokens = 1500
        self.time_steps_encoder = 80
        self.time_steps_decoder = None
        self.preload = True
        self.preload_data_path = 'preload_data'
        self.max_probability = -1
        # processed data
        self.encoder_input_data = []
        self.decoder_input_data = []
        self.decoder_target_data = []
        self.tokenizer = None
        # models
        self.encoder_model = None
        self.decoder_model = None
        self.inf_encoder_model = None
        self.inf_decoder_model = None
        self.save_model_path = 'model_final'
        self.test_path = 'testing_data'

    def load_inference_models(self):
        # load tokenizer
        with open(os.path.join(self.save_model_path, 'tokenizer' + str(self.num_decoder_tokens)), 'rb') as file:
            self.tokenizer = joblib.load(file)
        # inference encoder model
        self.inf_encoder_model = load_model(os.path.join(self.save_model_path, 'encoder_model.h5'))
        # inference decoder model
        decoder_inputs = Input(shape=(None, self.num_decoder_tokens))
        decoder_dense = Dense(self.num_decoder_tokens, activation='softmax')
        decoder_gru = GRU(self.latent_dim, return_sequences=True, return_state=True)
        decoder_state_input_h = Input(shape=(self.latent_dim,))
        decoder_state_input_c = Input(shape=(self.latent_dim,))
        # decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
        decoder_state_input = [decoder_state_input_h]
        decoder_outputs, state_h = decoder_gru(decoder_inputs, initial_state=decoder_state_input_h)
        print(state_h.shape)
        decoder_states = [state_h]
        print(np.shape(decoder_states))
        decoder_outputs = decoder_dense(decoder_outputs)
        print(decoder_outputs.shape)
        self.inf_decoder_model = Model(
            [decoder_inputs] + decoder_state_input,
            [decoder_outputs] + decoder_states)
        self.inf_decoder_model.load_weights(os.path.join(self.save_model_path, 'decoder_model_weights.h5'))

    def decode_sequence2bs(self, input_seq):
        states_value = self.inf_encoder_model.predict(input_seq)
        # print(np.shape(states_value))
        target_seq = np.zeros((1, self.num_decoder_tokens))
        target_seq[0, self.tokenizer.word_index['bos']] = 1
        print(input_seq.shape)
        caption = self.greedy_search(input_seq)
        return caption

    def greedy_search(self, f):
        """
        :param f: the loaded numpy array after creating videos to frames and extracting features
        :return: the final sentence which has been predicted greedily
        """
        inv_map = self.index_to_word()
        states_value = self.inf_encoder_model.predict(f.reshape(-1, 80, 4096))
        print(states_value.shape)
        target_seq = np.zeros((1, 1500))
        final_sentence = ''
        target_seq[0, self.tokenizer.word_index['bos']] = 1
        print(target_seq.shape)
        for i in range(15):
            output_tokens, h = self.inf_decoder_model.predict([target_seq] + states_value)
            states_value = [h]
            output_tokens = output_tokens.reshape(self.num_decoder_tokens)
            y_hat = np.argmax(output_tokens)
            if y_hat == 0:
                continue
            if inv_map[y_hat] is None:
                break
            if inv_map[y_hat] == 'eos':
                break
            else:
                final_sentence = final_sentence + inv_map[y_hat] + ' '
                target_seq = np.zeros((1, 1500))
                target_seq[0, y_hat] = 1
        # print(final_sentence)
        return final_sentence

    def decoded_sentence_tuning(self, decoded_sentence):
        decode_str = []
        filter_string = ['bos', 'eos']
        unigram = {}
        last_string = ""
        for idx2, c in enumerate(decoded_sentence):
            if c in unigram:
                unigram[c] += 1
            else:
                unigram[c] = 1
            if last_string == c and idx2 > 0:
                continue
            if c in filter_string:
                continue
            if len(c) > 0:
                decode_str.append(c)
            if idx2 > 0:
                last_string = c
        return decode_str

    def index_to_word(self):
        # inverts word tokenizer
        index_to_word = {value: key for key, value in self.tokenizer.word_index.items()}
        return index_to_word

    def get_test_data(self, path):
        X_test = []
        X_test_filename = []
        %cd /content/drive/My\ Drive/Video-Captioning-main/data/
        with open(os.path.join(path, 'testing_id.txt')) as testing_file:
            lines = testing_file.readlines()
            for filename in lines:
                filename = filename.strip()
                f = np.load(os.path.join(path, 'feat', filename + '.npy'))
                X_test.append(f)
                X_test_filename.append(filename[:-4])
        X_test = np.array(X_test)
        return X_test, X_test_filename

    def test(self):
        X_test, X_test_filename = self.get_test_data(os.path.join(self.test_path))
        print(len(X_test), len(X_test_filename))
        # generate inference test outputs
        %cd /content/drive/My\ Drive/Video-Captioning-main/model_final/
        with open(os.path.join(self.save_model_path, 'test_output_greedy.txt'), 'w') as file:
            for idx, x in enumerate(X_test):
                file.write(X_test_filename[idx] + ',')
                decoded_sentence = self.decode_sequence2bs(x.reshape(-1, 80, 4096))
                file.write(decoded_sentence + ' ')
                file.write('\n')
                # re-init max prob
                # self.max_probability = -1
And here is the code for building the GRU. You can see that encoder_gru outputs one state in addition to encoder_out, whereas the LSTM provided two additional outputs. So I think this is what finally causes the error, because with the LSTM I got (1,1,1500) and (2,1,512).
# Setting up the encoder
encoder_inputs = Input(shape=(time_steps_encoder, num_encoder_tokens), name="encoder_inputs")
encoder_gru = GRU(latent_dim, return_state=True,return_sequences=True, name='endcoder_gru')
encoder_out, encoder_state = encoder_gru(encoder_inputs)
# Set up the decoder
decoder_inputs = Input(shape=(time_steps_decoder, num_decoder_tokens), name= "decoder_inputs")
decoder_gru = GRU(latent_dim, return_sequences=True, return_state=True, name='decoder_gru')
decoder_outputs, decoder_state = decoder_gru(decoder_inputs, initial_state=encoder_state)
decoder_dense = Dense(num_decoder_tokens, activation='softmax', name='decoder_relu')
decoder_outputs = decoder_dense(decoder_outputs)
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.summary()
plot_model(model, to_file='model_train.png', show_shapes=True, show_layer_names=True)
And the following is the exact line where I get the error:
<ipython-input-66-ebbb2fcfd630> in greedy_search(self, f)
78 print(target_seq.shape)
79 for i in range(15):
---> 80 output_tokens, h = self.inf_decoder_model.predict([target_seq] + states_value)
81 states_value = [h]
82 output_tokens = output_tokens.reshape(self.num_decoder_tokens)
ValueError: operands could not be broadcast together with shapes (1,1,1500) (1,512)
In the testing block I tried making the shapes (1,1500) and (1,512), but even then it gives the same error.
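One reading of the error (an inference from the shapes, not a confirmed diagnosis): with the LSTM, `inf_encoder_model.predict` returns a Python list of two state arrays, so `[target_seq] + states_value` is list concatenation. With the GRU it returns a single NumPy array, so `+` becomes element-wise addition, broadcasting `np.array([target_seq])` of shape (1, 1, 1500) against the (1, 512) state, which is exactly the error reported. A minimal sketch with the same shapes:

import numpy as np

target_seq = np.zeros((1, 1500))
state = np.zeros((1, 512))

# LSTM case: predict() returns a list of [h, c], so '+' concatenates lists
inputs_ok = [target_seq] + [state, state]   # a 3-element list, as intended

# GRU case: predict() returns one ndarray, so '+' tries to broadcast:
# inputs_bad = [target_seq] + state
# ValueError: operands could not be broadcast together with shapes (1,1,1500) (1,512)

# Wrapping the single GRU state in a list keeps the call consistent:
inputs_fixed = [target_seq] + [state]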

[Theano]TypeError: cost must be a scalar

I am working on a research project that requires me to write a regularizer for a DNN.
import lasagne
from lasagne.nonlinearities import leaky_rectify, softmax
import theano, theano.tensor as T
import numpy as np
import sklearn.datasets, sklearn.preprocessing, sklearn.model_selection
import matplotlib.pyplot as plt
from tabulate import tabulate
import time
import math

# psi function that will be used in the penalty function
def psi(g, l):
    m = g.shape[1]
    C = (1 / T.pow(2, m)) * (1 / T.pow(math.pi, ((m - 1) / 2))) / (T.gamma((m + 1) / 2))
    logDens = T.log(C) + m * T.log(l) - l * T.sqrt(T.sum(g ** 2))
    dens = T.exp(logDens)
    return dens
# pstar function that will be used in the penalty function
def pStar(g, lambda1, lambda0, theta):
    psi1 = psi(g, lambda1)
    psi0 = psi(g, lambda0)
    # if a coefficient is really large then both these will numerically be zero
    if theta * psi1 == 0 and (1 - theta) * psi0 == 0:
        p = 1
    else:
        p = (theta * psi1) / (theta * psi1 + (1 - theta) * psi0)
    return p
# Separable penalty
def pen_S(l):
    theta = 0.5
    lambda1 = 1
    lambda0 = 12
    for j in range(len(l)):
        t = l[j]
        m = t.shape[1]
        n = t.shape[0].eval()
        cost = T.zeros((1, 1))
        for i in range(n):
            g = t[i]
            temp = -lambda1 * T.sum(g ** 2) + T.log(pStar(T.zeros((1, m)), lambda1, lambda0, theta) / pStar(g, lambda1, lambda0, theta))
            cost = cost + temp
    return cost
# Number of simulations
N_runs = 1
# Maximum number of epochs
max_epochs = 1500
# Define number of layers and number of neurons
H_layers = np.asarray([40, 20])
# Minibatch size
batch_size = 300
# Lasagne Regularizers to be tested
regularizers = [pen_S]
# Define the regularization factors for each algorithm
reg_factors = [10**-3.5]
# Define the names (for display purposes)
names = ['SSGL_Sep']
# Load the dataset (DIGITS)
digits = sklearn.datasets.load_digits()
X = digits.data
y = digits.target
# MNIST
#mnist = sklearn.datasets.fetch_mldata('MNIST original', data_home='C:/Users/ISPAMM/Downloads')
#X = mnist.data
#y = mnist.target
# Preprocessing (input)
scaler = sklearn.preprocessing.MinMaxScaler()
X = scaler.fit_transform(X)
# Output structures
tr_errors = np.zeros((len(regularizers), N_runs))
tst_errors = np.zeros((len(regularizers), N_runs))
tr_times = np.zeros((len(regularizers), N_runs))
tr_obj = np.zeros((len(regularizers), N_runs, max_epochs))
sparsity_weights = np.zeros((len(regularizers), N_runs, len(H_layers)+1))
sparsity_neurons = np.zeros((len(regularizers), N_runs, len(H_layers)+1))
# Define the input and output symbolic variables
input_var = T.matrix(name='X')
target_var = T.ivector(name='y')
# Utility function for minibatches
def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    assert len(inputs) == len(targets)
    if shuffle:
        indices = np.arange(len(inputs))
        np.random.shuffle(indices)
    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets[excerpt]
for k in np.arange(0, N_runs):
    print("Run ", k + 1, " of ", N_runs, "...\n", end="")
    # Split the data
    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, test_size=0.25)
    # Define the network structure
    network = lasagne.layers.InputLayer((None, X.shape[1]), input_var)
    for h in H_layers:
        network = lasagne.layers.DenseLayer(network, h, nonlinearity=leaky_rectify, W=lasagne.init.GlorotNormal())
    network = lasagne.layers.DenseLayer(network, len(np.unique(y)), nonlinearity=softmax, W=lasagne.init.GlorotNormal())
    params_original = lasagne.layers.get_all_param_values(network)
    params = lasagne.layers.get_all_params(network, trainable=True)
    # Define the loss function
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    # Define the test function
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var), dtype=theano.config.floatX)
    test_fn = theano.function([input_var, target_var], test_acc, allow_input_downcast=True)
    for r in np.arange(0, len(regularizers)):
        # Set to original parameters
        lasagne.layers.set_all_param_values(network, params_original)
        # Define the regularized loss function
        loss_reg = loss.mean() + reg_factors[r] * lasagne.regularization.regularize_network_params(network, regularizers[r])
        # Update function
        # updates_reg = lasagne.updates.nesterov_momentum(loss_reg, params, learning_rate=0.01)
        updates_reg = lasagne.updates.adam(loss_reg, params)
        # Training function
        train_fn = theano.function([input_var, target_var], loss_reg, updates=updates_reg, allow_input_downcast=True)
        # Train network
        print("\tTraining with ", names[r], " regularization, epoch: ", end="")
        start = time.time()
        for epoch in range(max_epochs):
            loss_epoch = 0
            batches = 0
            if np.mod(epoch, 10) == 0:
                print(epoch, "... ", end="")
            for batch in iterate_minibatches(X_train, y_train, batch_size, shuffle=True):
                input_batch, target_batch = batch
                loss_epoch += train_fn(input_batch, target_batch)
                batches += 1
            tr_obj[r, k, epoch] = loss_epoch / batches
        end = time.time()
        tr_times[r, k] = end - start
        print(epoch, ".")
        # Final test with accuracy
        print("\tTesting the network with ", names[r], " regularization...")
        tr_errors[r, k] = test_fn(X_train, y_train)
        tst_errors[r, k] = test_fn(X_test, y_test)
        # Check sparsity
        params_trained = lasagne.layers.get_all_param_values(network, trainable=True)
        sparsity_weights[r, k, :] = [1 - (x.round(decimals=3).ravel().nonzero()[0].shape[0] / x.size) for x in params_trained[0::2]]
        sparsity_neurons[r, k, :] = [x.round(decimals=3).sum(axis=1).nonzero()[0].shape[0] for x in params_trained[0::2]]
tr_obj_mean = np.mean(tr_obj, axis=1)
# Plot the average loss
plt.figure()
plt.title('Training objective')
for r in np.arange(0, len(regularizers)):
    plt.semilogy(tr_obj_mean[r, :], label=names[r])
plt.legend()
# Print the results
print(tabulate([['Tr. accuracy [%]'] + np.mean(tr_errors, axis=1).round(decimals=4).tolist(),
                ['Test. accuracy [%]'] + np.mean(tst_errors, axis=1).round(decimals=4).tolist(),
                ['Tr. times [secs.]'] + np.mean(tr_times, axis=1).round(decimals=4).tolist(),
                ['Sparsity [%]'] + np.mean(sparsity_weights, axis=1).round(decimals=4).tolist(),
                ['Neurons'] + np.mean(sparsity_neurons, axis=1).round(decimals=4).tolist()],
               headers=[''] + names))
Here is my defined regularizer pen_S(l). But when I run the code to train the network, I am prompted with 'TypeError: cost must be a scalar.' I think the output of pen_S is already a scalar.
Can anyone help me with this?
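For what it's worth, a sketch of why Theano may be complaining (assuming the error comes from the accumulator in pen_S): T.zeros((1, 1)) is a 1x1 matrix, not a 0-dimensional scalar, and theano.grad requires cost.ndim == 0.

import theano.tensor as T

cost = T.zeros((1, 1))
print(cost.ndim)                  # 2 -> a 1x1 matrix, so "cost must be a scalar"

# Two ways to return a true scalar from pen_S:
cost = T.as_tensor_variable(0.0)  # accumulate onto a 0-d starting value
# ... cost = cost + temp ...
# or collapse the (1, 1) result at the end:
# return cost.sum()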

```AttributeError: 'module' object has no attribute 'set_random_seed'``` when I run ```python2 ./train.py``` from the terminal

The complete set of error messages is shown below:
(FYP_v2) sg97-ubuntu@SG97-ubuntu:~/SGSN$ python2 ./train.py
Traceback (most recent call last):
  File "./train.py", line 165, in <module>
    main()
  File "./train.py", line 65, in main
    tf.set_random_seed(args.random_seed)
AttributeError: 'module' object has no attribute 'set_random_seed'
(FYP_v2) sg97-ubuntu@SG97-ubuntu:~/SGSN$
I checked out this question on Stack Overflow (AttributeError: 'module' object has no attribute 'set_random_seed'), but it doesn't really apply to my situation since I'm not using Caffe.
I've also provided the Python code below for reference.
from __future__ import print_function
import argparse
from datetime import datetime
from random import shuffle
import os
import sys
import time
import math
import tensorflow as tf
import numpy as np
from utils import *
from train_image_reader import *
from net import *
parser = argparse.ArgumentParser(description='')
parser.add_argument("--snapshot_dir", default='./snapshots', help="path of snapshots")
parser.add_argument("--image_size", type=int, default=256, help="load image size")
parser.add_argument("--x_data_txt_path", default='./datasets/x_traindata.txt', help="txt of x images")
parser.add_argument("--y_data_txt_path", default='./datasets/y_traindata.txt', help="txt of y images")
parser.add_argument("--random_seed", type=int, default=1234, help="random seed")
parser.add_argument('--base_lr', type=float, default=0.0002, help='initial learning rate for adam')
parser.add_argument('--epoch', dest='epoch', type=int, default=50, help='# of epoch')
parser.add_argument('--epoch_step', dest='epoch_step', type=int, default=20, help='# of epoch to decay lr')
parser.add_argument("--lamda", type=float, default=10.0, help="L1 lamda")
parser.add_argument('--beta1', dest='beta1', type=float, default=0.5, help='momentum term of adam')
parser.add_argument("--summary_pred_every", type=int, default=200, help="times to summary.")
parser.add_argument("--save_pred_every", type=int, default=8000, help="times to save.")
parser.add_argument("--x_image_forpath", default='./datasets/train/X/images/', help="forpath of x training datas.")
parser.add_argument("--x_label_forpath", default='./datasets/train/X/labels/', help="forpath of x training labels.")
parser.add_argument("--y_image_forpath", default='./datasets/train/Y/images/', help="forpath of y training datas.")
parser.add_argument("--y_label_forpath", default='./datasets/train/Y/labels/', help="forpath of y training labels.")
args = parser.parse_args()
def save(saver, sess, logdir, step):
    model_name = 'model'
    checkpoint_path = os.path.join(logdir, model_name)
    if not os.path.exists(logdir):
        os.makedirs(logdir)
    saver.save(sess, checkpoint_path, global_step=step)
    print('The checkpoint has been created.')

def get_data_lists(data_path):
    f = open(data_path, 'r')
    datas = []
    for line in f:
        data = line.strip("\n")
        datas.append(data)
    return datas

def l1_loss(src, dst):
    return tf.reduce_mean(tf.abs(src - dst))

def gan_loss(src, dst):
    return tf.reduce_mean((src - dst) ** 2)
def main():
    if not os.path.exists(args.snapshot_dir):
        os.makedirs(args.snapshot_dir)
    x_datalists = get_data_lists(args.x_data_txt_path)  # a list of x images
    y_datalists = get_data_lists(args.y_data_txt_path)  # a list of y images
    tf.set_random_seed(args.random_seed)
    x_img = tf.placeholder(tf.float32, shape=[1, args.image_size, args.image_size, 3], name='x_img')
    x_label = tf.placeholder(tf.float32, shape=[1, args.image_size, args.image_size, 3], name='x_label')
    y_img = tf.placeholder(tf.float32, shape=[1, args.image_size, args.image_size, 3], name='y_img')
    y_label = tf.placeholder(tf.float32, shape=[1, args.image_size, args.image_size, 3], name='y_label')
    fake_y = generator(image=x_img, reuse=False, name='generator_x2y')  # G
    fake_x_ = generator(image=fake_y, reuse=False, name='generator_y2x')  # S
    fake_x = generator(image=y_img, reuse=True, name='generator_y2x')  # G'
    fake_y_ = generator(image=fake_x, reuse=True, name='generator_x2y')  # S'
    dy_fake = discriminator(image=fake_y, gen_label=x_label, reuse=False, name='discriminator_y')  # D
    dx_fake = discriminator(image=fake_x, gen_label=y_label, reuse=False, name='discriminator_x')  # D'
    dy_real = discriminator(image=y_img, gen_label=y_label, reuse=True, name='discriminator_y')  # D
    dx_real = discriminator(image=x_img, gen_label=x_label, reuse=True, name='discriminator_x')  # D'
    final_loss = gan_loss(dy_fake, tf.ones_like(dy_fake)) + gan_loss(dx_fake, tf.ones_like(dx_fake)) + args.lamda * l1_loss(x_label, fake_x_) + args.lamda * l1_loss(y_label, fake_y_)  # final objective function
    dy_loss_real = gan_loss(dy_real, tf.ones_like(dy_real))
    dy_loss_fake = gan_loss(dy_fake, tf.zeros_like(dy_fake))
    dy_loss = (dy_loss_real + dy_loss_fake) / 2
    dx_loss_real = gan_loss(dx_real, tf.ones_like(dx_real))
    dx_loss_fake = gan_loss(dx_fake, tf.zeros_like(dx_fake))
    dx_loss = (dx_loss_real + dx_loss_fake) / 2
    dis_loss = dy_loss + dx_loss  # discriminator loss
    final_loss_sum = tf.summary.scalar("final_objective", final_loss)
    dx_loss_sum = tf.summary.scalar("dx_loss", dx_loss)
    dy_loss_sum = tf.summary.scalar("dy_loss", dy_loss)
    dis_loss_sum = tf.summary.scalar("dis_loss", dis_loss)
    discriminator_sum = tf.summary.merge([dx_loss_sum, dy_loss_sum, dis_loss_sum])
    x_images_summary = tf.py_func(cv_inv_proc, [x_img], tf.float32)  # (1, 256, 256, 3) float32
    y_fake_cv2inv_images_summary = tf.py_func(cv_inv_proc, [fake_y], tf.float32)  # (1, 256, 256, 3) float32
    x_label_summary = tf.py_func(label_proc, [x_label], tf.float32)  # (1, 256, 256, 3) float32
    x_gen_label_summary = tf.py_func(label_inv_proc, [fake_x_], tf.float32)  # (1, 256, 256, 3) float32
    image_summary = tf.summary.image('images', tf.concat(axis=2, values=[x_images_summary, y_fake_cv2inv_images_summary, x_label_summary, x_gen_label_summary]), max_outputs=3)
    summary_writer = tf.summary.FileWriter(args.snapshot_dir, graph=tf.get_default_graph())
    g_vars = [v for v in tf.trainable_variables() if 'generator' in v.name]
    d_vars = [v for v in tf.trainable_variables() if 'discriminator' in v.name]
    lr = tf.placeholder(tf.float32, None, name='learning_rate')
    d_optim = tf.train.AdamOptimizer(lr, beta1=args.beta1)
    g_optim = tf.train.AdamOptimizer(lr, beta1=args.beta1)
    d_grads_and_vars = d_optim.compute_gradients(dis_loss, var_list=d_vars)
    d_train = d_optim.apply_gradients(d_grads_and_vars)  # update weights of D and D'
    g_grads_and_vars = g_optim.compute_gradients(final_loss, var_list=g_vars)
    g_train = g_optim.apply_gradients(g_grads_and_vars)  # update weights of G, G', S and S'
    train_op = tf.group(d_train, g_train)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()
    sess.run(init)
    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=50)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)
    counter = 0  # training step
    for epoch in range(args.epoch):
        shuffle(x_datalists)  # change the order of x images
        shuffle(y_datalists)  # change the order of y images
        lrate = args.base_lr if epoch < args.epoch_step else args.base_lr * (args.epoch - epoch) / (args.epoch - args.epoch_step)
        for step in range(len(x_datalists)):
            counter += 1
            x_image_resize, x_label_resize, y_image_resize, y_label_resize = TrainImageReader(args.x_image_forpath, args.x_label_forpath, args.y_image_forpath, args.y_label_forpath, x_datalists, y_datalists, step, args.image_size)
            batch_x_image = np.expand_dims(np.array(x_image_resize).astype(np.float32), axis=0)
            batch_x_label = np.expand_dims(np.array(x_label_resize).astype(np.float32), axis=0)
            batch_y_image = np.expand_dims(np.array(y_image_resize).astype(np.float32), axis=0)
            batch_y_label = np.expand_dims(np.array(y_label_resize).astype(np.float32), axis=0)
            start_time = time.time()
            feed_dict = {lr: lrate, x_img: batch_x_image, x_label: batch_x_label, y_img: batch_y_image, y_label: batch_y_label}
            if counter % args.save_pred_every == 0:
                final_loss_value, dis_loss_value, _ = sess.run([final_loss, dis_loss, train_op], feed_dict=feed_dict)
                save(saver, sess, args.snapshot_dir, counter)
            elif counter % args.summary_pred_every == 0:
                final_loss_value, dis_loss_value, final_loss_sum_value, discriminator_sum_value, image_summary_value, _ = \
                    sess.run([final_loss, dis_loss, final_loss_sum, discriminator_sum, image_summary, train_op], feed_dict=feed_dict)
                summary_writer.add_summary(final_loss_sum_value, counter)
                summary_writer.add_summary(discriminator_sum_value, counter)
                summary_writer.add_summary(image_summary_value, counter)
            else:
                final_loss_value, dis_loss_value, _ = \
                    sess.run([final_loss, dis_loss, train_op], feed_dict=feed_dict)
            print('epoch {:d} step {:d} \t final_loss = {:.3f}, dis_loss = {:.3f}'.format(epoch, step, final_loss_value, dis_loss_value))
    coord.request_stop()
    coord.join(threads)

if __name__ == '__main__':
    main()
Use tf.random.set_seed() instead of tf.set_random_seed. Link to the tensorflow doc here: https://www.tensorflow.org/api_docs/python/tf/random/set_seed?version=stable
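A minimal sketch of the substitution (assuming the FYP_v2 environment has TensorFlow 2.x installed, which is what this error suggests):

import tensorflow as tf

# TF 2.x renamed the global seeding function:
tf.random.set_seed(1234)

# Alternatively, since this script uses TF 1.x graph-mode APIs throughout
# (tf.placeholder, tf.Session, queue runners), the compat module keeps the old name:
# import tensorflow.compat.v1 as tf
# tf.disable_v2_behavior()
# tf.set_random_seed(1234)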

Machine Learning reward artificially capping

So when I run this, it works perfectly; however, for some reason the reward caps at 200. I'm not sure what could be causing this. I'm new to machine learning and this is my first project, so sorry if I am missing something stupid. I hypothesize that done is triggering before I want it to, but playing with that hasn't led to anything. Thanks so much.
import gym
import tensorflow as tf
import numpy as np
import os
import sys
env = gym.make('CartPole-v0')
discount_rate=.95
# TODO Build the policy gradient neural network
class Agent:
    def __init__(self, num_actions, state_size):
        initializer = tf.contrib.layers.xavier_initializer()
        self.input_layer = tf.placeholder(dtype=tf.float32, shape=[None, state_size])
        # Neural net starts here
        hidden_layer = tf.layers.dense(self.input_layer, 8, activation=tf.nn.relu, kernel_initializer=initializer)
        hidden_layer_2 = tf.layers.dense(hidden_layer, 8, activation=tf.nn.relu, kernel_initializer=initializer)
        # Output of neural net
        out = tf.layers.dense(hidden_layer_2, num_actions, activation=None)
        self.outputs = tf.nn.softmax(out)
        self.choice = tf.argmax(self.outputs, axis=1)
        # Training Procedure
        self.rewards = tf.placeholder(shape=[None, ], dtype=tf.float32)
        self.actions = tf.placeholder(shape=[None, ], dtype=tf.int32)
        one_hot_actions = tf.one_hot(self.actions, num_actions)
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=out, labels=one_hot_actions)
        self.loss = tf.reduce_mean(cross_entropy * self.rewards)
        self.gradients = tf.gradients(self.loss, tf.trainable_variables())
        # Create a placeholder list for gradients
        self.gradients_to_apply = []
        for index, variable in enumerate(tf.trainable_variables()):
            gradient_placeholder = tf.placeholder(tf.float32)
            self.gradients_to_apply.append(gradient_placeholder)
        # Create the operation to update gradients with the gradients placeholder.
        optimizer = tf.train.AdamOptimizer(learning_rate=1e-2)
        self.update_gradients = optimizer.apply_gradients(zip(self.gradients_to_apply, tf.trainable_variables()))
def discount_normalize_rewards(rewards):
    discounted_rewards = np.zeros_like(rewards)
    total_rewards = 0
    for i in reversed(range(len(rewards))):
        total_rewards = total_rewards * discount_rate + rewards[i]
        discounted_rewards[i] = total_rewards
    discounted_rewards -= np.mean(discounted_rewards)
    discounted_rewards /= np.std(discounted_rewards)
    return discounted_rewards
#initialize the training loop
tf.reset_default_graph()
# Modify these to match shape of actions and states in your environment
num_actions = 2
state_size = 4
path = "./cartpole-pg/"
training_episodes = 1000
max_steps_per_episode = 20000
episode_batch_size = 5
agent = Agent(num_actions, state_size)
init = tf.global_variables_initializer()
saver = tf.train.Saver(max_to_keep=2)
if not os.path.exists(path):
    os.makedirs(path)
with tf.Session() as sess:
    sess.run(init)
    total_episode_rewards = []
    # Create a buffer of 0'd gradients
    gradient_buffer = sess.run(tf.trainable_variables())
    for index, gradient in enumerate(gradient_buffer):
        gradient_buffer[index] = gradient * 0
    for episode in range(training_episodes):
        state = env.reset()
        episode_history = []
        episode_rewards = 0
        for step in range(max_steps_per_episode):
            if episode % 100 == 0:
                env.render()
            # Get weights for each action
            action_probabilities = sess.run(agent.outputs, feed_dict={agent.input_layer: [state]})
            action_choice = np.random.choice(range(num_actions), p=action_probabilities[0])
            state_next, reward, done, _ = env.step(action_choice)
            episode_history.append([state, action_choice, reward, state_next])
            state = state_next
            episode_rewards += reward
            if done:
                total_episode_rewards.append(episode_rewards)
                episode_history = np.array(episode_history)
                episode_history[:, 2] = discount_normalize_rewards(episode_history[:, 2])
                ep_gradients = sess.run(agent.gradients,
                                        feed_dict={agent.input_layer: np.vstack(episode_history[:, 0]),
                                                   agent.actions: episode_history[:, 1],
                                                   agent.rewards: episode_history[:, 2]})
                # add the gradients to the grad buffer:
                for index, gradient in enumerate(ep_gradients):
                    gradient_buffer[index] += gradient
                break
        if episode % episode_batch_size == 0:
            feed_dict_gradients = dict(zip(agent.gradients_to_apply, gradient_buffer))
            sess.run(agent.update_gradients, feed_dict=feed_dict_gradients)
            for index, gradient in enumerate(gradient_buffer):
                gradient_buffer[index] = gradient * 0
        if episode % 1 == 0:
            saver.save(sess, path + "pg-checkpoint", episode)
            print("Reward: " + str(total_episode_rewards[-1:]))
env.close()
Episodes for CartPole terminate when the pole falls and at 200 successful steps. See max_episode_steps in the linked file if you want to change this. The reason there is a 200-step max is to make evaluating trials easier (i.e., you always get episode ends, so you can evaluate episode stats) and so that the environment doesn't get stuck in a never-ending trial.
register(
    id='CartPole-v0',
    entry_point='gym.envs.classic_control:CartPoleEnv',
    max_episode_steps=200,
    reward_threshold=195.0,
)
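If you do want episodes longer than 200 steps, here are two common options, sketched under the assumption of a classic gym version (the id 'CartPole-v0-long' is made up for illustration):

import gym
from gym.envs.registration import register

# Option 1: register a variant with a higher step limit
register(
    id='CartPole-v0-long',
    entry_point='gym.envs.classic_control:CartPoleEnv',
    max_episode_steps=20000,
    reward_threshold=195.0,
)
env = gym.make('CartPole-v0-long')

# Option 2: strip the TimeLimit wrapper that enforces the 200-step cap
env = gym.make('CartPole-v0').unwrapped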
