I install the Faste-RCNN script from here, this version is modified to fit python3. After following all the instructions I still got an error as below:
Loading initial model weights from ./data/imagenet_weights/vgg16.ckpt
Unable to open table file .\data\imagenet_weights\vgg16.ckpt: Data loss: not an sstable (bad magic number): perhaps your file is in a different file format and you need to use a different restore operator?
2022-02-25 09:30:16.917233: W C:\tf_jenkins\workspace\rel-win\M\windows-gpu\PY\36\tensorflow\core\util\tensor_slice_reader.cc:95] Could not open .\data\imagenet_weights\vgg16.ckpt: Data loss: not an sstable (bad magic number): perhaps your file is in a different file format and you need to use a different restore operator? Traceback (most recent call last):
File "D:/Object detection/FRCNNPY3.6/train.py", line 218, in <module>
train.train()
File "D:/Object detection/FRCNNPY3.6/train.py", line 123, in train
variables_to_restore = self.net.get_variables_to_restore(variables, var_keep_dic)
File "D:\Object detection\FRCNNPY3.6\lib\nets\vgg16.py", line 66, in get_variables_to_restore
if v.name.split(':')[0] in var_keep_dic:
TypeError: argument of type 'NoneType' is not iterable
I was trying to solve the " 'NoneType' is not iterable " error, but still no progress.
Of course, the file be stored as vgg16.ckpt type and the path is D:\Object detection\FRCNNPY3.6\data\imagenet_weights.
From the error message I find out this line "Loading initial model weights from ./data/imagenet_weights/vgg16.ckpt" shows the path where it loading, the direction of slash is wrong for windows, this form is used in Linux, but I don't know where can I modify it.
The complete program as below:
import time
import numpy as np
import tensorflow as tf
from tensorflow.python import pywrap_tensorflow
import lib.config.config as cfg
from lib.datasets import roidb as rdl_roidb
from lib.datasets.factory import get_imdb
from lib.datasets.imdb import imdb as imdb2
from lib.layer_utils.roi_data_layer import RoIDataLayer
from lib.nets.vgg16 import vgg16
from lib.utils.timer import Timer
try:
import cPickle as pickle
except ImportError:
import pickle
import os
def get_training_roidb(imdb):
"""Returns a roidb (Region of Interest database) for use in training."""
if True:
print('Appending horizontally-flipped training examples...')
imdb.append_flipped_images()
print('done')
print('Preparing training data...')
rdl_roidb.prepare_roidb(imdb)
print('done')
return imdb.roidb
def combined_roidb(imdb_names):
"""
Combine multiple roidbs
"""
def get_roidb(imdb_name):
imdb = get_imdb(imdb_name)
print('Loaded dataset `{:s}` for training'.format(imdb.name))
imdb.set_proposal_method("gt")
print('Set proposal method: {:s}'.format("gt"))
roidb = get_training_roidb(imdb)
return roidb
roidbs = [get_roidb(s) for s in imdb_names.split('+')]
roidb = roidbs[0]
if len(roidbs) > 1:
for r in roidbs[1:]:
roidb.extend(r)
tmp = get_imdb(imdb_names.split('+')[1])
imdb = imdb2(imdb_names, tmp.classes)
else:
imdb = get_imdb(imdb_names)
return imdb, roidb
class Train:
def __init__(self):
# Create network
if cfg.FLAGS.network == 'vgg16':
self.net = vgg16(batch_size=cfg.FLAGS.ims_per_batch)
else:
raise NotImplementedError
self.imdb, self.roidb = combined_roidb("voc_2007_trainval")
self.data_layer = RoIDataLayer(self.roidb, self.imdb.num_classes)
self.output_dir = cfg.get_output_dir(self.imdb, 'default')
def train(self):
# Create session
tfconfig = tf.ConfigProto(allow_soft_placement=True)
tfconfig.gpu_options.allow_growth = True
sess = tf.Session(config=tfconfig)
with sess.graph.as_default():
tf.set_random_seed(cfg.FLAGS.rng_seed)
layers = self.net.create_architecture(sess, "TRAIN", self.imdb.num_classes, tag='default')
loss = layers['total_loss']
lr = tf.Variable(cfg.FLAGS.learning_rate, trainable=False)
momentum = cfg.FLAGS.momentum
optimizer = tf.train.MomentumOptimizer(lr, momentum)
gvs = optimizer.compute_gradients(loss)
# Double bias
# Double the gradient of the bias if set
if cfg.FLAGS.double_bias:
final_gvs = []
with tf.variable_scope('Gradient_Mult'):
for grad, var in gvs:
scale = 1.
if cfg.FLAGS.double_bias and '/biases:' in var.name:
scale *= 2.
if not np.allclose(scale, 1.0):
grad = tf.multiply(grad, scale)
final_gvs.append((grad, var))
train_op = optimizer.apply_gradients(final_gvs)
else:
train_op = optimizer.apply_gradients(gvs)
# We will handle the snapshots ourselves
self.saver = tf.train.Saver(max_to_keep=100000)
# Write the train and validation information to tensorboard
# writer = tf.summary.FileWriter(self.tbdir, sess.graph)
# valwriter = tf.summary.FileWriter(self.tbvaldir)
# Load weights
# Fresh train directly from ImageNet weights
print('Loading initial model weights from {:s}'.format(cfg.FLAGS.pretrained_model))
variables = tf.global_variables()
# Initialize all variables first
sess.run(tf.variables_initializer(variables, name='init'))
var_keep_dic = self.get_variables_in_checkpoint_file(cfg.FLAGS.pretrained_model)
# Get the variables to restore, ignorizing the variables to fix
variables_to_restore = self.net.get_variables_to_restore(variables, var_keep_dic)
restorer = tf.train.Saver(variables_to_restore)
restorer.restore(sess, cfg.FLAGS.pretrained_model)
print('Loaded.')
# Need to fix the variables before loading, so that the RGB weights are changed to BGR
# For VGG16 it also changes the convolutional weights fc6 and fc7 to
# fully connected weights
self.net.fix_variables(sess, cfg.FLAGS.pretrained_model)
print('Fixed.')
sess.run(tf.assign(lr, cfg.FLAGS.learning_rate))
last_snapshot_iter = 0
timer = Timer()
iter = last_snapshot_iter + 1
last_summary_time = time.time()
while iter < cfg.FLAGS.max_iters + 1:
# Learning rate
if iter == cfg.FLAGS.step_size + 1:
# Add snapshot here before reducing the learning rate
# self.snapshot(sess, iter)
sess.run(tf.assign(lr, cfg.FLAGS.learning_rate * cfg.FLAGS.gamma))
timer.tic()
# Get training data, one batch at a time
blobs = self.data_layer.forward()
# Compute the graph without summary
try:
rpn_loss_cls, rpn_loss_box, loss_cls, loss_box, total_loss = self.net.train_step(sess, blobs, train_op)
except Exception:
# if some errors were encountered image is skipped without increasing iterations
print('image invalid, skipping')
continue
timer.toc()
iter += 1
# Display training information
if iter % (cfg.FLAGS.display) == 0:
print('iter: %d / %d, total loss: %.6f\n >>> rpn_loss_cls: %.6f\n '
'>>> rpn_loss_box: %.6f\n >>> loss_cls: %.6f\n >>> loss_box: %.6f\n ' % \
(iter, cfg.FLAGS.max_iters, total_loss, rpn_loss_cls, rpn_loss_box, loss_cls, loss_box))
print('speed: {:.3f}s / iter'.format(timer.average_time))
if iter % cfg.FLAGS.snapshot_iterations == 0:
self.snapshot(sess, iter )
def get_variables_in_checkpoint_file(self, file_name):
try:
reader = pywrap_tensorflow.NewCheckpointReader(file_name)
var_to_shape_map = reader.get_variable_to_shape_map()
return var_to_shape_map
except Exception as e: # pylint: disable=broad-except
print(str(e))
if "corrupted compressed block contents" in str(e):
print("It's likely that your checkpoint file has been compressed "
"with SNAPPY.")
def snapshot(self, sess, iter):
net = self.net
if not os.path.exists(self.output_dir):
os.makedirs(self.output_dir)
# Store the model snapshot
filename = 'vgg16_faster_rcnn_iter_{:d}'.format(iter) + '.ckpt'
# filename = os.path.join(self.output_dir, filename)
filename = os.path.join(self.output_dir, filename)
self.saver.save(sess, filename)
print('Wrote snapshot to: {:s}'.format(filename))
# Also store some meta information, random state, etc.
nfilename = 'vgg16_faster_rcnn_iter_{:d}'.format(iter) + '.pkl'
nfilename = os.path.join(self.output_dir, nfilename)
# current state of numpy random
st0 = np.random.get_state()
# current position in the database
cur = self.data_layer._cur
# current shuffled indeces of the database
perm = self.data_layer._perm
# Dump the meta info
with open(nfilename, 'wb') as fid:
pickle.dump(st0, fid, pickle.HIGHEST_PROTOCOL)
pickle.dump(cur, fid, pickle.HIGHEST_PROTOCOL)
pickle.dump(perm, fid, pickle.HIGHEST_PROTOCOL)
pickle.dump(iter, fid, pickle.HIGHEST_PROTOCOL)
return filename, nfilename
if __name__ == '__main__':
train = Train()
train.train()
Related
I am trying to get an actor critic variant of the pendulum running, but I seem to be running into a particular problem.
RuntimeError: Found dtype Double but expected Float
I saw this had come up multiple times before so I have been through those and have attempted to change the data types of my loss (kept in comments) but it is still not working. Could anyone point out how to resolve this so that I can learn from it?
Full code below
import gym, os
import numpy as np
from itertools import count
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.distributions import Normal
from collections import namedtuple
SavedAction = namedtuple('SavedAction', ['log_prob', 'value'])
LOG_SIG_MAX = 2
LOG_SIG_MIN = -20
class ActorCritic(nn.Module):
"""
Implementing both heads of the actor critic model
"""
def __init__(self, state_space, action_space):
super(ActorCritic, self).__init__()
self.state_space = state_space
self.action_space = action_space
# HL 1
self.linear1 = nn.Linear(self.state_space, 128)
# HL 2
self.linear2 = nn.Linear(128, 256)
# Outputs
self.critic_head = nn.Linear(256, 1)
self.action_mean = nn.Linear(256, self.action_space)
self.action_std = nn.Linear(256, self.action_space)
# Saving
self.saved_actions = []
self.rewards = []
# Optimizer
self.optimizer = optim.Adam(self.parameters(), lr = 1e-3)
self.eps = np.finfo(np.float32).eps.item()
def forward(self, state):
"""
Forward pass for both actor and critic
"""
# State to Layer 1
l1_output = F.relu(self.linear1(state))
# Layer 1 to Layer 2
l2_output = F.relu(self.linear2(l1_output))
# Layer 2 to Action
mean = self.action_mean(l2_output)
std = self.action_std(l2_output)
std = torch.clamp(std, min=LOG_SIG_MIN, max = LOG_SIG_MAX)
std = std.exp()
# Layer 2 to Value
value_est = self.critic_head(l2_output)
return value_est, mean, std
def select_action(self,state):
state = torch.from_numpy(state).float().unsqueeze(0)
value_est, mean, std = self.forward(state)
value_est = value_est.reshape(-1)
# Make prob Normal dist
dist = Normal(mean, std)
action = dist.sample()
action = torch.tanh(action)
ln_prob = dist.log_prob(action)
ln_prob = ln_prob.sum()
self.saved_actions.append(SavedAction(ln_prob, value_est))
action = action.numpy()
return action[0]
def compute_returns(self, gamma): # This is the error causing code
"""
Calculate losses and do backprop
"""
R = 0
saved_actions = self.saved_actions
policy_losses = []
value_losses = []
returns = []
for r in self.rewards[::-1]:
# Discount value
R = r + gamma*R
returns.insert(0,R)
returns = torch.tensor(returns)
returns = (returns - returns.mean())/(returns.std()+self.eps)
for (log_prob, value), R in zip(saved_actions, returns):
advantage = R - value.item()
advantage = advantage.type(torch.FloatTensor)
policy_losses.append(-log_prob*advantage)
value_losses.append(F.mse_loss(value, torch.tensor([R])))
self.optimizer.zero_grad()
loss = torch.stack(policy_losses).sum() + torch.stack(value_losses).sum()
loss = loss.type(torch.FloatTensor)
loss.backward()
self.optimizer.step()
del self.rewards[:]
del self.saved_actions[:]
env = gym.make("Pendulum-v0")
state_space = env.observation_space.shape[0]
action_space = env.action_space.shape[0]
# Train Expert AC
model = ActorCritic(state_space, action_space)
train = True
if train == True:
# Main loop
window = 50
reward_history = []
for ep in count():
state = env.reset()
ep_reward = 0
for t in range(1,1000):
if ep%50 == 0:
env.render()
action = model.select_action(state)
state, reward, done, _ = env.step(action)
model.rewards.append(reward)
ep_reward += reward
if done:
break
print(reward)
model.compute_returns(0.99) # Error begins here
reward_history.append(ep_reward)
# Result information
if ep % 50 == 0:
mean = np.mean(reward_history[-window:])
print(f"Episode: {ep} Last Reward: {ep_reward} Rolling Mean: {mean}")
if np.mean(reward_history[-100:])>199:
print(f"Environment solved at episode {ep}, average run length > 200")
break
Complete error log, some elements redacted for privacy. Originally the loop actor critic and main loop were in separate files. Comments added to appropriate error causing lines.
Traceback (most recent call last):
File "pendulum.py", line 59, in <module>
model.compute_returns(0.99)
File "/home/x/Software/git/x/x/solvers/actorcritic_cont.py", line 121, in compute_returns
loss.backward()
File "/home/x/.local/lib/python3.8/site-packages/torch/_tensor.py", line 255, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
File "/home/x/.local/lib/python3.8/site-packages/torch/autograd/__init__.py", line 147, in backward
Variable._execution_engine.run_backward(
RuntimeError: Found dtype Double but expected Float
Answering here in case anyone has similar issues in the future.
The output of the reward in OpenAI Gym Pendulum-v0 is a double, so when you compute the return over the episode you need to change that to a float tensor.
I did this just by:
returns = torch.tensor(returns)
returns = (returns - returns.mean())/(returns.std()+self.eps)
returns = returns.type(torch.FloatTensor)
This is how the dataset was created
def createDataset1(
outputPath,
imagePathList,
labelList,
lexiconList=None,
validset_percent=10,
testset_percent=0,
random_seed=1111,
checkValid=True,
):
"""
Create LMDB dataset for CRNN training.
ARGS:
outputPath : LMDB output path
imagePathList : list of image path
labelList : list of corresponding groundtruth texts
lexiconList : (optional) list of lexicon lists
checkValid : if true, check the validity of every image
"""
train_path = os.path.join(outputPath, "training", "9M")
valid_path = os.path.join(outputPath, "validation", "9M")
# CAUTION: if train_path (lmdb) already exists, this function add dataset
# into it. so remove former one and re-create lmdb.
if os.path.exists(train_path):
os.system(f"rm -r {train_path}")
if os.path.exists(valid_path):
os.system(f"rm -r {valid_path}")
os.makedirs(train_path, exist_ok=True)
os.makedirs(valid_path, exist_ok=True)
gt_train_path = gt_file.replace(".txt", "_train.txt")
gt_valid_path = gt_file.replace(".txt", "_valid.txt")
data_log = open(gt_train_path, "w", encoding="utf-8")
if testset_percent != 0:
test_path = os.path.join(outputPath, "evaluation", dataset_name)
if os.path.exists(test_path):
os.system(f"rm -r {test_path}")
os.makedirs(test_path, exist_ok=True)
gt_test_path = gtFile.replace(".txt", "_test.txt")
assert(len(imagePathList) == len(labelList))
nSamples = len(imagePathList)
num_valid_dataset = int(nSamples * validset_percent / 100.0)
num_test_dataset = int(nSamples * testset_percent / 100.0)
num_train_dataset = nSamples - num_valid_dataset - num_test_dataset
print("validation datasets: ",num_valid_dataset,"\n", "test datasets: ", num_test_dataset, " \n training datasets: ", num_train_dataset)
env = lmdb.open(outputPath, map_size=1099511627776)
cache = {}
cnt = 1
random.seed(random_seed)
random.shuffle(imagePathList)
for i in tqdm(range(nSamples)):
data_log.write(imagePathList[i])
imagePath = imagePathList[i]
label = labelList[i]
if len(label) == 0:
continue
if not os.path.exists(imagePath):
print('%s does not exist' % imagePath)
continue
with open(imagePath, 'rb') as f:
imageBin = f.read()
if checkValid:
if not checkImageIsValid(imageBin):
print('%s is not a valid image' % imagePath)
continue
embed_vec = fasttext_model[label]
imageKey = 'image-%09d' % cnt
labelKey = 'label-%09d' % cnt
embedKey = 'embed-%09d' % cnt
cache[imageKey] = imageBin
cache[labelKey] = label.encode()
cache[embedKey] = ' '.join(str(v) for v in embed_vec.tolist()).encode()
if lexiconList:
lexiconKey = 'lexicon-%09d' % cnt
cache[lexiconKey] = ' '.join(lexiconList[i])
if cnt % 1000 == 0:
writeCache(env, cache)
cache = {}
print('Written %d / %d' % (cnt, nSamples))
#finish train dataset and start validation dataset
if i + 1 == num_train_dataset:
print(f"# Train dataset: {num_train_dataset} is finished")
cache["num-samples".encode()] = str(num_train_dataset).encode()
writeCache(env, cache)
data_log.close()
#start validation set
env = lmdb.open(valid_path, map_size=30 * 2 ** 30)
cache = {}
cnt = 0
data_log = open(gt_valid_path, "w", encoding="utf-8")
# Finish train/valid dataset and Start test dataset
if (i + 1 == num_train_dataset + num_valid_dataset) and num_test_dataset != 0:
print(f"# Valid dataset: {num_valid_dataset} is finished")
cache["num-samples".encode()] = str(num_valid_dataset).encode()
writeCache(env, cache)
data_log.close()
# start test set
env = lmdb.open(test_path, map_size=30 * 2 ** 30)
cache = {}
cnt = 0 # not 1 at this time
data_log = open(gt_test_path, "w", encoding="utf-8")
cnt += 1
if testset_percent == 0:
cache["num-samples".encode()] = str(num_valid_dataset).encode()
writeCache(env, cache)
print(f"# Valid datast: {num_valid_dataset} is finished")
else:
cache["num-samples".encode()] = str(num_test_dataset).encode()
writeCache(env, cache)
print(f"# Test datast: {num_test_dataset} is finished")
This is how i am trying to retrieve the data
class LmdbDataset(data.Dataset):
def __init__(self, root, voc_type, max_len, num_samples, transform=None):
super(LmdbDataset, self).__init__()
if global_args.run_on_remote:
dataset_name = os.path.basename(root)
data_cache_url = "/cache/%s" % dataset_name
if not os.path.exists(data_cache_url):
os.makedirs(data_cache_url)
if mox.file.exists(root):
mox.file.copy_parallel(root, data_cache_url)
else:
raise ValueError("%s not exists!" % root)
self.env = lmdb.open(data_cache_url, max_readers=32, readonly=True)
else:
self.env = lmdb.open(root, max_readers=32, readonly=True)
assert self.env is not None, "cannot create lmdb from %s" % root
self.txn = self.env.begin()
self.voc_type = voc_type
self.transform = transform
self.max_len = max_len
# nums = b"num-samples"
# print('NUM SAMPLES ------ \n',nums)
nSamples = self.txn.get('num-samples'.encode())
print("STRING nSamples :", nSamples)
self.nSamples = int(self.txn.get(b"num-samples"))
self.nSamples = min(self.nSamples, num_samples)
assert voc_type in ['LOWERCASE', 'ALLCASES', 'ALLCASES_SYMBOLS']
self.EOS = 'EOS'
self.PADDING = 'PADDING'
self.UNKNOWN = 'UNKNOWN'
self.voc = get_vocabulary(voc_type, EOS=self.EOS, PADDING=self.PADDING, UNKNOWN=self.UNKNOWN)
self.char2id = dict(zip(self.voc, range(len(self.voc))))
self.id2char = dict(zip(range(len(self.voc)), self.voc))
self.rec_num_classes = len(self.voc)
self.lowercase = (voc_type == 'LOWERCASE')
I am getting the error below whenever the code tries to call elf.txn.get(b"num-samples")
Traceback (most recent call last):
File "main.py", line 268, in <module>
main(args)
File "main.py", line 157, in main
train_dataset, train_loader = get_data_lmdb(args.synthetic_train_data_dir, args.voc_type, args.max_len, args.num_train,
File "main.py", line 66, in get_data_lmdb
dataset_list.append(LmdbDataset(data_dir_, voc_type, max_len, num_samples))
File "/Users/SEED/lib/datasets/dataset.py", line 189, in __init__
self.nSamples = int(self.txn.get(b"num-samples"))
TypeError: int() argument must be a string, a bytes-like object or a number, not 'NoneType'
I have tried many different suggestions online and some stackoverflow threads but could not figure out what is wrong.
What is causing this error and how can I fix this?
Your code is dense, convoluted and hard to follow the flow of data. It mixes lots of computation with IO, side effects, and even system calls (os.system(f"rm -r {test_path}"), you should use shutil.rmtree instead for example).
Try breaking up each action that you wish to perform so that it primarily does one specific thing:
logical operations but no side effects
input (read from file or network)
output (write to file, generate a result)
file system operations/cleanup
At each stage, you should perform verifications, and use the rule of least power. If you expect self.txn to always have 'num-samples, then you should use self.txn[b'num-samples']rather.get, which defaults to None`. This makes it easier to catch errors earlier in the chain.
Also I have no idea what the lmbd module is. Is that a library, or another file in your codebase? You should link to any libraries you are using if they aren't well known. I see several python packages pertaining to lmbd.
From the code example, I can dissect for you. May be it will help you to close the issue.
The log says..
self.nSamples = int(self.txn.get(b"num-samples"))
TypeError: int() argument must be a string, a bytes-like object or a number, not 'NoneType'
self.txn should be a dictionary to use get method there. As you are trying to get it, there can be two scenarios
num-samples can be '' in the data means there is no value
there is no variable called num-samples in the data.
Both of them can cause a NoneType which cannot be handled by type conversion(to int). So, you have to check whether that field is present in the data or not, by using self.txn.keys() and see if that key is present
Furthermore, if
self.nSamples = int(self.txn.get(b"num-samples"))
is failing and not the above statement
nSamples = self.txn.get('num-samples'.encode())
then you can simply decode the variable and use it there
like
Samples = nSamples.decode("utf-8")
try:
int_samples = int(Samples)
except TypeError:
print(Samples)
The following training curve is generated using the same Tensorflow + Keras script written in Python:
RED line uses five features.
GREEN line uses seven features.
BLUE line uses nine features.
Can anyone tell me the probable cause of the oscillation of the GREEN line so that I can troubleshoot my script?
Source code:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
#os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0" # Use both gpus for training.
import sys, random
import time
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import ModelCheckpoint
import numpy as np
from lxml import etree, objectify
# <editor-fold desc="GPU">
# resolve GPU related issues.
try:
physical_devices = tf.config.list_physical_devices('GPU')
for gpu_instance in physical_devices:
tf.config.experimental.set_memory_growth(gpu_instance, True)
except Exception as e:
pass
# END of try
# </editor-fold>
# <editor-fold desc="Lxml helper">
class LxmlHelper:
#classmethod
def objectify_xml(cls, input_path_dir):
file_dom = etree.parse(input_path_dir) # parse xml and convert it into DOM
file_xml_bin = etree.tostring(file_dom, pretty_print=False, encoding="ascii") # encode DOM into ASCII object
file_xml_text = file_xml_bin.decode() # convert binary ASCII object into ASCII text
objectified_xml = objectify.fromstring(file_xml_text) # convert text into a Doxygen object
return objectified_xml
# </editor-fold>
# <editor-fold desc="def encode(letter)">
def encode(letter: str):
if letter == 'H':
return [1.0, 0.0, 0.0]
elif letter == 'E':
return [0.0, 1.0, 0.0]
elif letter == 'C':
return [0.0, 0.0, 1.0]
elif letter == '-':
return [0.0, 0.0, 0.0]
# END of function
def encode_string_1(pattern_str: str):
# Iterate over the string
one_hot_binary_str = []
for ch in pattern_str:
try:
one_hot_binary_str = one_hot_binary_str + encode(ch)
except Exception as e:
print(pattern_str, one_hot_binary_str, ch)
# END of for loop
return one_hot_binary_str
# END of function
def encode_string_2(pattern_str: str):
# Iterate over the string
one_hot_binary_str = []
for ch in pattern_str:
temp_encoded_vect = [encode(ch)]
one_hot_binary_str = one_hot_binary_str + temp_encoded_vect
# END of for loop
return one_hot_binary_str
# END of function
# </editor-fold>
# <editor-fold desc="def load_data()">
def load_data_k(fname: str, class_index: int, feature_start_index: int, **selection):
"""Loads data for training and validation
:param fname: (``string``) - name of the file with the data
:param selection: (``kwargs``) - see below
:return: four tensorflow tensors: training input, training output, validation input and validation output
:Keyword Arguments:
* *top_n_lines* (``number``) --
take top N lines of the input and disregard the rest
* *random_n_lines* (``number``) --
take random N lines of the input and disregard the rest
* *validation_part* (``float``) --
separate N_lines * given_fraction of the input lines from the training set and use
them for validation. When the given_fraction = 1.0, then the same input set of
N_lines is used both for training and validation (this is the default)
"""
i = 0
file = open(fname)
if "top_n_lines" in selection:
lines = [next(file) for _ in range(int(selection["top_n_lines"]))]
elif "random_n_lines" in selection:
tmp_lines = file.readlines()
lines = random.sample(tmp_lines, int(selection["random_n_lines"]))
else:
lines = file.readlines()
data_x, data_y, data_z = [], [], []
for l in lines:
row = l.strip().split() # return a list of words from the line.
x = [float(ix) for ix in row[feature_start_index:]] # convert 3rd to 20th word into a vector of float numbers.
y = encode(row[class_index]) # convert the 3rd word into binary.
z = encode_string_1(row[class_index+1])
data_x.append(x) # append the vector into 'data_x'
data_y.append(y) # append the vector into 'data_y'
data_z.append(z) # append the vector into 'data_z'
# END for l in lines
num_rows = len(data_x)
given_fraction = selection.get("validation_part", 1.0)
if given_fraction > 0.9999:
valid_x, valid_y, valid_z = data_x, data_y, data_z
else:
n = int(num_rows * given_fraction)
data_x, data_y, data_z = data_x[n:], data_y[n:], data_z[n:]
valid_x, valid_y, valid_z = data_x[:n], data_y[:n], data_z[:n]
# END of if-else block
tx = tf.convert_to_tensor(data_x, np.float32)
ty = tf.convert_to_tensor(data_y, np.float32)
tz = tf.convert_to_tensor(data_z, np.float32)
vx = tf.convert_to_tensor(valid_x, np.float32)
vy = tf.convert_to_tensor(valid_y, np.float32)
vz = tf.convert_to_tensor(valid_z, np.float32)
return tx, ty, tz, vx, vy, vz
# END of the function
# </editor-fold>
# <editor-fold desc="def create_model()">
def create_model(n_hidden_1, n_hidden_2, num_classes, num_features):
# create the model
model = Sequential()
model.add(tf.keras.layers.InputLayer(input_shape=(num_features,)))
model.add(tf.keras.layers.Dense(n_hidden_1, activation='sigmoid'))
model.add(tf.keras.layers.Dense(n_hidden_2, activation='sigmoid'))
###model.add(tf.keras.layers.Dense(n_hidden_3, activation='sigmoid'))
model.add(tf.keras.layers.Dense(num_classes, activation='softmax'))
# instantiate the optimizer
opt = keras.optimizers.SGD(learning_rate=LEARNING_RATE)
# compile the model
model.compile(
optimizer=opt,
loss="categorical_crossentropy",
metrics="categorical_accuracy"
)
# return model
return model
# </editor-fold>
if __name__ == "__main__":
# <editor-fold desc="(input/output parameters)">
my_project_routine = LxmlHelper.objectify_xml("my_project_evaluate.xml")
# input data
INPUT_DATA_FILE = str(my_project_routine.input.input_data_file)
INPUT_PATH = str(my_project_routine.input.input_path)
CLASS_INDEX = int(my_project_routine.input.class_index)
FEATURE_INDEX = int(my_project_routine.input.feature_index)
# output data
OUTPUT_PATH = str(my_project_routine.output.output_path)
MODEL_FILE = str(my_project_routine.output.model_file)
TRAINING_PROGRESS_FILE = str(my_project_routine.output.training_progress_file)
# Learning parameters
LEARNING_RATE = float(my_project_routine.training_params.learning_rate)
EPOCH_SIZE = int(my_project_routine.training_params.epoch_size)
BATCH_SIZE = int(my_project_routine.training_params.batch_size)
INPUT_LINES_COUNT = int(my_project_routine.input.input_lines_count)
VALIDATION_PART = float(my_project_routine.training_params.validation_part)
SAVE_PERIOD = str(my_project_routine.output.save_period)
# NN parameters
HIDDEN_LAYER_1_NEURON_COUNT = int(my_project_routine.hidden_layers.one)
HIDDEN_LAYER_2_NEURON_COUNT = int(my_project_routine.hidden_layers.two)
###HIDDEN_LAYER_3_NEURON_COUNT = int(my_project_routine.hidden_layers.three)
CLASS_COUNT = int(my_project_routine.class_count)
FEATURES_COUNT = int(my_project_routine.features_count)
input_file_path_str = os.path.join(INPUT_PATH, INPUT_DATA_FILE)
training_progress_file_path_str = os.path.join(OUTPUT_PATH, TRAINING_PROGRESS_FILE)
model_file_path = os.path.join(OUTPUT_PATH, MODEL_FILE)
# command-line arg processing
input_file_name_str = None
if len(sys.argv) > 1:
input_file_name_str = sys.argv[1]
else:
input_file_name_str = input_file_path_str
# END of if-else
# </editor-fold>
# <editor-fold desc="(load data from file)">
# load training data from the disk
train_x, train_y, _, validate_x, validate_y, _ = \
load_data_k(
fname=input_file_name_str,
class_index=CLASS_INDEX,
feature_start_index=FEATURE_INDEX,
random_n_lines=INPUT_LINES_COUNT,
validation_part=VALIDATION_PART
)
print("training data size : ", len(train_x))
print("validation data size : ", len(validate_x))
# </editor-fold>
### STEPS_PER_EPOCH = len(train_x) // BATCH_SIZE
### VALIDATION_STEPS = len(validate_x) // BATCH_SIZE
# <editor-fold desc="(model creation)">
# load previously saved NN model
model = None
try:
model = keras.models.load_model(model_file_path)
print("Loading NN model from file.")
model.summary()
except Exception as ex:
print("No NN model found for loading.")
# END of try-except
# </editor-fold>
# <editor-fold desc="(model run)">
# # if there is no model loaded, create a new model
if model is None:
csv_logger = keras.callbacks.CSVLogger(training_progress_file_path_str)
checkpoint = ModelCheckpoint(
model_file_path,
monitor='loss',
verbose=1,
save_best_only=True,
mode='auto',
save_freq='epoch'
)
callbacks_vector = [
csv_logger,
checkpoint
]
# Set mirror strategy
#strategy = tf.distribute.MirroredStrategy(devices=["/device:GPU:0","/device:GPU:1"])
#with strategy.scope():
print("New NN model created.")
# create sequential NN model
model = create_model(
n_hidden_1=HIDDEN_LAYER_1_NEURON_COUNT,
n_hidden_2=HIDDEN_LAYER_2_NEURON_COUNT,
##n_hidden_3=HIDDEN_LAYER_3_NEURON_COUNT,
num_classes=CLASS_COUNT,
num_features=FEATURES_COUNT
)
# Train the model with the new callback
history = model.fit(
train_x, train_y,
validation_data=(validate_x, validate_y),
batch_size=BATCH_SIZE,
epochs=EPOCH_SIZE,
callbacks=[callbacks_vector],
shuffle=True,
verbose=2
)
print(history.history.keys())
# END of ... with
# END of ... if
# </editor-fold>
Plotting Script
import os
from argparse import ArgumentParser
import random
from typing import List
import matplotlib.pyplot as plt
import numpy as np
import math
import sys
import datetime
class Quad:
def __init__(self, x_vector, y_vector, color_char, label_str):
self.__x_vector = x_vector
self.__y_vector = y_vector
self.__color_char = color_char
self.__label_str = label_str
def get_x_vector(self):
return self.__x_vector
def get_y_vector(self):
return self.__y_vector
def get_color_char(self):
return self.__color_char
def get_label_str(self):
return self.__label_str
class HecaPlotClass:
def __init__(self):
self.__x_label_str: str = None
self.__y_label_str: str = None
self.__title_str: str = None
self.__trio_vector: List[Quad] = []
self.__plotter = plt
#property
def x_label_str(self):
return self.__x_label_str
#x_label_str.setter
def x_label_str(self, t):
self.__x_label_str = t
#property
def y_label_str(self):
return self.__y_label_str
#y_label_str.setter
def y_label_str(self, t):
self.__y_label_str = t
#property
def title_str(self):
return self.__title_str
#title_str.setter
def title_str(self, t):
self.__title_str = t
def add_y_axes(self, trio_obj: Quad):
self.__trio_vector.append(trio_obj)
def generate_plot(self):
for obj in self.__trio_vector:
x_vector = obj.get_x_vector()
y_vector = obj.get_y_vector()
label_str = obj.get_label_str()
# print(label_str)
# print(len(x_vector))
# print(len(y_vector))
self.__plotter.plot(
x_vector,
y_vector,
color=obj.get_color_char(),
label=label_str
)
# END of ... for loop
# Naming the x-axis, y_1_vector-axis and the whole graph
self.__plotter.xlabel(self.__x_label_str)
self.__plotter.ylabel(self.__y_label_str)
self.__plotter.title(self.__title_str)
# Adding legend, which helps us recognize the curve according to it's color
self.__plotter.legend()
# To load the display window
#self.__plotter.show()
def save_png(self, output_directory_str):
output_file_str = os.path.join(output_directory_str, self.__title_str + '.png')
self.__plotter.savefig(output_file_str)
def save_pdf(self, output_directory_str):
output_file_str = os.path.join(output_directory_str, self.__title_str + '.pdf')
self.__plotter.savefig(output_file_str)
class MainClass(object):
__colors_vector = ['red', 'green', 'blue', 'cyan', 'magenta', 'yellow', 'orange', 'lightgreen', 'crimson']
__working_dir = r"."
__file_names_vector = ["training_progress-32.txt", "training_progress-64.txt", "training_progress-128.txt"]
__input_files_vector = []
__output_directory = None
__column_no_int = 0
__split_percentage_at_tail_int = 100
__is_pdf_output = False
__is_png_output = False
# <editor-fold desc="def load_data()">
#classmethod
def __load_data(cls, fname: str, percetage_int:int, column_no_int:int):
np_array = np.loadtxt(
fname,
# usecols=range(1,11),
dtype=np.float32,
skiprows=1,
delimiter=","
)
size_vector = np_array.shape
array_len_int = size_vector[0]
rows_count_int = int(percetage_int * array_len_int / 100)
np_array = np_array[-rows_count_int:]
x = np_array[:, 0]
y = np_array[:, column_no_int]
return x, y
# END of the function
# </editor-fold>
# <editor-fold desc="(__parse_args())">
#classmethod
def __parse_args(cls):
# initialize argument parser
my_parser = ArgumentParser()
my_parser.add_argument("-c", help="column no.", type=int)
my_parser.add_argument('-i', nargs='+', help='a list of input files', required=True)
my_parser.add_argument("-o", help="output directory", type=str)
my_parser.add_argument("-n", help="percentage of data to split from tail", type=float)
my_parser.add_argument("--pdf", help="PDF output", action='store_true')
my_parser.add_argument("--png", help="PNG output", action='store_true')
# parse the argument
args = my_parser.parse_args()
cls.__input_files_vector = args.i
cls.__output_directory = args.o
cls.__split_percentage_at_tail_int = args.n
cls.__column_no_int = args.c
cls.__is_pdf_output = args.pdf
cls.__is_png_output = args.png
# </editor-fold>
#classmethod
def main(cls):
cls.__parse_args()
if cls.__input_files_vector is None:
cls.__input_files_vector = cls.__file_names_vector
if cls.__output_directory is None:
cls.__output_directory = cls.__working_dir
if cls.__split_percentage_at_tail_int is None:
cls.__split_percentage_at_tail_int = 100
if cls.__column_no_int is None:
cls.__column_no_int = 1
my_project_plot_obj = HecaPlotClass()
i = 0
for file_path_str in cls.__input_files_vector:
print(file_path_str)
x_vector, y_vector = cls.__load_data(os.path.join(cls.__working_dir, file_path_str), cls.__split_percentage_at_tail_int, cls.__column_no_int)
my_project_plot_obj.x_label_str = "Epoch"
my_project_plot_obj.y_label_str = "Accuracy"
my_project_plot_obj.title_str = "training_plot-{date:%Y-%m-%d_%H:%M:%S}".format(date=datetime.datetime.now())
my_project_plot_obj.x_axis_vector = x_vector
if i == 0:
random_int = 0
else:
random_int = i % (len(cls.__colors_vector)-1)
# END of ... if
print("random_int : ", random_int)
my_project_plot_obj.add_y_axes(Quad(x_vector, y_vector, cls.__colors_vector[random_int], file_path_str))
i = i + 1
# END of ... for loop
my_project_plot_obj.generate_plot()
my_project_plot_obj.save_png(cls.__output_directory)
my_project_plot_obj.save_pdf(cls.__output_directory)
if __name__ == "__main__":
MainClass.main()
The primary reason could be improper (non-random ~ ordered) distribution of data.
If you notice the accuracy beyond epoch 180, there is a orderly switching between the accuracy between ~0.43 (approx.) and ~0.33 (~approx.), and occasionally ~0.23 (approx.). The more important thing to notice is that the accuracy is decreasing (there's no improvement in validation accuracy) as we increase the epochs.
The accuracy can increase in such cases if you (1) reduce batch size, or (2) use a better optimizer like Adam. And check the learning rate.
These changes can help the shift and oscillation, as well.
Additionally, Running average of the accuracy can be plotted to avoid the oscillation. This is again a mitigation scheme rather than a correction scheme. But, what it does is removes the order (partition of the data) and mixes the nearby data.
Lastly, I would also reshuffle the data and normalize after each layer. See if that helps.
Generally, sharp jumps and flat lines in the accuracy usually mean that a group of examples is classified as a given class at a same time. If your dataset contains, say, 50 examples with the same combination of 7 features then they would go into the same class at the same time. This is what probably causes sharp jumps - identical or similar examples clustered together.
So for example, if you have 50 men aged 64, and a decision boundary to classify them as more prone to an illness shifts from >65 to >63, then accuracy changes rapidly as all of them change classification at the same time.
Regarding the oscillation of the curve - due to the fact above, oscillation will be amplified by small changes in learning. Your network learns based on cross entropy, which means that it minimizes the difference between target and your predictions. This means that it operates on the difference between probability and target (say, 0.3 vs class 0) instead of class and target like accuracy (so, 0 vs 0) in the same example. Cross entropy is much more smooth as it is not affected by the issue outlined above.
I have trained the yolov2 and yolov3 models using Keras with this Github project https://github.com/experiencor/keras-yolo2
Now I want to use the trained model (.h5) in darknet prediction. Essentially I need to convert this h5 model into the format expected by darknet(.weights). I have seen this project https://github.com/allanzelener/YAD2K/blob/master/yad2k.py
which does the reverse of what I want?
Did anyone try this before?
This is a problem I also faced but I solved it a bit by using the following code.
# Script converter_h5-2-wts.py
# -*- coding: utf-8 -*-
''' yolov3_keras_to_darknet.py'''
import argparse
import numpy
import numpy as np
import keras
from keras.models import load_model
from keras import backend as K
def parser():
parser = argparse.ArgumentParser(description="Darknet\'s yolov3.cfg and
yolov3.weights \
converted into Keras\'s yolov3.h5!")
parser.add_argument('-cfg_path', help='yolov3.cfg')
parser.add_argument('-h5_path', help='yolov3.h5')
parser.add_argument('-output_path', help='yolov3.weights')
return parser.parse_args()
class WeightSaver(object):
def __init__(self,h5_path,output_path):
self.model = load_model(h5_path)
self.layers = {weight.name:weight for weight in self.model.weights}
self.sess = K.get_session()
self.fhandle = open(output_path,'wb')
self._write_head()
def _write_head(self):
numpy_data = numpy.ndarray(shape=(3,),
dtype='int32',
buffer=np.array([0,2,0],dtype='int32') )
self.save(numpy_data)
numpy_data = numpy.ndarray(shape=(1,),
dtype='int64',
buffer=np.array([320000],dtype='int64'))
self.save(numpy_data)
def get_bn_layername(self,num):
layer_name = 'batch_normalization_{num}'.format(num=num)
bias = self.layers['{0}/beta:0'.format(layer_name)]
scale = self.layers['{0}/gamma:0'.format(layer_name)]
mean = self.layers['{0}/moving_mean:0'.format(layer_name)]
var = self.layers['{0}/moving_variance:0'.format(layer_name)]
bias_np = self.get_numpy(bias)
scale_np = self.get_numpy(scale)
mean_np = self.get_numpy(mean)
var_np = self.get_numpy(var)
return bias_np,scale_np,mean_np,var_np
def get_convbias_layername(self,num):
layer_name = 'conv2d_{num}'.format(num=num)
bias = self.layers['{0}/bias:0'.format(layer_name)]
bias_np = self.get_numpy(bias)
return bias_np
def get_conv_layername(self,num):
layer_name = 'conv2d_{num}'.format(num=num)
conv = self.layers['{0}/kernel:0'.format(layer_name)]
conv_np = self.get_numpy(conv)
return conv_np
def get_numpy(self,layer_name):
numpy_data = self.sess.run(layer_name)
return numpy_data
def save(self,numpy_data):
bytes_data = numpy_data.tobytes()
self.fhandle.write(bytes_data)
self.fhandle.flush()
def close(self):
self.fhandle.close()
class KerasParser(object):
def __init__(self, cfg_path, h5_path, output_path):
self.block_gen = self._get_block(cfg_path)
self.weights_saver = WeightSaver(h5_path, output_path)
self.count_conv = 0
self.count_bn = 0
def _get_block(self,cfg_path):
block = {}
with open(cfg_path,'r', encoding='utf-8') as fr:
for line in fr:
line = line.strip()
if '[' in line and ']' in line:
if block:
yield block
block = {}
block['type'] = line.strip(' []')
elif not line or '#' in line:
continue
else:
key,val = line.strip().replace(' ','').split('=')
key,val = key.strip(), val.strip()
block[key] = val
yield block
def close(self):
self.weights_saver.close()
def conv(self, block):
self.count_conv += 1
batch_normalize = 'batch_normalize' in block
print('handing.. ',self.count_conv)
# If bn exists, process bn first, in order of bias, scale, mean, var
if batch_normalize:
bias,scale,mean,var = self.bn()
self.weights_saver.save(bias)
scale = scale.reshape(1,-1)
mean = mean.reshape(1,-1)
var = var.reshape(1,-1)
remain = np.concatenate([scale,mean,var],axis=0)
self.weights_saver.save(remain)
# biase
else:
conv_bias = self.weights_saver.get_convbias_layername(self.count_conv)
self.weights_saver.save(conv_bias)
# weights
conv_weights = self.weights_saver.get_conv_layername(self.count_conv)
# (height, width, in_dim, out_dim) (out_dim, in_dim, height, width)
conv_weights = np.transpose(conv_weights,[3,2,0,1])
self.weights_saver.save(conv_weights)
def bn(self):
self.count_bn += 1
bias,scale,mean,var = self.weights_saver.get_bn_layername(self.count_bn)
return bias,scale,mean,var
def main():
args = parser()
keras_loader = KerasParser(args.cfg_path, args.h5_path, args.output_path)
for block in keras_loader.block_gen:
if 'convolutional' in block['type']:
keras_loader.conv(block)
keras_loader.close()
if __name__ == "__main__":
main()
Please do the indentation as the code pasted is from text file. I could not leave the four spaces on in the front of the line so please indent the code at your end.
usage is as below
$python converter_h5-2-wts.py -cfg_path text.cfg -h5_path test.h5 -output_path test.weights
If this works for you please limit vote for this post only but thank the original coder for the source code in python. I just searched some time back for this code.
I trained a Wide & Deep model using the pre-made Estimator class (DNNLinearCombinedClassifier), by essentially following the tutorial on tensorflow.org.
I wanted to do inference/serving, but without using tensorflow-serving. This basically comes down to feeding some test data to the correct input tensor and retrieving the output tensor.
However, I am not sure what the input nodes/layer should be. In the tensorflow graph (graph.pbtxt), the following nodes seem relevant. But they are also related to the input queue which is mainly used during training, but not necessarily inference (I can just send one instance at a time).
name: "enqueue_input/random_shuffle_queue"
name: "enqueue_input/Placeholder"
name: "enqueue_input/Placeholder_1"
name: "enqueue_input/Placeholder_2"
...
name: "enqueue_input/Placeholder_84"
name: "enqueue_input/random_shuffle_queue_EnqueueMany_1"
name: "enqueue_input/random_shuffle_queue_EnqueueMany_2"
name: "enqueue_input/random_shuffle_queue_EnqueueMany_3"
name: "enqueue_input/random_shuffle_queue_EnqueueMany_4"
name: "enqueue_input/random_shuffle_queue_EnqueueMany"
name: "enqueue_input/sub/y"
name: "enqueue_input/sub"
name: "enqueue_input/Maximum/x"
name: "enqueue_input/Maximum"
name: "enqueue_input/Cast"
name: "enqueue_input/mul/y"
name: "enqueue_input/mul"
Does anyone know the answer? Thanks in advance!
If you want inference, but without using tensorflow-serving, you can just use the tf.estimator.Estimator predict method.
But if you want to do it manually (so that is runs faster), you need a workaround. I am not sure if what I did was exactly the best approach, but it worked. Here's my solution.
1) Let's do the imports and create variables and fake data:
import os
import numpy as np
from functools import partial
import pickle
import tensorflow as tf
N = 10000
EPOCHS = 1000
BATCH_SIZE = 2
X_data = np.random.random((N, 10))
y_data = (np.random.random((N, 1)) >= 0.5).astype(int)
my_dir = os.getcwd() + "/"
2) Define an input_fn, which you will use tf.data.Dataset. Save the tensor names in a dictionary ("input_tensor_map"), which maps the input key to the tensor name.
def my_input_fn(X, y=None, is_training=False):
def internal_input_fn(X, y=None, is_training=False):
if (not isinstance(X, dict)):
X = {"x": X}
if (y is None):
dataset = tf.data.Dataset.from_tensor_slices(X)
else:
dataset = tf.data.Dataset.from_tensor_slices((X, y))
if (is_training):
dataset = dataset.repeat().shuffle(100)
batch_size = BATCH_SIZE
else:
batch_size = 1
dataset = dataset.batch(batch_size)
dataset_iter = dataset.make_initializable_iterator()
if (y is None):
features = dataset_iter.get_next()
labels = None
else:
features, labels = dataset_iter.get_next()
input_tensor_map = dict()
for input_name, tensor in features.items():
input_tensor_map[input_name] = tensor.name
with open(os.path.join(my_dir, 'input_tensor_map.pickle'), 'wb') as f:
pickle.dump(input_tensor_map, f, protocol=pickle.HIGHEST_PROTOCOL)
tf.add_to_collection(tf.GraphKeys.TABLE_INITIALIZERS, dataset_iter.initializer)
return (features, labels) if (not labels is None) else features
return partial(internal_input_fn, X=X, y=y, is_training=is_training)
3) Define your model, to be used in your tf.estimator.Estimator. For example:
def my_model_fn(features, labels, mode):
output = tf.layers.dense(inputs=features["x"], units=1, activation=None)
logits = tf.identity(output, name="logits")
prediction = tf.nn.sigmoid(logits, name="predictions")
classes = tf.to_int64(tf.greater(logits, 0.0), name="classes")
predictions_dict = {
"class": classes,
"probabilities": prediction
}
if mode == tf.estimator.ModeKeys.PREDICT:
return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions_dict)
one_hot_labels = tf.squeeze(tf.one_hot(tf.cast(labels, dtype=tf.int32), 2))
loss = tf.losses.sigmoid_cross_entropy(multi_class_labels=one_hot_labels, logits=logits)
tf.summary.scalar("loss", loss)
accuracy = tf.reduce_mean(tf.to_float(tf.equal(labels, classes)))
tf.summary.scalar("accuracy", accuracy)
# Configure the Training Op (for TRAIN mode)
if (mode == tf.estimator.ModeKeys.TRAIN):
train_op = tf.train.AdamOptimizer().minimize(loss, global_step=tf.train.get_global_step())
return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
return tf.estimator.EstimatorSpec(mode=mode, loss=loss)
4) Train and freeze your model. The freeze method is from TensorFlow: How to freeze a model and serve it with a python API, which I added a tiny modification.
def freeze_graph(output_node_names):
"""Extract the sub graph defined by the output nodes and convert
all its variables into constant
Args:
model_dir: the root folder containing the checkpoint state file
output_node_names: a string, containing all the output node's names,
comma separated
"""
if (output_node_names is None):
output_node_names = 'loss'
if not tf.gfile.Exists(my_dir):
raise AssertionError(
"Export directory doesn't exists. Please specify an export "
"directory: %s" % my_dir)
if not output_node_names:
print("You need to supply the name of a node to --output_node_names.")
return -1
# We retrieve our checkpoint fullpath
checkpoint = tf.train.get_checkpoint_state(my_dir)
input_checkpoint = checkpoint.model_checkpoint_path
# We precise the file fullname of our freezed graph
absolute_model_dir = "/".join(input_checkpoint.split('/')[:-1])
output_graph = absolute_model_dir + "/frozen_model.pb"
# We clear devices to allow TensorFlow to control on which device it will load operations
clear_devices = True
# We start a session using a temporary fresh Graph
with tf.Session(graph=tf.Graph()) as sess:
# We import the meta graph in the current default Graph
saver = tf.train.import_meta_graph(input_checkpoint + '.meta', clear_devices=clear_devices)
# We restore the weights
saver.restore(sess, input_checkpoint)
# We use a built-in TF helper to export variables to constants
output_graph_def = tf.graph_util.convert_variables_to_constants(
sess, # The session is used to retrieve the weights
tf.get_default_graph().as_graph_def(), # The graph_def is used to retrieve the nodes
output_node_names.split(",") # The output node names are used to select the usefull nodes
)
# Finally we serialize and dump the output graph to the filesystem
with tf.gfile.GFile(output_graph, "wb") as f:
f.write(output_graph_def.SerializeToString())
print("%d ops in the final graph." % len(output_graph_def.node))
return output_graph_def
# *****************************************************************************
tf.logging.set_verbosity(tf.logging.INFO)
estimator = tf.estimator.Estimator(model_fn=my_model_fn, model_dir=my_dir)
if (estimator.latest_checkpoint() is None):
estimator.train(input_fn=my_input_fn(X=X_data, y=y_data, is_training=True), steps=EPOCHS)
freeze_graph("predictions,classes")
tf.logging.set_verbosity(tf.logging.INFO)
estimator = tf.estimator.Estimator(model_fn=my_model_fn, model_dir=my_dir)
if (estimator.latest_checkpoint() is None):
estimator.train(input_fn=my_input_fn(X=X_data, y=y_data, is_training=True), steps=EPOCHS)
freeze_graph("predictions,classes")
5) Finally, you can use the frozen graph for inference, input tensors names are in the dictionary that you saved. Again, the method to load the freezed model from TensorFlow: How to freeze a model and serve it with a python API.
def load_frozen_graph(prefix="frozen_graph"):
frozen_graph_filename = os.path.join(my_dir, "frozen_model.pb")
# We load the protobuf file from the disk and parse it to retrieve the
# unserialized graph_def
with tf.gfile.GFile(frozen_graph_filename, "rb") as f:
graph_def = tf.GraphDef()
graph_def.ParseFromString(f.read())
# Then, we import the graph_def into a new Graph and returns it
with tf.Graph().as_default() as graph:
# The name var will prefix every op/nodes in your graph
# Since we load everything in a new graph, this is not needed
tf.import_graph_def(graph_def, name=prefix)
return graph
# *****************************************************************************
X_test = {"x": np.random.random((int(N/2), 10))}
prefix = "frozen_graph"
graph = load_frozen_graph(prefix)
for op in graph.get_operations():
print(op.name)
with open(os.path.join(my_dir, 'input_tensor_map.pickle'), 'rb') as f:
input_tensor_map = pickle.load(f)
with tf.Session(graph=graph) as sess:
input_feed = dict()
for key, tensor_name in input_tensor_map.items():
tensor = graph.get_tensor_by_name(prefix + "/" + tensor_name)
input_feed[tensor] = X_test[key]
logits = graph.get_operation_by_name(prefix + "/logits").outputs[0]
probabilities = graph.get_operation_by_name(prefix + "/predictions").outputs[0]
classes = graph.get_operation_by_name(prefix + "/classes").outputs[0]
logits_values, probabilities_values, classes_values = sess.run([logits, probabilities, classes], feed_dict=input_feed)