Tensor shape changes in tf.while_loop() - python

# initialize start token
target = tf.constant([[2]], dtype=tf.int32, shape=[1, 1])  # 2 - <BOS>
dummy_outputs = [4, 7, 1, 9, 15, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]  # for testing
self.max_length = 10

def generate_next_token(tar):
    # out, _ = self.model([features, tf.expand_dims(tar, 0)], training=False)  # shape: [1, seq_length, vocab_size]
    # out = out[:, -1, :]  # take last token of sequence. shape: [1, vocab_size]
    # out = tf.random.categorical(out, 1)  # shape: [1, 1]
    # out = tf.cast(out, tf.int32)
    out = tf.constant([[dummy_outputs.pop(0)]], dtype=tf.int32, shape=[1, 1])
    print(tar.shape)
    tar = tf.concat([tar, out], axis=-1)
    return tar

def end_of_sequence_not_reached(tar):
    print(tar.shape)
    return tf.math.logical_and(tf.less(tf.shape(tar)[-1], self.max_length),
                               tf.not_equal(tar[-1], 3))  # 3 - <EOS>

target = tf.while_loop(cond=end_of_sequence_not_reached, body=generate_next_token,
                       loop_vars=[target], shape_invariants=[tf.TensorShape([1, None])])
Somehow, the shape of tar changes from (1, n) to (n,) after every iteration of the tf.while_loop: the dimension of length 1 is lost, so I have to work around it by doing

if len(tar.shape) < len(prev_shape):
    tar = tf.expand_dims(tar, 0)
Why is this and how can I prevent it?
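A more self-contained workaround (just a sketch, not a diagnosis of the root cause) is to pin the rank at the top of the loop body instead of comparing shapes, so every iteration starts from a well-defined [1, n] tensor:

def generate_next_token(tar):
    tar = tf.reshape(tar, [1, -1])  # restore the leading batch dimension if it was dropped
    # ... generate `out` as above ...
    return tf.concat([tar, out], axis=-1)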

Related

How can I calculate histograms with merged bins?

I want to ask you about calculating the histogram in Python using OpenCV. I used this code:
hist = cv2.calcHist(im, [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])
The result gave me the histogram of each color channel with 8 bins, but what I want to get is:
1st bin (R=0-32,G=0-32,B=0-32),
2nd bin (R=33-64,G=0-32,B=0-32),
and so on,
so I will have 512 bins in total.
From my point of view, your cv2.calcHist call isn't correct:
hist = cv2.calcHist(im, [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])
The first parameter should be a list of images:
hist = cv2.calcHist([im], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])
Let's see this small example:
import cv2
import numpy as np
# Red blue square of size [4, 4], i.e. eight pixels (255, 0, 0) and eight pixels (0, 0, 255); Attention: BGR ordering!
image = np.zeros((4, 4, 3), dtype=np.uint8)
image[:, 0:2, 2] = 255
image[:, 2:4, 0] = 255
# Calculate histogram with two bins [0 - 127] and [128 - 255] per channel:
# Result should be hist["bin 0", "bin 0", "bin 1"] = 8 (red) and hist["bin 1", "bin 0", "bin 0"] = 8 (blue)
# Original cv2.calcHist call with two bins [0 - 127] and [128 - 255]
hist = cv2.calcHist(image, [0, 1, 2], None, [2, 2, 2], [0, 256, 0, 256, 0, 256])
print(hist, '\n') # Not correct
# Correct cv2.calcHist call
hist = cv2.calcHist([image], [0, 1, 2], None, [2, 2, 2], [0, 256, 0, 256, 0, 256])
print(hist, '\n') # Correct
[[[8. 0.]
  [0. 0.]]

 [[0. 0.]
  [0. 4.]]]

[[[0. 8.]
  [0. 0.]]

 [[8. 0.]
  [0. 0.]]]
As you can see, your version only has 12 values in total, whereas there are 16 pixels in the image! Also, it's not clear what "bins" (if any) are represented.
So, having the proper cv2.calcHist call, your general idea/approach is correct! Maybe you just need a little hint on "how to read" the resulting hist:
import cv2
import numpy as np
# Colored rectangle of size [32, 16] with one "color" per bin for eight bins per channel,
# i.e. 512 pixels, such that each of the resulting 512 bins has value 1
x = np.linspace(16, 240, 8, dtype=np.uint8)
image = np.reshape(np.moveaxis(np.array(np.meshgrid(x, x, x)), [0, 1, 2, 3], [3, 0, 1, 2]), (32, 16, 3))
# Correct cv2.calcHist call
hist = cv2.calcHist([image], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])
# Lengthy output of each histogram bin
for B in np.arange(hist.shape[0]):
    for G in np.arange(hist.shape[1]):
        for R in np.arange(hist.shape[2]):
            r = 'R=' + str(R*32).zfill(3) + '-' + str((R+1)*32-1).zfill(3)
            g = 'G=' + str(G*32).zfill(3) + '-' + str((G+1)*32-1).zfill(3)
            b = 'B=' + str(B*32).zfill(3) + '-' + str((B+1)*32-1).zfill(3)
            print('(' + r + ', ' + g + ', ' + b + '): ', int(hist[B, G, R]))
(R=000-031, G=000-031, B=000-031): 1
(R=032-063, G=000-031, B=000-031): 1
(R=064-095, G=000-031, B=000-031): 1
[... 506 more lines ...]
(R=160-191, G=224-255, B=224-255): 1
(R=192-223, G=224-255, B=224-255): 1
(R=224-255, G=224-255, B=224-255): 1
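As a side note, the same joint histogram can also be computed in pure numpy, in case OpenCV isn't a hard requirement; a sketch using the image from above (the bin ordering matches the hist[B, G, R] layout):

import numpy as np
pixels = image.reshape(-1, 3)  # one row of (B, G, R) values per pixel
hist_np, _ = np.histogramdd(pixels, bins=(8, 8, 8), range=((0, 256), (0, 256), (0, 256)))
# hist_np[b, g, r] counts the pixels whose B, G and R values fall into the respective bins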
Hope that helps!

How to implement stdp in tensorflow?

I'm trying to implement STDP (Spike-Timing Dependent Plasticity) in tensorflow. It's a bit complicated. Any ideas on how to get it running entirely within a tensorflow graph?
It works like this: say I have 2 input neurons, and they connect to 3 output neurons, via this matrix: [[1.0, 1.0, 0.0], [0.0, 0.0, 1.0]] (input neuron 0 connects to output neurons 0 and 1...).
Say I have these spikes for the input neurons (2 neurons, 7 timesteps):
Input Spikes:
[[0, 0, 1, 1, 0, 1, 0],
 [1, 1, 0, 0, 0, 0, 1]]
And these spikes for the output neurons (3 neurons, 7 timesteps):
Output Spikes:
[[0, 0, 0, 1, 0, 0, 1],
 [1, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 1, 1, 1]]
Now, for each non-zero weight, I want to compute a dw. For instance, for input neuron 0 connecting to output neuron 0:
The timestamps of the spikes for input neuron 0 are [2, 3, 5], and the timestamps for output neuron 0 are [3, 6]. Now, I compute all the delta times:
Delta Times = [ 2-3, 2-6, 3-3, 3-6, 5-3, 5-6 ] = [ -1, -4, 0, -3, 2, -1 ]
Then, I compute some function (the actual STDP function, which isn't important for this question - some exponential thing)
dw = SUM [ F(-1), F(-4), F(0), F(-3), F(2), F(-1) ]
And that's the dw for the weight connecting input neuron 0 to output neuron 0. Repeat for all non-zero weights.
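All the pairwise delta times fall out of a single broadcasted subtraction; a small numpy sketch with the timestamps above:

import numpy as np
in_times = np.array([2, 3, 5])    # spike times of input neuron 0
out_times = np.array([3, 6])      # spike times of output neuron 0
deltas = (in_times[:, None] - out_times[None, :]).ravel()  # all input - output pairs
print(deltas)  # [-1 -4  0 -3  2 -1]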
So I can do all this in numpy, but I'd like to be able to do it entirely within a single tensorflow graph. In particular, I'm stuck on computing the delta times. And how to do all this for all non-zero weights, in parallel.
This is the actual stdp function, btw (the constants can be parameters):
def stdp_f(x):
    return tf.where(
        x == 0, np.zeros(x.shape), tf.where(
            x > 0, 1.0 * tf.exp(-1.0 * x / 10.0), -1.0 * 1.0 * tf.exp(x / 10.0)))
A note on performance: the method given by @jdehesa below is both correct and clever, but it also turns out to be slow. In particular, for a real neural network of 784 input neurons feeding into 400 neurons, over 500 time steps, the spike_match step performs multiplication of (784, 1, 500, 1) and (1, 400, 1, 500) tensors.
I am not familiar with STDP, so I hope I understood correctly what you meant. I think this does what you describe:
import tensorflow as tf

def f(x):
    # STDP function
    return x * 1

def stdp(input_spikes, output_spikes):
    input_shape = tf.shape(input_spikes)
    t = input_shape[-1]
    # Compute STDP function for all possible time difference values
    stdp_values = f(tf.cast(tf.range(-t + 1, t), dtype=input_spikes.dtype))
    # Arrange in matrix such that position [i, j] contains f(i - j)
    matrix_idx = tf.expand_dims(tf.range(t - 1, 2 * t - 1), 1) + tf.range(0, -t, -1)
    stdp_matrix = tf.gather(stdp_values, matrix_idx)
    # Find spike matches
    spike_match = (input_spikes[:, tf.newaxis, :, tf.newaxis] *
                   output_spikes[tf.newaxis, :, tf.newaxis, :])
    # Sum values where there are spike matches
    return tf.reduce_sum(spike_match * stdp_matrix, axis=(2, 3))

# Test
input_spikes = [[0, 0, 1, 1, 0, 1, 0],
                [1, 1, 0, 0, 0, 0, 1]]
output_spikes = [[0, 0, 0, 1, 0, 0, 1],
                 [1, 0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 1, 1, 1]]
with tf.Graph().as_default(), tf.Session() as sess:
    ins = tf.placeholder(tf.float32, [None, None])
    outs = tf.placeholder(tf.float32, [None, None])
    res = stdp(ins, outs)
    res_val = sess.run(res, feed_dict={ins: input_spikes, outs: output_spikes})
    print(res_val)
    # [[ -7.  10. -15.]
    #  [-13.   7. -24.]]
Here I assume that f is probably expensive (and that its value is the same for every pair of neurons), so I compute it only once for every possible time delta and then redistribute the computed values in a matrix, so I can multiply at the pairs of coordinates where the input and output spikes happen.
I used the identity function for f as a placeholder, so the resulting values are actually just the sum of time differences in this case.
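To see the index trick concretely, here is a small numpy sketch of the same construction for t = 3 with the identity f, confirming that entry [i, j] of the matrix holds f(i - j):

import numpy as np
t = 3
values = np.arange(-t + 1, t)  # f(-2), f(-1), f(0), f(1), f(2) for f = identity
idx = np.arange(t - 1, 2 * t - 1)[:, None] + np.arange(0, -t, -1)
print(values[idx])
# [[ 0 -1 -2]
#  [ 1  0 -1]
#  [ 2  1  0]]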
EDIT: Just for reference, replacing f with the STDP function you included:
def f(x):
    # tf.equal/tf.greater rather than ==/> so the comparisons are elementwise in graph mode
    return tf.where(tf.equal(x, 0),
                    tf.zeros_like(x),
                    tf.where(tf.greater(x, 0),
                             1.0 * tf.exp(-1.0 * x / 10.0),
                             -1.0 * 1.0 * tf.exp(x / 10.0)))
The result is:
[[-3.4020822   2.1660795  -5.694256  ]
 [-2.974073    0.45364904 -3.1197631 ]]

TypeError: unsupported operand type(s) for //: 'NoneType' and 'int'

In ROI pooling, you have a region of interest (ROI) and you want to pool it to a specific size.
[ROI pooling animation]
I'm trying to implement the same thing as in the gif.
The whole image is
image_pl = tf.placeholder(dtype=tf.float32) # the whole region
the region of interest is
x = 0
y = 3
w = 7
h = -1
roi = tf.slice(image_pl, [x, y, 0], [w, h, -1]) # region of interest
Now I try to get the 4 parts, because I want to crop every region of interest, no matter its size, to the size 2x2.
w_roi = roi.get_shape()[0].value
h_roi = roi.get_shape()[1].value
roi_part1 = tf.slice(roi, [0, 0, 0], [w_roi // 2, h_roi // 2, -1]) # first part from the gif
roi_part2 = tf.slice(roi, [w_roi - w_roi // 2, 0, 0], [w_roi, h_roi // 2, -1]) # second part from the gif
roi_part3 = tf.slice(roi, [0, h_roi - h_roi // 2, 0], [w_roi // 2, h_roi, -1])
roi_part4 = tf.slice(roi, [w_roi - w_roi // 2, h_roi - h_roi // 2, 0], [w_roi, h_roi, -1])
But here I get the error.
TypeError: unsupported operand type(s) for //: 'NoneType' and 'int'
Here is the whole code.
How can I implement the idea of that gif?
import numpy as np
import tensorflow as tf
image_pl = tf.placeholder(dtype=tf.float32) # the whole region
x = 0
y = 3
w = 7
h = -1
roi = tf.slice(image_pl, [x, y, 0], [w, h, -1]) # region of interest
w_roi = roi.get_shape()[0].value
h_roi = roi.get_shape()[1].value
roi_part1 = tf.slice(roi, [0, 0, 0], [w_roi // 2, h_roi // 2, -1]) # first part from the gif
roi_part2 = tf.slice(roi, [w_roi - w_roi // 2, 0, 0], [w_roi, h_roi // 2, -1]) # second part from the gif
roi_part3 = tf.slice(roi, [0, h_roi - h_roi // 2, 0], [w_roi // 2, h_roi, -1])
roi_part4 = tf.slice(roi, [w_roi - w_roi // 2, h_roi - h_roi // 2, 0], [w_roi, h_roi, -1])
output1 = tf.reduce_max(roi_part1) # maximum of the region 1 in the region of interest
output2 = tf.reduce_max(roi_part2)
output3 = tf.reduce_max(roi_part3)
output4 = tf.reduce_max(roi_part4)
output = tf.concat([output1, output2, output3, output4], 0)
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())
    image = np.random.uniform(0, 1, (8, 8, 1))
    curr_image, curr_roi, curr_output = sess.run([image_pl, roi, output], feed_dict={image_pl: image})
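The TypeError itself happens because image_pl is created without a static shape, so every dimension of roi.get_shape() is None at graph-construction time, and None // 2 fails. One way around this (a sketch, not a full ROI-pooling implementation) is to read the sizes at run time with tf.shape(), which always yields concrete int32 values; note also that the third argument of tf.slice takes sizes, not end coordinates:

roi_shape = tf.shape(roi)  # int32 tensor, evaluated at run time
w_roi, h_roi = roi_shape[0], roi_shape[1]
roi_part1 = tf.slice(roi, [0, 0, 0], [w_roi // 2, h_roi // 2, -1])
roi_part2 = tf.slice(roi, [w_roi // 2, 0, 0], [w_roi - w_roi // 2, h_roi // 2, -1])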

Multi layer perceptron weights not changing

I am fairly new to Machine Learning and started off with "Machine Learning: An Algorithmic Perspective". I am trying to make a logistic classifier that distinguishes malicious programs from benign ones by tweaking the code given on the book's website. However, the weights associated with the hidden layer and output layer are not changing, even after 100000 epochs.
I have tried running the algorithm with the complete dataset as well as a partial version of it, still no luck.
Here is my MLP class
import numpy as np

class mlp:
    def __init__(self, inputs, targets, nhidden, beta=1, momentum=0.9, outtype='logistic'):
        """ Constructor """
        # Set up network size
        self.nin = np.shape(inputs)[1]
        self.nout = np.shape(targets)[1]
        self.ndata = np.shape(inputs)[0]
        self.nhidden = nhidden
        self.beta = beta
        self.momentum = momentum
        self.outtype = outtype

        # Initialise network
        self.weights1 = (np.zeros((self.nin + 1, self.nhidden), dtype=float) - 0.5) * 2 / np.sqrt(self.nin)
        self.weights2 = (np.zeros((self.nhidden + 1, self.nout), dtype=float) - 0.5) * 2 / np.sqrt(self.nhidden)

    def earlystopping(self, inputs, targets, valid, validtargets, eta, niterations=100):
        valid = np.concatenate((valid, -np.ones((np.shape(valid)[0], 1))), axis=1)
        old_val_error1 = 100002
        old_val_error2 = 100001
        new_val_error = 100000
        count = 0
        while (((old_val_error1 - new_val_error) > 0.001) or ((old_val_error2 - old_val_error1) > 0.001)):
            count += 1
            print(count)
            self.mlptrain(inputs, targets, eta, niterations)
            old_val_error2 = old_val_error1
            old_val_error1 = new_val_error
            validout = self.mlpfwd(valid)
            new_val_error = 0.5 * np.sum((validtargets - validout) ** 2)
        print("Stopped", new_val_error, old_val_error1, old_val_error2)
        return new_val_error
    def mlptrain(self, inputs, targets, eta, niterations):
        """ Train the thing """
        # Add the inputs that match the bias node
        inputs = np.concatenate((inputs, -np.ones((self.ndata, 1))), axis=1)
        change = range(self.ndata)

        print(self.weights2)
        updatew1 = np.zeros((np.shape(self.weights1)))
        updatew2 = np.zeros((np.shape(self.weights2)))
        for n in range(niterations):
            self.outputs = self.mlpfwd(inputs)
            # error = 0.5 * np.sum((self.outputs - targets) ** 2)
            if (np.mod(n, 100) == 0):
                print("Iteration: ", n, " Weight2: ", self.weights2)

            # Different types of output neurons
            if self.outtype == 'linear':
                deltao = (self.outputs - targets) / self.ndata
            elif self.outtype == 'logistic':
                deltao = self.beta * (self.outputs - targets) * self.outputs * (1.0 - self.outputs)
            elif self.outtype == 'softmax':
                deltao = (self.outputs - targets) * (self.outputs * (-self.outputs) + self.outputs) / self.ndata
            else:
                print("error")

            deltah = self.hidden * self.beta * (1.0 - self.hidden) * (np.dot(deltao, np.transpose(self.weights2)))
            updatew1 = eta * (np.dot(np.transpose(inputs), deltah[:, :-1])) + self.momentum * updatew1
            updatew2 = eta * (np.dot(np.transpose(self.hidden), deltao)) + self.momentum * updatew2
            self.weights1 -= updatew1
            self.weights2 -= updatew2

            # Randomise order of inputs (not necessary for matrix-based calculation)
            # np.random.shuffle(change)
            # inputs = inputs[change,:]
            # targets = targets[change,:]
        print(self.weights2)
    def mlpfwd(self, inputs):
        """ Run the network forward """
        self.hidden = np.dot(inputs, self.weights1)
        self.hidden = 1.0 / (1.0 + np.exp(-self.beta * self.hidden))
        self.hidden = np.concatenate((self.hidden, -np.ones((np.shape(inputs)[0], 1))), axis=1)

        outputs = np.dot(self.hidden, self.weights2)

        # Different types of output neurons
        if self.outtype == 'linear':
            return outputs
        elif self.outtype == 'logistic':
            return 1.0 / (1.0 + np.exp(-self.beta * outputs))
        elif self.outtype == 'softmax':
            normalisers = np.sum(np.exp(outputs), axis=1) * np.ones((1, np.shape(outputs)[0]))
            return np.transpose(np.transpose(np.exp(outputs)) / normalisers)
        else:
            print("error")
    def confmat(self, inputs, targets):
        """Confusion matrix"""
        # Add the inputs that match the bias node
        inputs = np.concatenate((inputs, -np.ones((np.shape(inputs)[0], 1))), axis=1)
        outputs = self.mlpfwd(inputs)
        nclasses = np.shape(targets)[1]

        if nclasses == 1:
            nclasses = 2
            outputs = np.where(outputs > 0.5, 1, 0)
        else:
            # 1-of-N encoding
            outputs = np.argmax(outputs, 1)
            targets = np.argmax(targets, 1)

        cm = np.zeros((nclasses, nclasses))
        for i in range(nclasses):
            for j in range(nclasses):
                cm[i, j] = np.sum(np.where(outputs == j, 1, 0) * np.where(targets == i, 1, 0))

        print(outputs)
        print(targets)
        print("Confusion matrix is:")
        print(cm)
        print("Percentage Correct: ", np.trace(cm) / np.sum(cm) * 100)
Here is my calling code that supplies data
import mlp
import numpy as np
apk_train_data = np.array([
[4, 1, 6, 29, 0, 3711, 1423906, 0],
[20, 1, 5, 24, 0, 4082, 501440, 0],
[3, 0, 1, 6, 0, 5961, 2426358, 0],
[0, 0, 2, 27, 0, 6074, 28762, 0],
[12, 1, 3, 17, 0, 4066, 505, 0],
[1, 0, 2, 5, 0, 1284, 38504, 0],
[2, 0, 2, 10, 0, 2421, 5827165, 0],
[5, 0, 17, 97, 0, 25095, 7429, 0],
[1, 1, 3, 22, 6, 4539, 9100705, 0],
[2, 0, 4, 15, 0, 2054, 264563, 0],
[3, 1, 6, 19, 0, 3562, 978171, 0],
[8, 0, 5, 12, 3, 1741, 1351990, 0],
[9, 0, 5, 12, 2, 1660, 2022743, 0],
[9, 0, 5, 12, 2, 1664, 2022743, 0],
[10, 4, 11, 70, 8, 43944, 51488321, 1],
[6, 0, 3, 18, 0, 8511, 19984102, 1],
[11, 2, 6, 44, 0, 61398, 32139, 1],
[0, 0, 0, 0, 0, 1008, 23872, 1],
[7, 1, 1, 16, 3, 46792, 94818, 1],
[3, 2, 1, 13, 2, 8263, 208820, 1],
[0, 0, 0, 2, 0, 2749, 3926, 1],
[10, 0, 1, 9, 0, 5220, 2275848, 1],
[1, 1, 3, 34, 6, 50030, 814322, 1],
[2, 2, 4, 48, 7, 86406, 12895, 1],
[0, 1, 5, 45, 2, 63060, 803121, 1],
[1, 0, 2, 11, 7, 7602, 1557, 1],
[3, 0, 1, 15, 3, 20813, 218352, 1]
])
apk_test_data = np.array([
[0, 0, 1, 9, 0, 4317, 118082, 0],
[8, 0, 5, 12, 3, 1742, 1351990, 0],
[8, 0, 5, 12, 3, 1744, 1351990, 0],
[0, 0, 1, 11, 2, 17630, 326164, 1],
[10, 2, 6, 45, 7, 22668, 30257520, 1],
[1, 0, 1, 8, 0, 9317, 33000349, 1],
[3, 0, 1, 15, 3, 20813, 218352, 1]
])
p = mlp.mlp(apk_train_data[:, 0:7], apk_train_data[:, 7:], 9)
p.mlptrain(apk_train_data[:, 0:7], apk_train_data[:, 7:], 0.25, 100000)
p.confmat(apk_test_data[:, 0:7], apk_test_data[:, 7:])
Each vector has 7 feature dimensions, and the last entry is the target.
Here is the full text file containing the dataset
https://drive.google.com/open?id=1q_aGNgHxTBh_mmVAzVXKBa27NTJKeKV8
Please tell me what I am doing wrong. If there is an easy-to-use library that does the same, please suggest it.
As mentioned in the comments, initializing the network weights randomly should make the network train. Note that np.zeros(...) - 0.5 gives every weight the same value, so all hidden units compute the same function and receive the same updates; random initialization breaks that symmetry.
# Initialise network
self.weights1 = (np.random.rand(self.nin + 1, self.nhidden) - 0.5) * 2 / np.sqrt(self.nin)
self.weights2 = (np.random.rand(self.nhidden + 1, self.nout) - 0.5) * 2 / np.sqrt(self.nhidden)
Then, my observation from your data is that the features are not at all comparable in scale, which means the gradient updates will be dominated by a single feature. To remedy that, one option is to standardize your data.
from sklearn.preprocessing import StandardScaler

# Note: sklearn expects 2D arrays for fit/transform, hence the reshapes; the data should
# also be cast to float first, or the scaled values will be truncated on assignment.
for i in range(apk_train_data.shape[1] - 1):
    scaler = StandardScaler().fit(apk_train_data[:, i].reshape(-1, 1))
    apk_train_data[:, i] = scaler.transform(apk_train_data[:, i].reshape(-1, 1)).ravel()
    apk_test_data[:, i] = scaler.transform(apk_test_data[:, i].reshape(-1, 1)).ravel()
And last but not least, an eta of 0.25 is way too big. I'll illustrate by using the opposite extreme:
p.mlptrain(apk_train_data[:, 0:7], apk_train_data[:, 7:], 0.0001, 100000)
p.confmat(apk_test_data[:, 0:7], apk_test_data[:, 7:])
# >> Percentage Correct: 71.4285714286
p.confmat(apk_train_data[:,0:7], apk_train_data[:,7:])
# >> Percentage Correct: 88.8888888889

Tensorflow: How to create a Pascal VOC style image

I'm working on implementing a semantic segmentation network in Tensorflow, and I'm trying to figure out how to write out summary images of the labels during training. I want to encode the images in a similar style to the class segmentation annotations used in the Pascal VOC dataset.
For example, let's assume I have a network that trains on a batch size of 1 with 4 classes. The network's final predictions have shape [1, 3, 3, 4].
Essentially I want to take the output predictions and run them through argmax to get a tensor containing the most likely class at each point in the output:
[[[0, 1, 3],
  [2, 0, 1],
  [3, 1, 2]]]
The annotated images use a color palette of 256 colors to encode labels. I have a tensor containing all the color triples:
[[  0,   0,   0],
 [128,   0,   0],
 [  0, 128,   0],
 [128, 128,   0],
 [  0,   0, 128],
 ...
 [224, 224, 192]]
How could I obtain a tensor of shape [1, 3, 3, 3] (a single 3x3 color image) that indexes into the color palette using the values obtained from argmax?
[[palette[0], palette[1], palette[3]],
 [palette[2], palette[0], palette[1]],
 [palette[3], palette[1], palette[2]]]
I could easily wrap some numpy and PIL code in tf.py_func but I'm wondering if there is a pure Tensorflow way of obtaining this result.
EDIT:
For those curious, this is the solution I got using just numpy. It works quite well, but I still dislike the use of tf.py_func:
import numpy as np
import tensorflow as tf

def voc_colormap(N=256):
    bitget = lambda val, idx: ((val & (1 << idx)) != 0)
    cmap = np.zeros((N, 3), dtype=np.uint8)
    for i in range(N):
        r = g = b = 0
        c = i
        for j in range(8):
            r |= (bitget(c, 0) << 7 - j)
            g |= (bitget(c, 1) << 7 - j)
            b |= (bitget(c, 2) << 7 - j)
            c >>= 3
        cmap[i, :] = [r, g, b]
    return cmap

VOC_COLORMAP = voc_colormap()

def grayscale_to_voc(input, name="grayscale_to_voc"):
    return tf.py_func(grayscale_to_voc_impl, [input], tf.uint8, stateful=False, name=name)

def grayscale_to_voc_impl(input):
    return np.squeeze(VOC_COLORMAP[input])
You can use tf.gather_nd(), but you will need to modify the shapes of the palette and logits to obtain the desired image, for example:
import tensorflow as tf
import numpy as np
import PIL.Image as Image
# We can load the palette from some random image in the PASCAL VOC dataset
palette = Image.open('.../VOC2012/SegmentationClass/2007_000032.png').getpalette()
# We build a random logits tensor of the requested size
batch_size = 1
height = width = 3
num_classes = 4
np.random.seed(1234)
logits = np.random.random_sample((batch_size, height, width, num_classes))
logits_argmax = np.argmax(logits, axis=3) # shape = (1, 3, 3)
# array([[[3, 3, 0],
#         [1, 3, 1],
#         [0, 2, 0]]])
sess = tf.InteractiveSession()
image = tf.gather_nd(
    params=tf.reshape(palette, [-1, 3]),  # reshaped from list to RGB
    indices=tf.reshape(logits_argmax, [batch_size, -1, 1]))
image = tf.cast(tf.reshape(image, [batch_size, height, width, 3]), tf.uint8)
sess.run(image)
# array([[[[128, 128,   0],
#          [128, 128,   0],
#          [  0,   0,   0]],
#         [[128,   0,   0],
#          [128, 128,   0],
#          [128,   0,   0]],
#         [[  0,   0,   0],
#          [  0, 128,   0],
#          [  0,   0,   0]]]], dtype=uint8)
The resulting tensor can be directly fed to a tf.summary.image(), but depending on your implementation you should upsample it before the summary.
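As a side note (a sketch of an alternative, assuming the same palette and logits_argmax as above): since the lookup only indexes the first axis of the palette, plain tf.gather achieves the same result without the flatten/reshape dance:

palette_rgb = tf.reshape(palette, [-1, 3])                        # shape (256, 3)
image = tf.cast(tf.gather(palette_rgb, logits_argmax), tf.uint8)  # shape (1, 3, 3, 3)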
