I'm trying to find the best clustering of my data by running many k-means trials in TensorFlow and keeping the result with the lowest average distance.
But my code doesn't re-initialize the centroids for each trial, so all the results are the same.
Here's my code1 - tensor_kmeans.py
import numpy as np
import pandas as pd
import random
import tensorflow as tf
from tensorflow.contrib.factorization import KMeans
from sklearn import metrics
import imp
import pickle
# Load the pickled DataFrame of fastText vectors and transpose it; each row
# is then read as one input vector.
# NOTE(review): pickle.load can execute arbitrary code -- only open trusted files.
pkl = 'fasttext_words_k.pkl'
with open(pkl, 'rb') as f:
    unique_words_in_fasttext = pickle.load(f).T

# Turn every DataFrame row into its own numpy vector.
vector = []
for i in range(len(unique_words_in_fasttext)):
    vector.append(list(unique_words_in_fasttext.iloc[i, :]))
vector = [np.array(f) for f in vector]

# Import data
full_data_x = vector

# Parameters
num_steps = 100  # Total steps to train
batch_size = 1024  # Samples per batch (NOTE(review): not used anywhere below)
n_clusters = 1300  # The number of clusters
num_classes = 100  # Number of columns of the per-centroid `counts` table
num_rows = 13074  # NOTE(review): presumably len(full_data_x) -- confirm
num_features = 300  # Length of each input vector

### tensor kmeans ###
# Input vectors
X = tf.placeholder(tf.float32, shape=[None, num_features])
# Labels (for assigning a label to a centroid and testing)
# Y = tf.placeholder(tf.float32, shape=[None, num_classes])

# K-Means Parameters
# NOTE(review): no random_seed is passed, so every run of this module uses the
# library's default seed.
kmeans = KMeans(inputs=X, num_clusters=n_clusters, distance_metric='cosine',
                use_mini_batch=True, initial_clusters="random")

# Build KMeans graph
training_graph = kmeans.training_graph()
if len(training_graph) > 6:  # Tensorflow 1.4+
    (all_scores, cluster_idx, scores, cluster_centers_initialized,
     cluster_centers_var, init_op, train_op) = training_graph
else:
    (all_scores, cluster_idx, scores, cluster_centers_initialized,
     init_op, train_op) = training_graph
cluster_idx = cluster_idx[0]  # fix for cluster_idx being a tuple
avg_distance = tf.reduce_mean(scores)

# Initialize the variables (i.e. assign their default value)
init_vars = tf.global_variables_initializer()

# Start TensorFlow session
sess = tf.Session()

# Run the initializer
sess.run(init_vars, feed_dict={X: full_data_x})
sess.run(init_op, feed_dict={X: full_data_x})

# Training
for i in range(1, num_steps + 1):
    _, d, idx = sess.run([train_op, avg_distance, cluster_idx],
                         feed_dict={X: full_data_x})
    if i % 10 == 0 or i == 1:
        print("Step %i, Avg Distance: %f" % (i, d))

labels = list(range(num_rows))

# Assign a label to each centroid.
# NOTE(review): labels[i] is just the row index i, so the scalar i is added to
# every column of that centroid's row; all columns of a row stay equal and the
# argmax below is then always 0. Confirm what was intended here.
counts = np.zeros(shape=(n_clusters, num_classes))
for i in range(len(idx)):
    counts[idx[i]] += labels[i]

# Take the arg-max column of every centroid row as its label
labels_map = [np.argmax(c) for c in counts]
labels_map = tf.convert_to_tensor(labels_map)

# Evaluation ops
# Lookup: centroid_id -> label
cluster_label = tf.nn.embedding_lookup(labels_map, cluster_idx)

# Cluster index of every input row after the last training step.
# NOTE(review): the driver script reads `tensor_kmeans.k`, but no name `k` is
# assigned in this module.
cluster_list_k = idx
And here is the code that runs outside code1 (tensor_kmeans.py):
# Run the clustering module `rotation` times and keep the labels of the trial
# with the largest recorded score.
# NOTE(review): the original indentation was lost; this layout assumes the
# comparison and the reload both belong to the trial loop -- confirm.
k_li = []
rotation = 50
best_labels = []
best_k = -1
for i in range(rotation):
    # The first pass executes the module; later passes get the reloaded one.
    import tensor_kmeans
    k_li.append(tensor_kmeans.k)
    # best_k already is the maximum of all earlier scores, so only the newest
    # entry can exceed it; checking it alone matches re-scanning the list.
    # NOTE(review): this keeps the *largest* k while the stated goal is the
    # lowest average distance -- confirm what `k` measures.
    if k_li[-1] > best_k:
        best_labels = tensor_kmeans.cluster_list_k
        best_k = k_li[-1]
    # Re-execute the module so that the next trial is computed afresh.
    tensor_kmeans = imp.reload(tensor_kmeans)
Where is the problem?
I look forward to your answers, thank you.
Each time you call KMeans() you should use a new random_seed, i.e.
# SOME_NEW_VALUE is a placeholder: supply a different seed value on every call.
kmeans = KMeans(inputs=X, num_clusters=n_clusters, distance_metric='cosine',
use_mini_batch=True, initial_clusters="random", random_seed=SOME_NEW_VALUE)
Otherwise the function KMeans() will assume random_seed=0, so that the results are reproducible (i.e. the results are always the same).
A simple way to resolve your issue is to turn code1 (tensor_kmeans.py) into a function that takes random_seed as an input parameter, and then call that function with a new random_seed for each trial.
Related
I am training a graph machine-learning model based on GraphSAGE. My PyTorch version is '1.12.1+cpu' and my Python version is 3.9.
However, I ran into an error.
Can anybody help me? I think something is wrong with the data loader, but I can't pin down the exact cause. I stepped into the following code, but I still can't find the error.
# Loader construction the question suspects of causing the error; the same
# statement appears again in the full script.
train_loader = torch_geometric.loader.NeighborSampler(edge_index=data.edge_index,
node_idx=train_idx, sizes=[15, 10, 5],
batch_size=1024, shuffle=True)
Here is the code:
from torch.utils.data import DataLoader, Dataset
import torch_geometric
import torch
import torch.nn.functional as F
from tqdm import tqdm
from torch_geometric.data import NeighborSampler
from torch_geometric.nn import SAGEConv
import os.path as osp
import pandas as pd
import numpy as np
import collections
from pandas.core.common import flatten
# importing obg dataset
from ogb.nodeproppred import PygNodePropPredDataset, Evaluator
from pandas.core.common import flatten
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(rc={'figure.figsize': (16.7, 8.27)})
sns.set_theme(style="ticks")
import collections
from scipy.special import softmax
import umap
# download and load the ogb dataset
# NOTE(review): `root` is resolved relative to the *parent* of the current
# directory, while the label-mapping CSV further down is read from
# './data/...' (current directory, 'ogbn_products' with an underscore).
# Confirm that both point into the same download.
root = osp.join(osp.dirname(osp.realpath('./')), 'data', 'products', 'ogbn-products')
dataset = PygNodePropPredDataset('ogbn-products', root)

# split_idx contains a dictionary of train, validation and test node indices
split_idx = dataset.get_idx_split()

# predefined ogb evaluator method used for validation of predictions
evaluator = Evaluator(name='ogbn-products')

# lets check the node ids distribution of train, test and val
print('Number of training nodes:', split_idx['train'].size(0))
print('Number of validation nodes:', split_idx['valid'].size(0))
print('Number of test nodes:', split_idx['test'].size(0))

# loading the dataset
data = dataset[0]

# lets check some graph statistics of ogb-product graph
print("Number of nodes in the graph:", data.num_nodes)
print("Number of edges in the graph:", data.num_edges)
print("Node feature matrix with shape:", data.x.shape)  # [num_nodes, num_node_features]
print("Graph connectivity in COO format with shape:", data.edge_index.shape)  # [2, num_edges]
print("Target to train against :", data.y.shape)
print("Node feature length", dataset.num_features)

# %%
# checking the number of unique labels
# there are 47 unique categories of product
data.y.unique()

# load integer to real product category from label mapping provided inside the dataset
df = pd.read_csv('./data/products/ogbn_products/mapping/labelidx2productcategory.csv.gz')

# lets see some of the product categories
# creating a dictionary of product category and corresponding integer label
label_idx, prod_cat = df.iloc[:, 0].values, df.iloc[:, 1].values
label_mapping = dict(zip(label_idx, prod_cat))

# counting the numbers of samples for each category
y = data.y.tolist()
y = list(flatten(y))
count_y = collections.Counter(y)
print(count_y)
df[:10]

train_idx = split_idx['train']
# Earlier attempt, kept for reference (fully commented out):
# train_loader = torch_geometric.loader.neighbor_sampler(data.edge_index,
#                                                        node_idx=train_idx,
#                                                        sizes=[15, 10, 5], batch_size=1024,
#                                                        shuffle=True)
train_loader = torch_geometric.loader.NeighborSampler(edge_index=data.edge_index,
                                                      node_idx=train_idx, sizes=[15, 10, 5],
                                                      batch_size=1024, shuffle=True)
class SAGE(torch.nn.Module):
    """Stack of ``SAGEConv`` layers that exposes every layer's embeddings.

    ``forward`` and ``inference`` both return a tuple holding one tensor per
    processed layer (first layer first). With the default ``num_layers=3``
    this is the same 3-tuple as before; other depths no longer fail with an
    ``UnboundLocalError``.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, num_layers=3):
        super(SAGE, self).__init__()
        self.num_layers = num_layers
        self.convs = torch.nn.ModuleList()
        self.convs.append(SAGEConv(in_channels, hidden_channels))
        for _ in range(num_layers - 2):
            self.convs.append(SAGEConv(hidden_channels, hidden_channels))
        self.convs.append(SAGEConv(hidden_channels, out_channels))

    def reset_parameters(self):
        """Reset the parameters of every convolution."""
        for conv in self.convs:
            conv.reset_parameters()

    def forward(self, x, adjs):
        """Apply the convolutions to a sampled mini-batch.

        Args:
            x: features of the sampled nodes.
            adjs: one ``(edge_index, e_id, size)`` triple per layer.

        Returns:
            Tuple with the output of each layer, in layer order.
        """
        # `train_loader` computes the k-hop neighborhood of a batch of nodes,
        # and returns, for each layer, a bipartite graph object, holding the
        # bipartite edges `edge_index`, the index `e_id` of the original edges,
        # and the size/shape `size` of the bipartite graph.
        # Target nodes are also included in the source nodes so that one can
        # easily apply skip-connections or add self-loops.
        embeddings = []
        for i, (edge_index, _, size) in enumerate(adjs):
            x_target = x[:size[1]]  # Target nodes are always placed first.
            x = self.convs[i]((x, x_target), edge_index)
            if i != self.num_layers - 1:
                x = F.relu(x)
                x = F.dropout(x, p=0.5, training=self.training)
            embeddings.append(x)
        return tuple(embeddings)

    def inference(self, x_all):
        """Compute every layer's embeddings for all nodes, layer by layer.

        NOTE(review): reads the module-level names ``subgraph_loader`` and
        ``device``; ``subgraph_loader`` is not defined in the visible script.
        """
        pbar = tqdm(total=x_all.size(0) * self.num_layers)
        pbar.set_description('Evaluating')
        # Compute representations of nodes layer by layer, using *all*
        # available edges. This leads to faster computation in contrast to
        # immediately computing the final representations of each batch.
        embeddings = []
        for i in range(self.num_layers):
            xs = []
            for batch_size, n_id, adj in subgraph_loader:
                edge_index, _, size = adj.to(device)
                x = x_all[n_id].to(device)
                x_target = x[:size[1]]
                x = self.convs[i]((x, x_target), edge_index)
                if i != self.num_layers - 1:
                    x = F.relu(x)
                xs.append(x)
                pbar.update(batch_size)
            # The concatenated output of this layer is the next layer's input.
            x_all = torch.cat(xs, dim=0)
            embeddings.append(x_all)
        pbar.close()
        return tuple(embeddings)
# Use CUDA when torch reports it as available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Three-layer SAGE model with 256 hidden channels, moved to the chosen device.
model = SAGE(dataset.num_features, 256, dataset.num_classes, num_layers=3)
model = model.to(device)
# loading node feature matrix and node labels
x = data.x.to(device)
y = data.y.squeeze().to(device)
def train(epoch):
    """Run one optimisation pass over ``train_loader``.

    Relies on the module-level ``model``, ``optimizer``, ``train_loader``,
    ``train_idx``, ``x``, ``y`` and ``device``.

    Args:
        epoch: current epoch number; not used inside the function.

    Returns:
        ``(loss, approx_acc)``: the mean loss per batch, and the number of
        correct predictions divided by the number of training nodes.
    """
    model.train()
    total_loss = total_correct = 0
    for batch_size, n_id, adjs in train_loader:
        # `adjs` holds a list of `(edge_index, e_id, size)` tuples.
        adjs = [adj.to(device) for adj in adjs]
        optimizer.zero_grad()
        # Only the last layer's output is used for the loss.
        *_, last_emb = model(x[n_id], adjs)
        out = last_emb.log_softmax(dim=-1)
        # The first `batch_size` entries of n_id are the batch's target nodes.
        targets = y[n_id[:batch_size]]
        loss = F.nll_loss(out, targets)
        loss.backward()
        optimizer.step()
        total_loss += float(loss)
        total_correct += int(out.argmax(dim=-1).eq(targets).sum())
    loss = total_loss / len(train_loader)
    approx_acc = total_correct / train_idx.size(0)
    return loss, approx_acc
optimizer = torch.optim.Adam(model.parameters(), lr=0.003)
# Train for 20 epochs and report the metrics of each one (they were
# previously computed and then discarded).
for epoch in range(1, 21):
    loss, acc = train(epoch)
    print(f'Epoch {epoch:02d}, Loss: {loss:.4f}, Approx. Train: {acc:.4f}')
I am working through my first machine-learning exercise.
It is a system that predicts the monthly temperature.
train_t holds the temperatures and train_x holds the weights for each data point.
However, I have a question about the part where train_x is initialized:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from pprint import pprint
# Linear model y = x . w with five inputs per sample and a sum-of-squares loss.
x = tf.placeholder(tf.float32, [None, 5])
w = tf.Variable(tf.zeros([5, 1]))
y = tf.matmul(x, w)
t = tf.placeholder(tf.float32, [None, 1])
loss = tf.reduce_sum(tf.square(y - t))
train_step = tf.train.AdamOptimizer().minimize(loss)

sess = tf.Session()
sess.run(tf.initialize_all_variables())

train_t = np.array([5.2, 5.7, 8.6, 14.9, 18.2, 20.4, 25.5, 26.4, 22.8, 17.5, 11.1, 6.6])  # montly temperature
train_t = train_t.reshape([12, 1])

# Row `month - 1` becomes [month**0, month**1, ..., month**4].
train_x = np.zeros([12, 5])
for row, month in enumerate(range(1, 13)):
    for col, n in enumerate(range(0, 5)):
        train_x[row][col] = month**n  ## why initialize like this??

# 10000 training steps; the loss is reported every 1000th step.
for i in range(1, 10001):
    sess.run(train_step, feed_dict={x: train_x, t: train_t})
    if i % 1000 == 0:
        loss_val = sess.run(loss, feed_dict={x: train_x, t: train_t})
        print('step : %d,Loss: %f' % (i, loss_val))

w_val = sess.run(w)
pprint(w_val)
def predict(x):
    """Evaluate the fitted polynomial at ``x``.

    Reads the module-level ``w_val`` and returns the sum of
    ``w_val[n][0] * x**n`` over all of its rows.

    Args:
        x: a number or a numpy array of points.
    """
    result = 0.0
    # The number of terms follows the number of rows of w_val.
    for n, coefficient in enumerate(w_val):
        result += coefficient[0] * x**n
    return result
# Scatter the twelve training temperatures against the months 1..12.
fig = plt.figure()
subplot = fig.add_subplot(1,1,1)
subplot.set_xlim(1,12)
subplot.scatter(range(1,13),train_t)
# Draw predict() evaluated at 100 evenly spaced points between 1 and 12.
linex = np.linspace(1,12,100)
liney = predict(linex)
subplot.plot(linex, liney)
However I don't understand here
for row, month in enumerate(range(1,13)):
    for col, n in enumerate(range(0,5)):
        train_x[row][col] = month**n ## why initialize like this??
What does this mean?
My book has no comment about it.
Why is train_x initialized like this?
In fact, this block of code:
train_t = np.array([5.2,5.7,8.6,14.9,18.2,20.4,25.5,26.4,22.8,17.5,11.1,6.6]) #montly temperature
train_t = train_t.reshape([12,1])
train_x = np.zeros([12,5])
for row, month in enumerate(range(1,13)):
    for col, n in enumerate(range(0,5)):
        train_x[row][col] = month**n
generates your data. It initializes train_t and train_x, which are the data that will be fed into the placeholders t and x.
train_t is an array of temperatures.
train_x is an array holding, for each month, a sort of weight for that month's temperature (the values month**n for n = 0..4).
Together they constitute the dataset.
Both train_x and train_t are arrays with your training data. In the array train_t you have the target of your model, while train_x contains the features in input to your model.
The weights of your model (the ones that are trained) are w, which is the only tf.Variable in your code; it is initialized to zeros by tf.zeros([5,1]).
The model you are training is a degree-4 polynomial (4 is the largest value in range(0, 5)) in the variable month, which takes the values in range(1, 13). That snippet of code generates the features for a degree-4 polynomial from the variable month.
I am struggling to implement K-Nearest Neighbor in TensorFlow. I think that I am either overlooking a mistake or doing something terribly wrong.
The following code always predicts 0 as the MNIST label.
from __future__ import print_function
import numpy as np
import tensorflow as tf
# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
K = 4
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

# Load the data used for the nearest-neighbour search
Xtr, Ytr = mnist.train.next_batch(55000)  # whole training set
Xte, Yte = mnist.test.next_batch(10000)  # whole test set

# tf Graph Input
xtr = tf.placeholder("float", [None, 784])
ytr = tf.placeholder("float", [None, 10])
xte = tf.placeholder("float", [784])

# Euclidean Distance (negated, so that top_k returns the *closest* rows)
distance = tf.neg(tf.sqrt(tf.reduce_sum(tf.square(tf.sub(xtr, xte)), reduction_indices=1)))

# Prediction: Get min distance neighbors
values, indices = tf.nn.top_k(distance, k=K, sorted=False)
nearest_neighbors = []
for i in range(K):
    # NOTE(review): np.argmax / np.unique are plain NumPy calls that run once
    # here, while the graph is being built, on symbolic tensors -- they are
    # not re-evaluated for each test sample inside the session.
    nearest_neighbors.append(np.argmax(ytr[indices[i]]))
sorted_neighbors, counts = np.unique(nearest_neighbors, return_counts=True)
pred = tf.Variable(nearest_neighbors[np.argmax(counts)])

# not works either
# neighbors_tensor = tf.pack(nearest_neighbors)
# y, idx, count = tf.unique_with_counts(neighbors_tensor)
# pred = tf.slice(y, begin=[tf.arg_max(count, 0)], size=tf.constant([1], dtype=tf.int64))[0]

accuracy = 0.

# Initializing the variables
init = tf.initialize_all_variables()

# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    # loop over test data
    for i in range(len(Xte)):
        # Get nearest neighbor
        # NOTE(review): `ytr` is never fed in this feed_dict.
        nn_index = sess.run(pred, feed_dict={xtr: Xtr, xte: Xte[i, :]})
        # Get nearest neighbor class label and compare it to its true label
        print("Test", i, "Prediction:", nn_index,
              "True Class:", np.argmax(Yte[i]))
        # Calculate accuracy
        if nn_index == np.argmax(Yte[i]):
            accuracy += 1. / len(Xte)
    print("Done!")
    print("Accuracy:", accuracy)
Any help is greatly appreciated.
In general, it is not a good idea to call NumPy functions while defining your TensorFlow model; that is precisely why your code wasn't working. I have made two main changes to your code: I replaced np.argmax with tf.argmax, and I uncommented the block under the "# not works either" comment. (The code below also feeds ytr in feed_dict and uses the names tf.negative, tf.subtract and tf.stack in place of tf.neg, tf.sub and tf.pack.)
Here is the complete working code:
from __future__ import print_function
import numpy as np
import tensorflow as tf
# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
K = 4
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

# Load the data used for the nearest-neighbour search
Xtr, Ytr = mnist.train.next_batch(55000)  # whole training set
Xte, Yte = mnist.test.next_batch(10000)  # whole test set

# tf Graph Input
xtr = tf.placeholder("float", [None, 784])
ytr = tf.placeholder("float", [None, 10])
xte = tf.placeholder("float", [784])

# Euclidean distance from the test sample to every training row, negated so
# that top_k (which picks the largest values) returns the closest rows.
distance = tf.negative(tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(xtr, xte)), axis=1)))

# Prediction: class index of each of the K nearest neighbours ...
values, indices = tf.nn.top_k(distance, k=K, sorted=False)
nearest_neighbors = []
for i in range(K):
    nearest_neighbors.append(tf.argmax(ytr[indices[i]], 0))

# ... followed by a majority vote among them.
neighbors_tensor = tf.stack(nearest_neighbors)
y, idx, count = tf.unique_with_counts(neighbors_tensor)
pred = tf.slice(y, begin=[tf.argmax(count, 0)], size=tf.constant([1], dtype=tf.int64))[0]

# Initializing the variables
init = tf.global_variables_initializer()

# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    # Count the hits as an integer and divide once at the end, instead of
    # summing 1/N thousands of times.
    correct = 0
    # loop over test data
    for i in range(len(Xte)):
        # Predicted class for test sample i
        nn_index = sess.run(pred, feed_dict={xtr: Xtr, ytr: Ytr, xte: Xte[i, :]})
        # Compare the prediction with the true label
        print("Test", i, "Prediction:", nn_index,
              "True Class:", np.argmax(Yte[i]))
        if nn_index == np.argmax(Yte[i]):
            correct += 1
    accuracy = float(correct) / len(Xte)
    print("Done!")
    print("Accuracy:", accuracy)
I'm struggling with solving this issue and I believe it is due to my data. I'm thinking about this as a few to many regression problem, but there could be a better approach in tensorflow.
Training Data
I have some data generated from a video sequence. For each frame of video I have a distribution of x,y positions for each cluster. There are 157,110 frames and 200,000 clusters. The frames and clusters are the inputs, which are integers and I think could be considered labels (I'll be using another network to learn the sequences of clusters later on). As each histogram is related to both a frame and clusterID, the input is not "one hot". The histograms (outputs) have 19+8 (x+y) bins where each count is rarely above 10, and could be normalized.
A subset of the training data is available here: The first two columns are the frame and clusterID (inputs) and the remaining 19+8 columns are the histograms (outputs).
What is the best network to learn to generate the appropriate histogram for a given frame/clusterID pair?
The following code is my current attempt using an MLP. It does not converge; in fact cost does not decrease at all. Is there something wrong in my implementation, or my choice of MLP, or a lack of scaling in my input data?
#!/usr/bin/python
# This program uses tensorflow to learn cluster probabilities and associate them with frame and cluster IDs
# Arguments
import argparse
# Two positional arguments: the input CSV and the number of training passes.
parser = argparse.ArgumentParser()
parser.add_argument("clusterProbabilityfile", help="CSV file containing cluster probabilities")
parser.add_argument("trainingIterations", type=int, help="Number of training iterations (epochs) to run")
args = parser.parse_args()
# Imports for ML
import tensorflow as tf
import numpy as np
from tensorflow.python.framework import dtypes
# Imports for loading CSV file
from tensorflow.python.platform import gfile
import csv
# Global vars
numInputUnits = 2
numOutputUnits = 19 + 8
# Floor division keeps the layer width an integer on Python 3 as well
# (true division would give 12.5).
numHiddenUnits = (numOutputUnits - numInputUnits) // 2
# First path component of the input file, with a trailing slash.
workingDirectory = args.clusterProbabilityfile.split('/')[0] + "/"
columnSplit = 2  # Column number that splits
# Shuffle training set
def shuffleTrainingSet(trainingSet):
    """Return a new Dataset whose rows are randomly permuted.

    ``data`` and ``target`` are indexed with the same permutation, so their
    rows stay paired.

    Args:
        trainingSet: object with ``data`` and ``target`` attributes.

    Returns:
        A ``tf.contrib.learn.datasets.base.Dataset`` with the shuffled rows.
    """
    trainingIndices = np.arange(len(trainingSet.data))  # assumes len(data) == len(target)
    np.random.shuffle(trainingIndices)  # shuffle indices in place
    data = trainingSet.data[trainingIndices]
    target = trainingSet.target[trainingIndices]
    training_set = tf.contrib.learn.datasets.base.Dataset(data=data, target=target)
    return training_set
# Load training data from CSV file, convert to numpy arrays and construct Dataset
# Modified from tf.contrib.learn.datasets.base.load_csv_without_header
# Should these be randomized???
with gfile.Open(args.clusterProbabilityfile) as csv_file:
    data_file = csv.reader(csv_file)
    data, target = [], []
    for row in data_file:
        # NOTE(review): column `columnSplit` itself lands in neither slice
        # (targets start one column later) -- confirm that this is intended.
        target.append(row[columnSplit+1:])  # All elements past the split column.
        data.append(row[:columnSplit])  # All elements before the split column.

target = np.array(target, dtype=int)
data = np.array(data, dtype=int)
training_set = tf.contrib.learn.datasets.base.Dataset(data=data, target=target)
training_set = shuffleTrainingSet(training_set)
# Construct computation graph
# MLP approach (from https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3_NeuralNetworks/multilayer_perceptron.py)
# Single hidden layer!
inputVec = tf.placeholder(tf.float32, [None, numInputUnits])
outputVec = tf.placeholder(tf.float32, [None, numOutputUnits])

# Weights
hiddenWeights = tf.Variable(tf.random_normal([numInputUnits, numHiddenUnits]))  # inputUnits -> hiddenUnits
outputWeights = tf.Variable(tf.random_normal([numHiddenUnits, numOutputUnits]))  # hiddenUnits -> outputUnits

# Biases
hiddenBiases = tf.Variable(tf.random_normal([numHiddenUnits]))
outputBiases = tf.Variable(tf.random_normal([numOutputUnits]))

# Construct MLP from layers
hiddenLayer = tf.add(tf.matmul(inputVec, hiddenWeights), hiddenBiases)  # input * weight + bias = hidden
hiddenLayer = tf.nn.relu(hiddenLayer)  # RELU Activation function for hidden layer.
outputLayer = tf.add(tf.matmul(hiddenLayer, outputWeights), outputBiases)  # hidden * weight + bias = output

# loss and optimizer
#cross_entropy = -(outputVec * tf.log(outputLayer) + (1 - outputVec) * tf.log(1 - outputLayer))
#cost = tf.reduce_mean(cross_entropy)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(outputLayer, outputVec))
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)

# Compute graph
sess = tf.Session()
sess.run(tf.initialize_all_variables())
for epoch in range(args.trainingIterations):
    training_set = shuffleTrainingSet(training_set)  # Reshuffle for each epoch.
    # NOTE(review): only `cost` is fetched here; `optimizer` is never passed
    # to sess.run, so this loop performs no weight update.
    epochCost = sess.run(cost, feed_dict={inputVec: training_set.data, outputVec: training_set.target})
    print("{:d}\t{:f}".format(epoch, epochCost))

# Evaluate model
correct_prediction = tf.equal(tf.argmax(outputLayer, 1), tf.argmax(outputVec, 1))  # compare output layer with target output vector.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print("Cost:", sess.run(cost, feed_dict={inputVec: training_set.data, outputVec: training_set.target}))
print("Accuracy:", sess.run(accuracy, feed_dict={inputVec: training_set.data, outputVec: training_set.target}))
I am trying to make a simple MLP to predict values of a pixel of an image - original blog .
Here's my earlier attempt using Keras in python - link
I've tried to do the same in tensorflow, but I am getting very large output values (~10^12) when they should be less than 1.
Here's my code:
import numpy as np
import cv2
from random import shuffle
import tensorflow as tf
'''
Image preprocessing
'''
image_file = cv2.imread("Mona Lisa.jpg")
h = image_file.shape[0]
w = image_file.shape[1]

# One sample per pixel: input is the (row, column) position, target is the
# pixel's channel values scaled to [0, 1].
preX = []
preY = []
for i in range(h):
    for j in range(w):
        preX.append([i, j])
        preY.append(image_file[i, j, :].astype('float32') / 255.0)
print(preX[:5], preY[:5])

# Shuffle inputs and targets together so that the pairs stay aligned.
zipped = list(zip(preX, preY))
shuffle(zipped)
X_train = np.array([i for (i, j) in zipped]).astype('float32')
Y_train = np.array([j for (i, j) in zipped]).astype('float32')
print(X_train[:10], Y_train[:10])
'''
Tensorflow code
'''
def weight_variable(shape):
    """Return a tf.Variable of ``shape`` drawn from a truncated normal (stddev 0.1)."""
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)
def bias_variable(shape):
    """Return a tf.Variable of ``shape`` with every entry set to 0.1."""
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)
x = tf.placeholder(tf.float32, shape=[None, 2])
y = tf.placeholder(tf.float32, shape=[None, 3])

# ---- Layers ----
w1 = weight_variable([2, 300])
b1 = bias_variable([300])
# NOTE(review): this layer is built on the numpy array X_train, not on the
# placeholder `x`, so whatever is fed for `x` never reaches the network.
L1 = tf.nn.relu(tf.matmul(X_train, w1) + b1)
w2 = weight_variable([300, 3])
b2 = bias_variable([3])
y_model = tf.matmul(L1, w2) + b2

# ---- Training ----
# criterion
MSE = tf.reduce_mean(tf.square(tf.sub(y, y_model)))
# trainer
train_op = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(MSE)
nb_epochs = 10
init = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init)

# Average of the MSE measured after each of the nb_epochs updates.
cost = 0
for i in range(nb_epochs):
    sess.run(train_op, feed_dict={x: X_train, y: Y_train})
    cost += sess.run(MSE, feed_dict={x: X_train, y: Y_train})
cost /= nb_epochs
print(cost)

# ---- Prediction ----
pred = sess.run(y_model, feed_dict={x: X_train}) * 255.0
print(pred[:10])

# Rebuild an h x w image from consecutive rows of `pred`.
# NOTE(review): the rows of `pred` follow the order of X_train, which was
# shuffled during preprocessing, while this loop walks the pixels in
# row-major order -- confirm that the ordering matches.
output_image = []
index = 0
h = image_file.shape[0]
w = image_file.shape[1]
for i in range(h):
    row = []
    for j in range(w):
        row.append(pred[index])
        index += 1
    row = np.array(row)
    output_image.append(row)
output_image = np.array(output_image)
output_image = output_image.astype('uint8')
cv2.imwrite('out_mona_300x3_tf.png', output_image)
First of all, instead of running the train_op and then the MSE in two separate calls,
you can fetch both ops in a single sess.run call by passing them as a list, which reduces your computational cost significantly.
for i in range(nb_epochs):
    # sess.run returns one value per fetched op, so unpack the pair instead
    # of adding the whole list to the running total.
    mse_value, _ = sess.run([MSE, train_op], feed_dict={x: X_train, y: Y_train})
    cost += mse_value
Secondly, I suggest always writing out your cost function so you can see what is going on during the training phase. Either manually print it out or use tensorboard to log your cost and plot it (you can find examples on the official tf page).
You can also monitor your weights to see that they aren't blowing up.
A few things you can try:
Reduce learning rate, add regularization to weights.
Check that your training set (pixels) really consist of the values that
you expect them to.
You give the input layer weights and the output layer weights the same names w and b, so it seems something goes wrong in the gradient-descent procedure. Actually, I'm surprised TensorFlow doesn't issue an error or at least a warning (or am I missing something?).