My TensorFlow convolutional neural network does not train - python

I tried to implement a class-based convolutional neural network for the facial expression recognition dataset on Kaggle using TensorFlow. However, for some reason my network does not train: I keep getting the same cost and error rate at every iteration.
I tried using one-hot vectors for the labels and changing the hyperparameters, but neither had any effect on the result.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.utils import shuffle


def get_data():
    df = pd.read_csv('../large_files/fer2013/fer2013.csv')
    Y = df.emotion.to_numpy()
    XX = df.pixels
    X = []
    for i in range(len(XX)):
        X.append(XX[i].split())
    X = np.array(X).astype(np.float)
    Z = df.Usage
    train = (Z == 'Training').to_list()
    test = [not i for i in train]
    Xtrain = X[train].astype(np.float32)
    Xtrain = Xtrain.reshape((Xtrain.shape[0], int(np.sqrt(Xtrain.shape[1])), int(np.sqrt(Xtrain.shape[1])), 1))
    Xtest = X[test].astype(np.float32)
    Xtest = Xtest.reshape((Xtest.shape[0], int(np.sqrt(Xtest.shape[1])), int(np.sqrt(Xtest.shape[1])), 1))
    Ytrain = Y[train].astype(np.int32)
    Ytest = Y[test].astype(np.int32)
    return Xtrain / 255, Xtest / 255, Ytrain, Ytest


def convpool(X, W, b, poolsz):
    conv_out = tf.nn.conv2d(X, W, strides=[1, 1, 1, 1], padding='SAME')
    conv_out = tf.nn.bias_add(conv_out, b)
    pool_out = tf.nn.max_pool(conv_out, ksize=[1, poolsz, poolsz, 1], strides=[1, poolsz, poolsz, 1], padding='SAME')
    return tf.nn.relu(pool_out)


def init_filter(shape):
    w = np.random.rand(*shape) * np.sqrt(2 / np.prod(shape[:-1]))
    return w.astype(np.float32)


def error_rate(Y, T):
    return np.mean(Y != T)


class FullyConnectedLayer():
    def __init__(self, M1, M2, activation=tf.nn.relu):
        W = np.random.randn(M1, M2) / np.sqrt(M1 + M2)
        self.W = tf.Variable(W.astype(np.float32))
        b = np.zeros(M2)
        self.b = tf.Variable(b.astype(np.float32))
        self.activation = activation

    def forward(self, X):
        if self.activation == None:
            return tf.matmul(X, self.W) + self.b
        else:
            return self.activation(tf.matmul(X, self.W) + self.b)


class ConvolutionLayer():
    def __init__(self, filter_shape, b, poolsz=2):
        W = init_filter(filter_shape)
        self.W = tf.Variable(W)
        self.b = tf.Variable(b.astype(np.float32))
        self.poolsize = poolsz

    def forward(self, X):
        return convpool(X, self.W, self.b, self.poolsize)


class CNN():
    def __init__(self, filter_shapes, dense_layer_sizes):
        self.filter_shapes = filter_shapes  # List of filter shapes
        self.dense_layer_sizes = dense_layer_sizes  # List of hidden units for dense layers

    def fit(self, trainset, testset, learning_rate=0.001, momentum=0.9, decay=0.99, batch_sz=200, poolsize=2):
        learning_rate = np.float32(learning_rate)
        momentum = np.float32(momentum)
        decay = np.float32(decay)
        Xtrain = trainset[0]
        Ytrain = trainset[1]
        Xtest = testset[0]
        Ytest = testset[1]
        K = len(set(Ytrain))
        # Crop train and test sets so they are divisible by the batch size
        Ntrain = len(Ytrain)
        Ntrain = Ntrain // batch_sz * batch_sz
        Xtrain = Xtrain[:Ntrain,]
        Ytrain = Ytrain[:Ntrain]
        Ntest = len(Ytest)
        Ntest = Ntest // batch_sz * batch_sz
        Xtest = Xtest[:Ntest,]
        Ytest = Ytest[:Ntest]
        X_shape = Xtrain.shape
        width = X_shape[1]
        height = X_shape[2]
        # Create convolution layers and store them
        self.convolutionlayers = []
        for shape in self.filter_shapes:
            b = np.zeros(shape[-1], dtype=np.float32)
            conv = ConvolutionLayer(shape, b, poolsz=poolsize)
            self.convolutionlayers.append(conv)
        # Width and height are both halved by each max pooling, so the input size
        # of the first fully connected layer is found like this
        final_filter_shape = self.filter_shapes[-1]
        num_convs = len(self.convolutionlayers)
        M1 = int((width / (2 ** num_convs)) * (height / (2 ** num_convs)) * final_filter_shape[-1])
        # Create fully connected layers and store them
        self.vanillalayers = []
        for M2 in self.dense_layer_sizes:
            layer = FullyConnectedLayer(M1, M2)
            self.vanillalayers.append(layer)
            M1 = M2
        final_layer = FullyConnectedLayer(M1, K, activation=None)
        self.vanillalayers.append(final_layer)
        self.AllLayers = self.convolutionlayers + self.vanillalayers
        tfX = tf.placeholder(dtype=tf.float32, shape=(batch_sz, width, height, 1))
        tfT = tf.placeholder(dtype=tf.int32, shape=(batch_sz,))
        Yish = self.forward(tfX)
        cost = tf.reduce_sum(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=Yish, labels=tfT))
        train_op = tf.train.RMSPropOptimizer(learning_rate=learning_rate, decay=decay, momentum=momentum).minimize(cost)
        predict_op = self.predict(tfX)
        max_epoch = 10
        print_period = 20
        num_batches = Ntrain // batch_sz
        TestCosts = []
        init = tf.global_variables_initializer()
        with tf.Session() as sess:
            sess.run(init)
            for i in range(max_epoch):
                Xtrain, Ytrain = shuffle(Xtrain, Ytrain)
                for j in range(num_batches):
                    Xbatch = Xtrain[j * batch_sz: (j + 1) * batch_sz,]
                    Ybatch = Ytrain[j * batch_sz: (j + 1) * batch_sz,]
                    sess.run(train_op, feed_dict={tfX: Xbatch, tfT: Ybatch})
                    if j % print_period == 0:
                        test_cost = 0
                        prediction = np.zeros(Ntest)
                        for k in range(Ntest // batch_sz):
                            Xtestbatch = Xtest[k * batch_sz:(k * batch_sz + batch_sz),]
                            Ytestbatch = Ytest[k * batch_sz:(k * batch_sz + batch_sz),]
                            test_cost += sess.run(cost, feed_dict={tfX: Xtestbatch, tfT: Ytestbatch})
                            prediction[k * batch_sz:(k * batch_sz + batch_sz)] = sess.run(
                                predict_op, feed_dict={tfX: Xtestbatch})
                        err = error_rate(prediction, Ytest)
                        print("Cost / err at iteration i=%d, j=%d: %.3f / %.3f" % (i, j, test_cost, err))
                        TestCosts.append(test_cost)
        plt.plot(TestCosts)
        plt.show()

    def forward(self, X):
        Z = X
        count = 0
        for layer in self.AllLayers:
            # If the next layer is a fully connected layer, flatten Z first
            if count >= len(self.convolutionlayers):
                Z_shape = Z.get_shape().as_list()
                Z = tf.reshape(Z, [Z_shape[0], np.prod(Z_shape[1:])])
            Z = layer.forward(Z)
            count += 1
        return Z

    def predict(self, X):
        out = self.forward(X)
        return tf.math.argmax(out, axis=1)


def main():
    Xtrain, Xtest, Ytrain, Ytest = get_data()
    trainset = [Xtrain, Ytrain]
    testset = [Xtest, Ytest]
    filtershapes = [(5, 5, 1, 10), (5, 5, 10, 20), (5, 5, 20, 40)]
    fullylayers = [500, 500]
    cnn = CNN(filtershapes, fullylayers)
    cnn.fit(trainset, testset)


if __name__ == '__main__':
    main()
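(Not part of the original post.) One detail worth double-checking in code like this is the filter initialization: init_filter above draws from np.random.rand, i.e. the uniform distribution on [0, 1), so every convolutional weight starts positive, whereas He initialization is normally built on zero-mean np.random.randn. A hedged alternative, purely as a suggestion rather than a confirmed fix:

def init_filter(shape):
    # Zero-mean He initialization; the original uses np.random.rand (all-positive weights)
    w = np.random.randn(*shape) * np.sqrt(2 / np.prod(shape[:-1]))
    return w.astype(np.float32)

Similarly, tf.reduce_mean instead of tf.reduce_sum keeps the printed cost on a per-sample scale, which makes it easier to see whether it is moving at all; neither change is guaranteed to be the root cause here.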

Related

tensorflow.keras.layers.RNN: TypeError: cannot unpack non-iterable RNN object

This error was not happening with TensorFlow 1.0:
cell_1 = tgcnCell(gru_units, adj, num_nodes=num_nodes)
cell = tf.compat.v1.nn.rnn_cell.MultiRNNCell([cell_1], state_is_tuple=True)
outputs, states = keras.layers.RNN(cell, return_sequences=True,
return_state=True)
Whole Code:
# -*- coding: utf-8 -*-
import pickle as pkl
import tensorflow as tf
import pandas as pd
import numpy as np
import math
import os
import numpy.linalg as la
from tensorflow import keras
from tensorflow.keras import layers
#from tensorflow.contrib.rnn import RNNCell
from tensorflow.compat.v1.nn.rnn_cell import RNNCell
import scipy.sparse as sp
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Layer
from sklearn.metrics import mean_squared_error, mean_absolute_error
import time
from keras import backend
def normalized_adj(adj):
adj = sp.coo_matrix(adj)
rowsum = np.array(adj.sum(1))
d_inv_sqrt = np.power(rowsum, -0.5).flatten()
d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.
d_mat_inv_sqrt = sp.diags(d_inv_sqrt)
normalized_adj = adj.dot(d_mat_inv_sqrt).transpose().dot(
d_mat_inv_sqrt).tocoo()
normalized_adj = normalized_adj.astype(np.float32)
return normalized_adj
def sparse_to_tuple(mx):
mx = mx.tocoo()
coords = np.vstack((mx.row, mx.col)).transpose()
L = tf.SparseTensor(coords, mx.data, mx.shape)
return tf.sparse.reorder(L)
def calculate_laplacian(adj, lambda_max=1):
adj = normalized_adj(adj + sp.eye(adj.shape[0]))
adj = sp.csr_matrix(adj)
adj = adj.astype(np.float32)
return sparse_to_tuple(adj)
def weight_variable_glorot(input_dim, output_dim, name=""):
init_range = np.sqrt(6.0 / (input_dim + output_dim))
initial = tf.random.uniform([input_dim, output_dim], minval=-init_range,
maxval=init_range, dtype=tf.float32)
return tf.Variable(initial, name=name)
def load_los_data(dataset):
# los_adj = pd.read_csv(r'/content/drive/MyDrive/tgcn/data/adj_matrix_clustered.csv',header=None)
los_adj = pd.read_csv(r'adj_matrix_clustered.csv', header=None)
adj = np.mat(los_adj)
# los_tf = pd.read_csv(r'/content/drive/MyDrive/tgcn/data/Cluster_series_date.csv', parse_dates=['5 Minutes'], index_col='5 Minutes')
los_tf = pd.read_csv(r'Cluster_series_date.csv', parse_dates=[
'5 Minutes'], index_col='5 Minutes')
return los_tf, adj
def preprocess_data(data, time_len, rate, seq_len, pre_len):
train_size = int(time_len * rate)
train_data = data[0:train_size]
test_data = data[train_size:time_len]
trainX, trainY, testX, testY = [], [], [], []
for i in range(len(train_data) - seq_len - pre_len):
a = train_data[i: i + seq_len + pre_len]
trainX.append(a[0: seq_len])
trainY.append(a[seq_len: seq_len + pre_len])
for i in range(len(test_data) - seq_len - pre_len):
b = test_data[i: i + seq_len + pre_len]
testX.append(b[0: seq_len])
testY.append(b[seq_len: seq_len + pre_len])
trainX1 = np.array(trainX)
trainY1 = np.array(trainY)
testX1 = np.array(testX)
testY1 = np.array(testY)
return trainX1, trainY1, testX1, testY1
def _concat(prefix, suffix, static=False):
"""Concat that enables int, Tensor, or TensorShape values.
This function takes a size specification, which can be an integer, a
TensorShape, or a Tensor, and converts it into a concatenated Tensor
(if static = False) or a list of integers (if static = True).
Args:
prefix: The prefix; usually the batch size (and/or time step size).
(TensorShape, int, or Tensor.)
suffix: TensorShape, int, or Tensor.
static: If `True`, return a python list with possibly unknown dimensions.
Otherwise return a `Tensor`.
Returns:
shape: the concatenation of prefix and suffix.
Raises:
ValueError: if `suffix` is not a scalar or vector (or TensorShape).
ValueError: if prefix or suffix was `None` and asked for dynamic
Tensors out.
"""
if isinstance(prefix, tf.Tensor):
p = prefix
p_static = tf.get_static_value(prefix)
if p.shape.ndims == 0:
p = tf.compat.v1.expand_dims(p, 0)
elif p.shape.ndims != 1:
raise ValueError(
"Prefix tensor must be either a scalar or vector, "
f"but received tensor: {p}")
else:
p = tf.TensorShape(prefix)
p_static = p.as_list() if p.ndims is not None else None
p = (
tf.constant(p.as_list(), dtype=tf.int32)
if p.is_fully_defined() else None)
if isinstance(suffix, tf.Tensor):
s = suffix
s_static = tf.get_static_value(suffix)
if s.shape.ndims == 0:
s = tf.compat.v1.expand_dims(s, 0)
elif s.shape.ndims != 1:
raise ValueError("suffix tensor must be either a scalar or vector, "
f"but received tensor: {s}")
else:
s = tf.TensorShape(suffix)
s_static = s.as_list() if s.ndims is not None else None
s = (
tf.constant(s.as_list(), dtype=tf.int32)
if s.is_fully_defined() else None)
if static:
shape = tf.TensorShape(p_static).concatenate(s_static)
shape = shape.as_list() if shape.ndims is not None else None
else:
if p is None or s is None:
raise ValueError(
"Prefix or suffix can't be None. "
f"Received prefix = {prefix} and suffix = {suffix}")
shape = tf.concat((p, s), 0)
return shape
def _zero_state_tensors(state_size, batch_size, dtype):
"""Create tensors of zeros based on state_size, batch_size, and dtype."""
def get_state_shape(s):
"""Combine s with batch_size to get a proper tensor shape."""
c = _concat(batch_size, s)
size = tf.zeros(c, dtype=dtype)
if not tf.executing_eagerly():
c_static = _concat(batch_size, s, static=True)
size.set_shape(c_static)
return size
return tf.nest.map_structure(get_state_shape, state_size)
class tgcnCell(RNNCell):
"""Temporal Graph Convolutional Network """
def call(self, inputs, **kwargs):
pass
def __init__(self, num_units, adj, num_nodes, input_size=None,
act=tf.nn.tanh, reuse=None):
# self.state_size = num_units
super(tgcnCell, self).__init__(_reuse=reuse)
self._act = act
self._nodes = num_nodes
self._units = num_units
self._adj = []
self._adj.append(calculate_laplacian(adj))
#property
def state_size(self):
return self._nodes * self._units
#property
def output_size(self):
return self._units
def __call__(self, inputs, state, scope=None):
with tf.compat.v1.variable_scope(scope or "tgcn"):
with tf.compat.v1.variable_scope("gates"):
value = tf.nn.sigmoid(
self._gc(inputs, state, 2 * self._units, bias=1.0, scope=scope))
r, u = tf.split(value=value, num_or_size_splits=2, axis=1)
with tf.compat.v1.variable_scope("candidate"):
r_state = r * state
c = self._act(self._gc(inputs, r_state,
self._units, scope=scope))
new_h = u * state + (1 - u) * c
return new_h, new_h
def _gc(self, inputs, state, output_size, bias=0.0, scope=None):
# inputs:(-1,num_nodes)
inputs = tf.expand_dims(inputs, 2)
# state:(batch,num_node,gru_units)
state = tf.reshape(state, (-1, self._nodes, self._units))
# concat
x_s = tf.concat([inputs, state], axis=2)
# input_size = x_s.get_shape()[2].value
input_size = x_s.get_shape()[2]
# (num_node,input_size,-1)
x0 = tf.transpose(x_s, perm=[1, 2, 0])
x0 = tf.reshape(x0, shape=[self._nodes, -1])
scope = tf.compat.v1.get_variable_scope()
with tf.compat.v1.variable_scope(scope):
for m in self._adj:
x1 = tf.sparse.sparse_dense_matmul(m, x0)
# print(x1)
x = tf.reshape(x1, shape=[self._nodes, input_size, -1])
x = tf.transpose(x, perm=[2, 0, 1])
x = tf.reshape(x, shape=[-1, input_size])
weights = tf.compat.v1.get_variable(
'weights', [input_size, output_size], initializer=tf.initializers.GlorotNormal()
)
# (batch_size * self._nodes, output_size)
x = tf.matmul(x, weights)
biases = tf.compat.v1.get_variable(
"biases", [output_size], initializer=tf.constant_initializer(bias, dtype=tf.float32))
x = tf.nn.bias_add(x, biases)
x = tf.reshape(x, shape=[-1, self._nodes, output_size])
x = tf.reshape(x, shape=[-1, self._nodes * output_size])
return x
time_start = time.time()
###### Settings ######
#flags = tf.app.flags
#FLAGS = tf.app.flags.FLAGS
#flags.DEFINE_float('learning_rate', 0.001, 'Initial learning rate.')
#flags.DEFINE_integer('training_epoch', 1, 'Number of epochs to train.')
#flags.DEFINE_integer('gru_units', 64, 'hidden units of gru.')
#flags.DEFINE_integer('seq_len',12 , ' time length of inputs.')
#flags.DEFINE_integer('pre_len', 3, 'time length of prediction.')
#flags.DEFINE_float('train_rate', 0.8, 'rate of training set.')
#flags.DEFINE_integer('batch_size', 32, 'batch size.')
#flags.DEFINE_string('dataset', 'los', 'sz or los.')
#flags.DEFINE_string('model_name', 'tgcn', 'tgcn')
model_name = 'tgcn'
data_name = 'los'
train_rate = 0.8
seq_len = 12
output_dim = pre_len = 3
batch_size = 32
lr = 0.001
training_epoch = 1
gru_units = 64
###### load data ######
if data_name == 'los':
data, adj = load_los_data('los')
time_len = data.shape[0]
num_nodes = data.shape[1]
data1 = np.mat(data, dtype=np.float32)
# normalization
max_value = np.max(data1)
data1 = data1/max_value
trainX, trainY, testX, testY = preprocess_data(
data1, time_len, train_rate, seq_len, pre_len)
totalbatch = int(trainX.shape[0]/batch_size)
training_data_count = len(trainX)
def TGCN(_X, _weights, _biases):
###
cell_1 = tgcnCell(gru_units, adj, num_nodes=num_nodes)
# cell = tf.nn.rnn_cell.MultiRNNCell([cell_1], state_is_tuple=True)
cell = tf.compat.v1.nn.rnn_cell.MultiRNNCell([cell_1], state_is_tuple=True)
_X = tf.unstack(_X, axis=1)
# outputs, states = tf.compat.v1.nn.static_rnn(cell, _X, dtype=tf.float32)
output, states = keras.layers.RNN(cell, return_sequences=True,
return_state=True)
m = []
for i in outputs:
o = tf.reshape(i, shape=[-1, num_nodes, gru_units])
o = tf.reshape(o, shape=[-1, gru_units])
m.append(o)
last_output = m[-1]
output = tf.matmul(last_output, _weights['out']) + _biases['out']
output = tf.reshape(output, shape=[-1, num_nodes, pre_len])
output = tf.transpose(output, perm=[0, 2, 1])
output = tf.reshape(output, shape=[-1, num_nodes])
return output, m, states
tf.compat.v1.disable_eager_execution()
###### placeholders ######
inputs = tf.compat.v1.placeholder(tf.float32, shape=[None, seq_len, num_nodes])
labels = tf.compat.v1.placeholder(tf.float32, shape=[None, pre_len, num_nodes])
#inputs = tf.keras.Input(type_spec=tf.TensorSpec(shape=[None, seq_len, num_nodes], dtype=tf.float32, name=None))
#labels = tf.keras.Input(type_spec=tf.TensorSpec(shape=[None, pre_len, num_nodes], dtype=tf.float32, name=None))
# inputs = tf.keras.Input(shape=(None, seq_len, num_nodes)) #this is alternative to using placeholder but gives error about shape of state with it
#labels = tf.keras.Input(shape=(None, pre_len, num_nodes))
# Graph weights
weights = {
'out': tf.Variable(tf.random.normal([gru_units, pre_len], mean=1.0), name='weight_o')}
biases = {
'out': tf.Variable(tf.random.normal([pre_len]), name='bias_o')}
if model_name == 'tgcn':
pred, ttts, ttto = TGCN(inputs, weights, biases)
y_pred = pred
###### optimizer ######
lambda_loss = 0.0015
Lreg = lambda_loss * sum(tf.nn.l2_loss(tf_var)
for tf_var in tf.trainable_variables())
label = tf.reshape(labels, [-1, num_nodes])
# loss
loss = tf.reduce_mean(tf.nn.l2_loss(y_pred-label) + Lreg)
# rmse
error = tf.sqrt(tf.reduce_mean(tf.square(y_pred-label)))
optimizer = tf.train.AdamOptimizer(lr).minimize(loss)
###### Initialize session ######
variables = tf.global_variables()
saver = tf.train.Saver(tf.global_variables())
#sess = tf.Session()
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
sess.run(tf.global_variables_initializer())
out = 'out/%s' % (model_name)
#out = 'out/%s_%s'%(model_name,'perturbation')
path1 = '%s_%s_lr%r_batch%r_unit%r_seq%r_pre%r_epoch%r' % (
model_name, data_name, lr, batch_size, gru_units, seq_len, pre_len, training_epoch)
path = os.path.join(out, path1)
if not os.path.exists(path):
os.makedirs(path)
###### evaluation ######
def evaluation(a, b):
rmse = math.sqrt(mean_squared_error(a, b))
mae = mean_absolute_error(a, b)
F_norm = la.norm(a-b, 'fro')/la.norm(a, 'fro')
r2 = 1-((a-b)**2).sum()/((a-a.mean())**2).sum()
var = 1-(np.var(a-b))/np.var(a)
return rmse, mae, 1-F_norm, r2, var
x_axe, batch_loss, batch_rmse, batch_pred = [], [], [], []
test_loss, test_rmse, test_mae, test_acc, test_r2, test_var, test_pred = [
], [], [], [], [], [], []
for epoch in range(training_epoch):
for m in range(totalbatch):
mini_batch = trainX[m * batch_size: (m+1) * batch_size]
mini_label = trainY[m * batch_size: (m+1) * batch_size]
_, loss1, rmse1, train_output = sess.run([optimizer, loss, error, y_pred],
feed_dict={inputs: mini_batch, labels: mini_label})
batch_loss.append(loss1)
batch_rmse.append(rmse1 * max_value)
# Test completely at every epoch
loss2, rmse2, test_output = sess.run([loss, error, y_pred],
feed_dict={inputs: testX, labels: testY})
test_label = np.reshape(testY, [-1, num_nodes])
rmse, mae, acc, r2_score, var_score = evaluation(test_label, test_output)
test_label1 = test_label * max_value
test_output1 = test_output * max_value
test_loss.append(loss2)
test_rmse.append(rmse * max_value)
test_mae.append(mae * max_value)
test_acc.append(acc)
test_r2.append(r2_score)
test_var.append(var_score)
test_pred.append(test_output1)
print('Iter:{}'.format(epoch),
'train_rmse:{:.4}'.format(batch_rmse[-1]),
'test_loss:{:.4}'.format(loss2),
'test_rmse:{:.4}'.format(rmse),
'test_acc:{:.4}'.format(acc))
if (epoch % 500 == 0):
saver.save(sess, path+'/model_100/TGCN_pre_%r' %
epoch, global_step=epoch)
time_end = time.time()
print(time_end-time_start, 's')
############## visualization ###############
b = int(len(batch_rmse)/totalbatch)
batch_rmse1 = [i for i in batch_rmse]
train_rmse = [(sum(batch_rmse1[i*totalbatch:(i+1)*totalbatch])/totalbatch)
for i in range(b)]
batch_loss1 = [i for i in batch_loss]
train_loss = [(sum(batch_loss1[i*totalbatch:(i+1)*totalbatch])/totalbatch)
for i in range(b)]
index = test_rmse.index(np.min(test_rmse))
test_result = test_pred[index]
var = pd.DataFrame(test_result)
var.to_csv(path+'/test_result.csv', index=False, header=False)
# plot_result(test_result,test_label1,path)
# plot_error(train_rmse,train_loss,test_rmse,test_acc,test_mae,path)
print('min_rmse:%r' % (np.min(test_rmse)),
'min_mae:%r' % (test_mae[index]),
'max_acc:%r' % (test_acc[index]),
'r2:%r' % (test_r2[index]),
'var:%r' % test_var[index])
def plot_result(test_result, test_label1, path):
# all test result visualization
fig1 = plt.figure(figsize=(7, 1.5))
a_pred = test_result[:, 0]
a_true = test_label1[:, 0]
plt.plot(a_pred, 'r-', label='prediction')
plt.plot(a_true, 'b-', label='true')
plt.legend(loc='best', fontsize=10)
plt.savefig(path+'/test_all.jpg')
plt.show()
# oneday test result visualization
fig1 = plt.figure(figsize=(7, 1.5))
a_pred = test_result[0:96, 0]
a_true = test_label1[0:96, 0]
plt.plot(a_pred, 'r-', label="prediction")
plt.plot(a_true, 'b-', label="true")
plt.legend(loc='best', fontsize=10)
plt.savefig(path+'/test_oneday.jpg')
plt.show()
def plot_error(train_rmse, train_loss, test_rmse, test_acc, test_mae, path):
###train_rmse & test_rmse
fig1 = plt.figure(figsize=(5, 3))
plt.plot(train_rmse, 'r-', label="train_rmse")
plt.plot(test_rmse, 'b-', label="test_rmse")
plt.legend(loc='best', fontsize=10)
plt.savefig(path+'/rmse.jpg')
plt.show()
#### train_loss & train_rmse
fig1 = plt.figure(figsize=(5, 3))
plt.plot(train_loss, 'b-', label='train_loss')
plt.legend(loc='best', fontsize=10)
plt.savefig(path+'/train_loss.jpg')
plt.show()
fig1 = plt.figure(figsize=(5, 3))
plt.plot(train_rmse, 'b-', label='train_rmse')
plt.legend(loc='best', fontsize=10)
plt.savefig(path+'/train_rmse.jpg')
plt.show()
# accuracy
fig1 = plt.figure(figsize=(5, 3))
plt.plot(test_acc, 'b-', label="test_acc")
plt.legend(loc='best', fontsize=10)
plt.savefig(path+'/test_acc.jpg')
plt.show()
# rmse
fig1 = plt.figure(figsize=(5, 3))
plt.plot(test_rmse, 'b-', label="test_rmse")
plt.legend(loc='best', fontsize=10)
plt.savefig(path+'/test_rmse.jpg')
plt.show()
# mae
fig1 = plt.figure(figsize=(5, 3))
plt.plot(test_mae, 'b-', label="test_mae")
plt.legend(loc='best', fontsize=10)
plt.savefig(path+'/test_mae.jpg')
plt.show()
The error it gives:
Traceback (most recent call last):
File "C:\Users\shiva\Desktop\f\ml_js\script.py", line 322, in <module>
pred, ttts, ttto = TGCN(inputs, weights, biases)
File "C:\Users\shiva\Desktop\f\ml_js\script.py", line 288, in TGCN
output, states = keras.layers.RNN(cell, return_sequences=True,
TypeError: cannot unpack non-iterable RNN object
And I know that this is happening because the result of the RNN call is an object, which is not iterable. For some reason, this error started happening in TensorFlow 2.0.
I need the outputs and the state from the RNN call.
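For reference (not from the original post): in TF 2.x, keras.layers.RNN(cell, ...) only constructs the layer object, so unpacking its return value fails; the outputs and states come from calling the constructed layer on an input tensor. A minimal sketch of that pattern, using a built-in GRUCell as a stand-in for tgcnCell:

import tensorflow as tf
from tensorflow import keras

cell = keras.layers.GRUCell(64)   # stand-in; the original wraps a custom tgcnCell
rnn_layer = keras.layers.RNN(cell, return_sequences=True, return_state=True)

x = tf.random.normal([8, 12, 32])       # (batch, time, features)
outputs, *states = rnn_layer(x)         # calling the layer yields the outputs and states
print(outputs.shape, [s.shape for s in states])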

In pytorch, self-made dataset and testing dataset seem to exhaust all RAM

I am new to PyTorch and I wrote a ResNet program in PyTorch on MNIST for an experiment.
If I use the data loader as below, it is fine:
import torch as pt
from torch.utils.data import DataLoader, TensorDataset
import torchvision as ptv
mnist_train = ptv.datasets.MNIST(ROOT_DIR,
train=True,
transform=ptv.transforms.ToTensor(),
download=False)
dl = pt.utils.data.DataLoader(dataset=mnist_train,
batch_size=BATCH_SIZE,
shuffle=True,
drop_last=True)
If I use a self-made dataset as below, in order to use a validation set at each iteration, the program exhausts all my RAM. The test set is not used at each iteration, only at the end to evaluate the model.
mnist_test = ptv.datasets.MNIST(ROOT_DIR,
train=False,
transform=ptv.transforms.ToTensor(),
download=False)
M_TEST, PIC_H, PIC_W = mnist_test.data.shape
x_test = mnist_test.data.double() / 255.
y_test = mnist_test.targets
a = pt.randperm(M_TEST) # ATTENTION pt.randperm
x_test = x_test[a]
y_test = y_test[a]
VAL_RATE = 0.1
M_VAL = int(np.ceil(M_TEST * VAL_RATE))
M_TEST -= M_VAL
x_test, x_val = pt.split(x_test, (M_TEST, M_VAL))
y_test, y_val = pt.split(y_test, (M_TEST, M_VAL))
x_test = x_test.view(-1, 1, PIC_H, PIC_W).double()
x_val = x_val.view(-1, 1, PIC_H, PIC_W).double()
dl_test = DataLoader(TensorDataset(x_test, y_test),
batch_size=BATCH_SIZE)
def acc(ht, yt):
return (pt.argmax(ht, 1) == yt.long()).double().mean()
# in iteration:
for epoch in range(N_EPOCHS):
for i, (bx, by) in enumerate(dl):
model.train(True)
optim.zero_grad()
bx = bx.view(-1, 1, PIC_H, PIC_W).double()
ht = model(bx)
cost = criterion(ht, by)
cost.backward()
optim.step()
model.train(False)
accv = acc(ht, by)
ht_val = model(x_val)
val_cost = criterion(ht_val, y_val)
val_acc = acc(ht_val, y_val)
So I suspected that only ptv.datasets.MNIST and pt.utils.data.DataLoader work properly, and I removed the use of my self-made validation set at each iteration; RAM usage was normal after the removal. But the test stage still exhausts all my RAM, even though I only use ptv.datasets.MNIST and pt.utils.data.DataLoader as below:
mnist_test = ptv.datasets.MNIST(ROOT_DIR,
train=False,
transform=ptv.transforms.ToTensor(),
download=False)
dl_test = pt.utils.data.DataLoader(dataset=mnist_test,
batch_size=BATCH_SIZE,
shuffle=False,
drop_last=True)
test_cost_avg = 0.
test_acc_avg = 0.
GROUP = int(np.ceil(M_TEST / BATCH_SIZE / 10))
for i, (bx, by) in enumerate(dl_test):
bx = bx.view(-1, 1, PIC_H, PIC_W).double()
ht = model(bx)
test_cost_avg += criterion(ht, by)
test_acc_avg += acc(ht, by)
if i % GROUP == 0:
print(f'Testing # {i + 1}')
if i % GROUP != 0:
print(f'Testing # {i + 1}')
test_cost_avg /= i + 1
test_acc_avg /= i + 1
print(f'Tested: cost = {test_cost_avg}, acc = {test_acc_avg}')
print('Over')
Please give me some help. Thanks a lot!
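(Not from the original post.) One frequent cause of memory growth in evaluation loops like the test loop above is that test_cost_avg += criterion(ht, by) accumulates tensors that still hold the autograd graph. Running evaluation under torch.no_grad() and accumulating plain Python floats with .item() avoids that; whether it is the cause here is only a guess. A sketch, reusing model, dl_test, criterion, acc, PIC_H and PIC_W as defined above:

model.train(False)
test_cost_avg, test_acc_avg = 0., 0.
with pt.no_grad():                       # no graph is built, so nothing is retained
    for i, (bx, by) in enumerate(dl_test):
        bx = bx.view(-1, 1, PIC_H, PIC_W).double()
        ht = model(bx)
        test_cost_avg += criterion(ht, by).item()   # plain float, not a tensor
        test_acc_avg += acc(ht, by).item()
test_cost_avg /= i + 1
test_acc_avg /= i + 1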
Update:
I suspect there is something wrong with my model, because a simple CNN model I have, trained on a self-made dataset built from torchvision's MNIST, does not have this RAM-exhaustion problem. So I paste my model from this problem below, FYI:
def my_conv(in_side, in_ch, out_ch, kernel, stride, padding='same'):
if 'same' == padding:
ps = kernel - 1
padding = ps // 2
else:
padding = 0
print(padding) # tmp
return pt.nn.Conv2d(in_ch, out_ch, kernel_size=kernel, stride=stride, padding=padding)
class MyResnetBlock(pt.nn.Module):
def __init__(self, residual, in_side, in_ch, out_ch, kernel=3, stride=1, **kwargs):
super().__init__(**kwargs)
self.residual = residual
self.in_side = in_side
self.in_ch = in_ch
self.out_ch = out_ch
self.kernel = kernel
self.stride = stride
self.conv1 = my_conv(in_side, in_ch, out_ch, kernel, stride)
self.bn1 = pt.nn.BatchNorm2d(out_ch)
self.relu1 = pt.nn.ReLU()
self.conv2 = my_conv(np.ceil(in_side / stride), out_ch, out_ch, kernel, 1)
self.bn2 = pt.nn.BatchNorm2d(out_ch)
self.relu2 = pt.nn.ReLU()
if residual:
self.conv_down = my_conv(in_side, in_ch, out_ch, kernel, stride)
def forward(self, input):
x = input
x = self.conv1(x)
x = self.bn1(x)
x = self.relu1(x)
x = self.conv2(x)
x = self.bn2(x)
if self.residual:
res = self.conv_down(input)
else:
res = input
x += res
x = self.relu2(x)
return x
class MyResnetByPt(pt.nn.Module):
def __init__(self, blocks_spec_list, in_side, init_in_ch, init_out_ch, **kwargs):
super().__init__(**kwargs)
self.conv1 = my_conv(in_side, init_in_ch, init_out_ch, 3, 1)
in_ch = out_ch = init_out_ch
blocks = []
for block_id, n_blocks in enumerate(blocks_spec_list):
for layer_id in range(n_blocks):
if layer_id == 0:
if block_id != 0:
out_ch *= 2
block = MyResnetBlock(True, in_side, in_ch, out_ch, 3, 2)
in_ch = out_ch
in_side = int(np.ceil(in_side / 2))
else:
block = MyResnetBlock(False, in_side, in_ch, out_ch, 3, 1)
blocks.append(block)
self.blocks = pt.nn.Sequential(*blocks)
self.final_ch = out_ch
self.avg_pool = pt.nn.AvgPool2d(kernel_size=(in_side, in_side),
stride=(1, 1),
padding=(0, 0))
self.fc = pt.nn.Linear(out_ch, N_CLS)
def forward(self, input):
x = input
x = self.conv1(x)
x = self.blocks(x)
x = self.avg_pool(x)
x = x.view(-1, self.final_ch)
x = self.fc(x)
return x
model = MyResnetByPt([2, 2, 2, 2], PIC_H, 1, 16)
model = model.double()

softmax_cross_entropy_with_logits nan

I have extracted CNN features from a pretrained VGG19, with size 4096. Then I am using a shallower architecture to train a classifier with softmax and center losses. Unfortunately, the softmax loss function returns nan. There is a detailed discussion available here; however, I am not able to resolve the problem with clipping because the labels and logits are in two different data formats (int64, float32). Furthermore, I also changed the learning rate but still got the same error.
Can someone please let me know how to resolve this situation?
from __future__ import division
from __future__ import print_function
import csv
import numpy as np
import tensorflow as tf
from retrieval_model import setup_train_model
FLAGS = None
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
def get_name(read_file):
feat_lst = []
identifier_lst = []
with open(read_file, 'r') as csvfile:
read_file = csv.reader(csvfile, delimiter=',')
for row in read_file:
feat = row[:-1]
s_feat = [float(i) for i in feat]
identifier = row[-1]
feat_lst.append(s_feat)
identifier_lst.append(identifier)
return feat_lst, identifier_lst
def get_batch(batch_index, batch_size, labels, f_lst):
start_ind = batch_index * batch_size
end_ind = start_ind + batch_size
return f_lst[start_ind:end_ind], labels[start_ind:end_ind]
def creat_dict(orig_labels):
dict = {}
count = 0
for x in orig_labels:
n_label = dict.get(x, None)
if n_label is None:
dict[x] = count
count += 1
return dict
def main(_):
save_dir = 'model/one-branch-ckpt'
train_file = 'gtrain.csv'
img_feat, img_labels = get_name(train_file)
map_dict = creat_dict(img_labels)
img_labels = [map_dict.get(x) for x in img_labels]
im_feat_dim = 4096
batch_size = 50
max_num_epoch = 10
steps_per_epoch = len(img_feat) // batch_size
num_steps = steps_per_epoch * max_num_epoch
# Setup placeholders for input variables.
im_feat_plh = tf.placeholder(tf.float32, shape=[batch_size, im_feat_dim])
label_plh = tf.placeholder(tf.int64, shape=(batch_size), name='labels')
train_phase_plh = tf.placeholder(tf.bool)
# Setup training operation.
t_l = setup_train_model(im_feat_plh, train_phase_plh, label_plh, classes)
# Setup optimizer.
global_step = tf.Variable(0, trainable=False)
init_learning_rate = 0.0001
learning_rate = tf.train.exponential_decay(init_learning_rate, global_step,
steps_per_epoch, 0.794, staircase=True)
optim = tf.train.AdamOptimizer(init_learning_rate)
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
train_step = optim.minimize(t_l, global_step=global_step)
# Setup model saver.
saver = tf.train.Saver(save_relative_paths=True,max_to_keep=1)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for i in range(num_steps):
im_feats, labels = get_batch(
i % steps_per_epoch, batch_size, img_labels, img_feat)
feed_dict = {
im_feat_plh: im_feats,
label_plh: labels,
train_phase_plh: True,
}
[_, loss_val] = sess.run([train_step, t_l], feed_dict=feed_dict)
if i % 100 == 0:
print('Epoch: %d Step: %d Loss: %f' % (i // steps_per_epoch, i, loss_val))
if i % steps_per_epoch == 0 and i > 0:
print('Saving checkpoint at step %d' % i)
saver.save(sess, save_dir, global_step=global_step)
if __name__ == '__main__':
np.random.seed(0)
tf.set_random_seed(0)
tf.app.run(main=main)
**************************retrieval_model********************************
def setup_train_model(im_feats, train_phase, im_labels, nrof_classes):
alfa = 0.9
# nrof_classes = 28783
i_embed = embedding_model(im_feats, train_phase, im_labels)
c_l = embedding_loss(i_embed, im_labels, alfa, nrof_classes)
loss = softmax_loss(i_embed, im_labels)
total_loss = loss + c_l
return total_loss
def add_fc(inputs, outdim, train_phase, scope_in):
fc = fully_connected(inputs, outdim, activation_fn=None, scope=scope_in + '/fc')
fc_bnorm = tf.layers.batch_normalization(fc, momentum=0.1, epsilon=1e-5,
training=train_phase, name=scope_in + '/bnorm')
fc_relu = tf.nn.relu(fc_bnorm, name=scope_in + '/relu')
fc_out = tf.layers.dropout(fc_relu, seed=0, training=train_phase, name=scope_in + '/dropout')
return fc_out
def embedding_loss(features, label, alfa, nrof_classes):
nrof_features = features.get_shape()[1]
centers = tf.get_variable('centers', [nrof_classes, nrof_features], dtype=tf.float32,
initializer=tf.constant_initializer(0), trainable=False)
label = tf.reshape(label, [-1])
centers_batch = tf.gather(centers, label)
diff = (1 - alfa) * (centers_batch - features)
#centers = tf.scatter_sub(centers, label, diff)
center_loss = tf.reduce_mean(tf.square(features - centers_batch))
#softmax_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label, logits=features))
#total_loss = softmax_loss + center_loss
return center_loss
def embedding_model(im_feats, train_phase, im_labels,
fc_dim=2048, embed_dim=512):
# Image branch.
im_fc1 = add_fc(im_feats, fc_dim, train_phase, 'im_embed_1')
im_fc2 = fully_connected(im_fc1, embed_dim, activation_fn=None,
scope='im_embed_2')
return tf.nn.l2_normalize(im_fc2, 1, epsilon=1e-10)
def softmax_loss(feat, im_labels):
label = tf.reshape(im_labels, [-1])
softmax = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label, logits=feat))
return softmax
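(Not part of the original post.) Besides exploding activations, a common trigger for nan from tf.nn.sparse_softmax_cross_entropy_with_logits is a label that falls outside [0, num_classes) relative to the last dimension of the logits; in softmax_loss above the logits are the 512-dimensional embedding, so any mapped label >= 512 would be out of range. The int64/float32 split between labels and logits is expected and does not itself need clipping. A small sanity check along those lines (hypothetical helper, not from the post):

import numpy as np

def check_labels(labels, num_logit_classes):
    # sparse_softmax_cross_entropy_with_logits requires 0 <= label < logits.shape[-1]
    labels = np.asarray(labels)
    assert labels.min() >= 0 and labels.max() < num_logit_classes, (
        'labels span [%d, %d] but the logits only cover %d classes'
        % (labels.min(), labels.max(), num_logit_classes))

# e.g. with the 512-dim embedding used as logits above:
# check_labels(img_labels, 512)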

Applying Normalization to Inputs in Tensorflow

I have created a custom class to be an ML model, and it is working fine, but I would like to normalize the inputs as they have a wide range of values (e.g. 0, 20000, 500, 10, 8). Currently, as a way of normalizing the inputs, I'm applying lambda x: np.log(x + 1) to each input (the +1 is so it doesn't error out when 0 is passed in). Would a normalization layer be better than my current approach? If so, how would I go about implementing it? My code for the model is below:
class FollowModel:
def __init__(self, input_shape, output_shape, hidden_layers, input_labels, learning_rate=0.001):
tf.reset_default_graph()
assert len(input_labels) == input_shape[1], 'Incorrect number of input labels!'
# Placeholders for input and output data
self.input_labels = input_labels
self.input_shape = input_shape
self.output_shape = output_shape
self.X = tf.placeholder(shape=input_shape, dtype=tf.float64, name='X')
self.y = tf.placeholder(shape=output_shape, dtype=tf.float64, name='y')
self.hidden_layers = hidden_layers
self.learning_rate = learning_rate
# Variables for two group of weights between the three layers of the network
self.W1 = tf.Variable(np.random.rand(input_shape[1], hidden_layers), dtype=tf.float64)
self.W2 = tf.Variable(np.random.rand(hidden_layers, output_shape[1]), dtype=tf.float64)
# Create the neural net graph
self.A1 = tf.sigmoid(tf.matmul(self.X, self.W1))
self.y_est = tf.sigmoid(tf.matmul(self.A1, self.W2))
# Define a loss function
self.deltas = tf.square(self.y_est - self.y) # want this to be 0
self.loss = tf.reduce_sum(self.deltas)
# Define a train operation to minimize the loss
self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(self.loss)
#initialize
self.model_init = tf.global_variables_initializer()
self.trained = False
def train(self, Xtrain, ytrain, Xtest, ytest, training_steps, batch_size, print_progress=True):
#intiialize session
self.trained = True
self.training_steps = training_steps
self.batch_size = batch_size
self.sess = tf.Session()
self.sess.run(self.model_init)
self.losses = []
self.accs = []
self.testing_accuracies = []
for i in range(training_steps*batch_size):
self.sess.run(self.optimizer, feed_dict={self.X: Xtrain, self.y: ytrain})
local_loss = self.sess.run(self.loss, feed_dict={self.X: Xtrain.values, self.y: ytrain.values})
self.losses.append(local_loss)
self.weights1 = self.sess.run(self.W1)
self.weights2 = self.sess.run(self.W2)
y_est_np = self.sess.run(self.y_est, feed_dict={self.X: Xtrain.values, self.y: ytrain.values})
correct = [estimate.argmax(axis=0) == target.argmax(axis=0)
for estimate, target in zip(y_est_np, ytrain.values)]
acc = 100 * sum(correct) / len(correct)
self.accs.append(acc)
if i % batch_size == 0:
batch_num = i / batch_size
if batch_num % 5 == 0:
self.testing_accuracies.append(self.test_accuracy(Xtest, ytest, False, True))
temp_table = pd.concat([Xtrain, ytrain], axis=1).sample(frac=1)
column_names = list(temp_table.columns.values)
X_columns, y_columns = column_names[0:len(column_names) - 2], column_names[len(column_names) - 2:]
Xtrain = temp_table[X_columns]
ytrain = temp_table[y_columns]
if print_progress: print('Step: %d, Accuracy: %.2f, Loss: %.2f' % (int(i/batch_size), acc, local_loss))
if print_progress: print("Training complete!\nloss: {}, hidden nodes: {}, steps: {}, epoch size: {}, total steps: {}".format(int(self.losses[-1]*100)/100, self.hidden_layers, training_steps, batch_size, training_steps*batch_size))
self.follow_accuracy = acc
return acc
def test_accuracy(self, Xtest, ytest, print_progress=True, return_accuracy=False):
if self.trained:
X = tf.placeholder(shape=Xtest.shape, dtype=tf.float64, name='X')
y = tf.placeholder(shape=ytest.shape, dtype=tf.float64, name='y')
W1 = tf.Variable(self.weights1)
W2 = tf.Variable(self.weights2)
A1 = tf.sigmoid(tf.matmul(X, W1))
y_est = tf.sigmoid(tf.matmul(A1, W2))
# Calculate the predicted outputs
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
y_est_np = sess.run(y_est, feed_dict={X: Xtest, y: ytest})
correctly_followed = 0
incorrectly_followed = 0
missed_follows = 0
correctly_skipped = 0
for estimate, actual in zip(y_est_np, ytest.values):
est = estimate.argmax(axis=0)
# print(estimate)
actual = actual.argmax(axis=0)
if est == 1 and actual == 0: incorrectly_followed += 1
elif est == 1 and actual == 1: correctly_followed += 1
elif est == 0 and actual == 1: missed_follows += 1
else: correctly_skipped += 1
# correct = [estimate.argmax(axis=0) == target.argmax(axis=0) for estimate, target in zip(y_est_np, ytest.values)]
total_followed = incorrectly_followed + correctly_followed
total_correct = correctly_followed + correctly_skipped
total_incorrect = incorrectly_followed + missed_follows
try: total_accuracy = int(total_correct * 10000 / (total_correct + total_incorrect)) / 100
except: total_accuracy = 0
total_skipped = correctly_skipped + missed_follows
try: follow_accuracy = int(correctly_followed * 10000 / total_followed) / 100
except: follow_accuracy = 0
try: skip_accuracy = int(correctly_skipped * 10000 / total_skipped) / 100
except: skip_accuracy = 0
if print_progress: print('Correctly followed {} / {} ({}%), correctly skipped {} / {} ({}%)'.format(
correctly_followed, total_followed, follow_accuracy, correctly_skipped, total_skipped, skip_accuracy))
self.follow_accuracy = follow_accuracy
if return_accuracy:
return total_accuracy
else:
print('The model is not trained!')
def make_prediction_on_normal_data(self, input_list):
assert len(input_list) == len(self.input_labels), 'Incorrect number of inputs (had {} should have {})'.format(len(input_list), len(self.input_labels))
# from ProcessData import normalize_list
# normalize_list(input_list)
input_array = np.array([input_list])
X = tf.placeholder(shape=(1, len(input_list)), dtype=tf.float64, name='X')
y = tf.placeholder(shape=(1, 2), dtype=tf.float64, name='y')
W1 = tf.Variable(self.weights1)
W2 = tf.Variable(self.weights2)
A1 = tf.sigmoid(tf.matmul(X, W1))
y_est = tf.sigmoid(tf.matmul(A1, W2))
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
y_est_np = sess.run(y_est, feed_dict={X: input_array, y: self.create_blank_outputs()})
predicted_value = y_est_np[0].argmax(axis=0)
return predicted_value
def make_prediction_on_abnormal_data(self, input_list):
from ProcessData import normalize_list
normalize_list(input_list)
return self.make_prediction_on_normal_data(input_list)
def create_blank_outputs(self):
blank_outputs = np.zeros(shape=(1,2), dtype=np.int)
for i in range(len(blank_outputs[0])):
blank_outputs[0][i] = float(blank_outputs[0][i])
return blank_outputs
I don't see why you would want to create a layer that does that. The common practice is to preprocess your inputs the way you are currently doing.
Using the log operator is quite common for skewed data, but there are other preprocessing solutions, such as sklearn's MinMaxScaler and StandardScaler:
https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.MinMaxScaler.html
https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html
Those are just examples of two other ways to scale your data.
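For illustration (not part of the original answer), the usual pattern with either scaler is to fit it on the training inputs only and reuse the fitted statistics everywhere else; a sketch, assuming Xtrain/Xtest are the arrays fed to FollowModel:

import numpy as np
from sklearn.preprocessing import StandardScaler  # or MinMaxScaler

scaler = StandardScaler()
Xtrain_scaled = scaler.fit_transform(Xtrain)   # fit on the training data only
Xtest_scaled = scaler.transform(Xtest)         # reuse the same mean/std

# a single raw input row like the values in the question (shape must match Xtrain)
x_new = np.array([[0, 20000, 500, 10, 8]], dtype=float)
x_new_scaled = scaler.transform(x_new)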
There is also such a thing as BatchNorm, but it is not recommended as the first layer of the network, since the distribution of the input data is fixed and doesn't vary during training.

My LSTM code gives a flat prediction and I don't know what is wrong. Can anyone help take a look?

Here is the code. I think the class mylstm has a problem, but I cannot find it... The input is simple: just 7 columns of data.
I tried to print out all the tensors but did not find what was wrong. Thanks for the help!
class mylstm(nn.Module):
def __init__(self, input_size, hidden_size, T, logger):
super(mylstm, self).__init__()
self.T = T
self.input_size = input_size
self.hidden_size = hidden_size
self.logger = logger
self.lstm_layer = nn.LSTM(input_size = 7, hidden_size = hidden_size)
self.fc = nn.Linear(hidden_size, 1)
#self.fc.weight.data.normal_()
def forward(self, input_data):
hidden = self.init_hidden(input_data)
cell = self.init_hidden(input_data)
for t in range(self.T - 1):
if t < self.T - 1:
self.lstm_layer.flatten_parameters()
_, lstm_output = self.lstm_layer(input_data[:,t,:].unsqueeze(0), (hidden, cell))
hidden = lstm_output[0]
cell = lstm_output[1]
y_pred = self.fc(hidden[0])
return y_pred
def init_hidden(self, x):
return Variable(x.data.new(1, x.size(0), self.hidden_size).zero_())
# Train the model
class rnn:
def __init__(self, file_data, logger, input_size = 7, hidden_size = 64, T = 10,
learning_rate = 0.01, batch_size = 128, parallel = True, debug = False):
self.T = T
dat = pd.read_csv(file_data, nrows = 100 if debug else None)
self.logger = logger
self.logger.info("Shape of data: %s.\nMissing in data: %s.", dat.shape, dat.isnull().sum().sum())
self.X = dat.loc[:, [x for x in dat.columns.tolist()]].values
self.y = np.array(dat.rtm_spp)
self.batch_size = batch_size
self.lstm1 = mylstm(input_size = input_size,
hidden_size = hidden_size,
T = T, logger = logger)
if parallel:
self.lstm1 = nn.DataParallel(self.lstm1)
self.lstm1_optimizer = optim.Adam(params = filter(lambda p: p.requires_grad, self.lstm1.parameters()),
lr = learning_rate)
self.train_size = 20000
self.y = self.y - np.mean(self.y[:self.train_size]) # Question: why Adam requires data to be normalized?
self.logger.info("Training size: %d.", self.train_size)
def train(self, n_epochs = 10):
iter_per_epoch = int(np.ceil(self.train_size * 1. / self.batch_size))
logger.info("Iterations per epoch: %3.3f ~ %d.", self.train_size * 1. / self.batch_size, iter_per_epoch)
self.iter_losses = np.zeros(n_epochs * iter_per_epoch)
self.epoch_losses = np.zeros(n_epochs)
self.loss_func = nn.MSELoss()
n_iter = 0
learning_rate = 1.
for i in range(n_epochs):
perm_idx = np.random.permutation(self.train_size - self.T-1)
j = 0
while j < self.train_size:
batch_idx = perm_idx[j:(j + self.batch_size)]
X = np.zeros((len(batch_idx), self.T - 1, self.X.shape[1]))
#y_history = np.zeros((len(batch_idx), self.T - 1))
y_target = self.y[batch_idx + self.T]
for k in range(len(batch_idx)):
X[k, :, :] = self.X[batch_idx[k] : (batch_idx[k] + self.T - 1), :]
loss = self.train_iteration(X, y_target)
self.iter_losses[i * iter_per_epoch + j // self.batch_size] = loss
#if (j / self.batch_size) % 50 == 0:
j += self.batch_size
n_iter += 1
if n_iter % 10000 == 0 and n_iter > 0:
for param_group in self.lstm1_optimizer.param_groups:
param_group['lr'] = param_group['lr'] * 0.9
self.epoch_losses[i] = np.mean(self.iter_losses[range(i * iter_per_epoch, (i + 1) * iter_per_epoch)])
if i % 10 == 0:
self.logger.info("Epoch %d, loss: %3.3f.", i, self.epoch_losses[i])
y_train_pred = self.predict(on_train = True)
y_test_pred = self.predict(on_train = False)
def train_iteration(self, X,y_target):
self.lstm1_optimizer.zero_grad()
y_pred = self.lstm1(Variable(torch.from_numpy(X).type(torch.FloatTensor)))
y_true = Variable(torch.from_numpy(y_target).type(torch.FloatTensor))
y_true = y_true.view(y_true.shape[0],1)
y_pred=y_pred.squeeze(0)
print(y_pred)
loss = self.loss_func(y_pred, y_true)
loss.backward()
self.lstm1_optimizer.step()
return loss.data[0]
def predict(self, on_train = False):
if on_train:
y_pred = np.zeros(self.train_size - self.T +1)
else:
y_pred = np.zeros(self.X.shape[0] - self.train_size)
i = 0
while i < len(y_pred):
batch_idx = np.array(range(len(y_pred)))[i : (i + self.batch_size)]
X = np.zeros((len(batch_idx), self.T - 1, self.X.shape[1]))
#y_history = np.zeros((len(batch_idx), self.T - 1))
for j in range(len(batch_idx)):
if on_train:
X[j, :, :] = self.X[range(batch_idx[j], batch_idx[j] + self.T - 1), :]
else:
X[j, :, :] = self.X[range(batch_idx[j] + self.train_size - self.T, batch_idx[j] + self.train_size - 1), :]
input_data = Variable(torch.from_numpy(X).type(torch.FloatTensor))
# print(self.lstm1(torch.randn(128,9,7)))
#print(self.lstm1(X).data.numpy())
y_pred[i:(i + self.batch_size)] = self.lstm1(input_data).data.numpy()[:,0]
i += self.batch_size
return y_pred
model = rnn(file_data = 'L.csv', logger = logger, parallel = False,
learning_rate = .001)
model.train(n_epochs = 1000)
y_pred = model.predict()
It might be good if you could reduce your code to the simplest form that still reproduces your problem. Asking people to debug over 200 lines of code may be too big an ask. If you can give a small example of your problem, using a very simple NN model instead of the current one (see the sketch below), many others will be willing to look into your code and help identify the issue.
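For example, a stripped-down sketch of the same kind of setup (one nn.LSTM layer plus a linear head trained on random data, purely hypothetical shapes) could look like the following; if predictions already collapse to a flat line here, the problem is in the model or training loop, otherwise it is in the data handling:

import torch
import torch.nn as nn

torch.manual_seed(0)
T, D, H, N = 10, 7, 64, 256                # seq length, features, hidden units, samples

class TinyLSTM(nn.Module):
    def __init__(self):
        super().__init__()
        self.lstm = nn.LSTM(input_size=D, hidden_size=H, batch_first=True)
        self.fc = nn.Linear(H, 1)

    def forward(self, x):                  # x: (batch, T, D)
        out, _ = self.lstm(x)
        return self.fc(out[:, -1, :])      # predict from the last time step

X = torch.randn(N, T, D)
y = X[:, -1, :].sum(dim=1, keepdim=True)   # a simple learnable target

model = TinyLSTM()
opt = torch.optim.Adam(model.parameters(), lr=1e-2)
loss_fn = nn.MSELoss()
for epoch in range(200):
    opt.zero_grad()
    loss = loss_fn(model(X), y)
    loss.backward()
    opt.step()
print(loss.item())                          # should fall well below the initial value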
