For loop sending wrong data to list - Python

Below is the code. I am running a for loop to train on different training sizes. The first iteration works correctly: as training runs, the training and validation accuracies are appended to lists, then put into a DataFrame and finally written to a CSV. But on subsequent iterations, a data generator is appended to the list instead. Can anyone see where the issue is? I can't find it.
Also, if you have a better way of doing this (compiling data for analysis), I'm all ears.
The first block is the code snippet, the second block is the full code. The for loop starts about halfway down.
for i in range(1, 6):
    training_loader, validation_loader, training_ones, training_zeros, validation_ones, validation_zeros = switcher().sets(case)
    train_accuracy = []
    val_accuracy = []
    start_time = time.time()
    for epoch in tqdm(range(1, epochs + 1), total=epochs):
        train()
        train_acc = test(training_loader)
        train_accuracy.append(train_acc)
        val_acc = test(validation_loader)
        val_accuracy.append(val_acc)
    accuracy = pd.DataFrame()
    accuracy['train_acc'] = train_accuracy
    accuracy['val_acc'] = val_accuracy
    accuracy.to_csv(f'C:\\Users\\Anthony Sirico\\Documents\\GitHub\\PyGeo_Circuit_exp\\PyGeo_Circuit_exp\\imbalance_exp\\csv files\\accuracy_{i}.csv')
import sys
sys.path.insert(0, 'C:\\Users\\user\\Desktop\\imbalance_exp\\imbalance_exp\\imbalance_exp')
import torch
from torch_geometric.loader import DataLoader
import imb_dataset as imb
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GraphConv
from torch_geometric.nn import global_mean_pool
import neptune.new as neptune
import pandas as pd
from sklearn.metrics import confusion_matrix, matthews_corrcoef
import seaborn as sns
from neptune.new.types import File
from tqdm import tqdm
import time
known = imb.ImbalanceDataset(root='imb_50v2', set='known', split=0.5)
unknown = imb.ImbalanceDataset(root='imb_50v2', set='unknown', split=0.5)
all_data = imb.ImbalanceDataset(root='imb_50v2', set='All', split=None)
torch.manual_seed(12345)
known = known.shuffle()
lr = 0.001
training_perc = 0.9
N = len(known)
mini_batch_size = 32
epochs = 600
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
case = 2
class switcher:
    def sets(self, case):
        default = known
        return getattr(self, 'case_' + str(case), lambda: default)()

    def case_1(self):
        training_set = known[:int(training_perc * len(known))]
        validation_set = known[int(training_perc * len(known)):]
        training_loader = DataLoader(training_set, batch_size=mini_batch_size, shuffle=True)
        validation_loader = DataLoader(validation_set, batch_size=mini_batch_size, shuffle=False)
        training_ones = []
        training_zeros = []
        validation_ones = []
        validation_zeros = []
        for i in range(len(training_set)):
            if training_set[i].y == 1:
                training_ones.append(training_set[i])
            else:
                training_zeros.append(training_set[i])
        for i in range(len(validation_set)):
            if validation_set[i].y == 1:
                validation_ones.append(validation_set[i])
            else:
                validation_zeros.append(validation_set[i])
        return training_loader, validation_loader, training_ones, training_zeros, validation_ones, validation_zeros

    def case_2(self):
        one_index = round(len(known) * 0.25)
        known_ones = known[:one_index].copy()
        known_ones.shuffle()
        known_zeros = known[one_index:].copy()
        known_zeros.shuffle()
        training_ones = known_ones[:int(training_perc * len(known_ones))]
        training_zeros = known_zeros[:len(training_ones)]
        training_set = torch.utils.data.ConcatDataset([training_ones, training_zeros])
        validation_ones = known_ones[int(training_perc * len(known_ones)):]
        validation_zeros = known_zeros[len(training_ones):]
        validation_set = torch.utils.data.ConcatDataset([validation_ones, validation_zeros])
        training_loader = DataLoader(training_set, batch_size=mini_batch_size, shuffle=True)
        validation_loader = DataLoader(validation_set, batch_size=mini_batch_size, shuffle=False)
        training_ones = []
        training_zeros = []
        validation_ones = []
        validation_zeros = []
        for i in range(len(training_set)):
            if training_set[i].y == 1:
                training_ones.append(training_set[i])
            else:
                training_zeros.append(training_set[i])
        for i in range(len(validation_set)):
            if validation_set[i].y == 1:
                validation_ones.append(validation_set[i])
            else:
                validation_zeros.append(validation_set[i])
        return training_loader, validation_loader, training_ones, training_zeros, validation_ones, validation_zeros
class GCN(torch.nn.Module):
    def __init__(self, hidden_channels):
        super(GCN, self).__init__()
        torch.manual_seed(12345)
        self.conv1 = GraphConv(known.num_node_features, hidden_channels)
        self.conv2 = GraphConv(hidden_channels, hidden_channels)
        self.conv3 = GraphConv(hidden_channels, hidden_channels)
        self.lin = Linear(hidden_channels, known.num_classes)

    def forward(self, x, edge_index, batch):
        # 1. Obtain node embeddings
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = self.conv2(x, edge_index)
        x = x.relu()
        x = self.conv3(x, edge_index)
        # 2. Readout layer
        x = global_mean_pool(x, batch)  # [batch_size, hidden_channels]
        # 3. Apply a final classifier
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin(x)
        return x
model = GCN(hidden_channels=64).to(device)
print(model)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
criterion = torch.nn.CrossEntropyLoss()
def train():
    model.train()
    total_loss = 0
    for data in training_loader:  # Iterate in batches over the training dataset.
        data = data.to(device)
        out = model(data.x, data.edge_index, data.batch)  # Perform a single forward pass.
        loss = criterion(out, data.y)  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.
        optimizer.zero_grad()  # Clear gradients.
def test(loader):
    model.eval()
    correct = 0
    for data in loader:  # Iterate in batches over the training/test dataset.
        data = data.to(device)
        out = model(data.x, data.edge_index, data.batch)
        pred = out.argmax(dim=1)  # Use the class with highest probability.
        correct += int((pred == data.y).sum())  # Check against ground-truth labels.
    return correct / len(loader.dataset)  # Derive ratio of correct predictions.
output_frame = pd.DataFrame(columns=['epoch', 'lr', 'known', 'unknown', 'train_ones', 'train_zeros', 'val_ones', 'val_zeros', 'tn_all', 'fp_all', 'fn_all', 'tp_all', 'tn_known', 'fp_known', 'fn_known', 'tp_known', 'precision_all', 'recall_all', 'f1_all', 'accuracy_all', 'mcc_all', 'precision_known', 'recall_known', 'f1_known', 'accuracy_known', 'mcc_known', 'time_elapsed'])
for i in range(1, 6):
    training_loader, validation_loader, training_ones, training_zeros, validation_ones, validation_zeros = switcher().sets(case)
    train_accuracy = []
    val_accuracy = []
    start_time = time.time()
    for epoch in tqdm(range(1, epochs + 1), total=epochs):
        train()
        train_acc = test(training_loader)
        train_accuracy.append(train_acc)
        val_acc = test(validation_loader)
        val_accuracy.append(val_acc)
    accuracy = pd.DataFrame()
    accuracy['train_acc'] = train_accuracy
    accuracy['val_acc'] = val_accuracy
    accuracy.to_csv(f'C:\\Users\\Anthony Sirico\\Documents\\GitHub\\PyGeo_Circuit_exp\\PyGeo_Circuit_exp\\imbalance_exp\\csv files\\accuracy_{i}.csv')
    unknown_loader = DataLoader(unknown, batch_size=1, shuffle=False)
    predictions = []
    all_correct = 0
    known_correct = 0
    for test in unknown_loader:  # note: this loop variable reuses the name of the test() function defined earlier
        test = test.to(device)
        out = model(test.x, test.edge_index, test.batch)
        pred = out.argmax(dim=1)
        predictions.append(pred)
        all_correct += int((pred == test.y_all).sum())
        known_correct += int((pred == test.y_known).sum())
    pred_df = pd.DataFrame()
    pred_df['y_all_true'] = [i.item() for i in unknown.data.y_all]
    pred_df['y_known_true'] = [i.item() for i in unknown.data.y_known]
    pred_df['y_pred'] = [i.item() for i in predictions]
    pred_df.to_csv(f'C:\\Users\\Anthony Sirico\\Documents\\GitHub\\PyGeo_Circuit_exp\\PyGeo_Circuit_exp\\imbalance_exp\\csv files\\pred_df_{i}.csv')
    cf_matrix_all = confusion_matrix(pred_df['y_all_true'], pred_df['y_pred'])
    ax = sns.heatmap(cf_matrix_all, annot=True, fmt='g', cmap='Blues')
    ax.title.set_text('Confusion Matrix based on all data')
    tn_all, fp_all, fn_all, tp_all = cf_matrix_all.ravel()
    end_time = time.time()
    time_elapsed = end_time - start_time
    precision_all = tp_all / (tp_all + fp_all)
    recall_all = tp_all / (tp_all + fn_all)
    f1_all = 2 * (precision_all * recall_all) / (precision_all + recall_all)
    accuracy_all = (tp_all + tn_all) / (tp_all + tn_all + fp_all + fn_all)
    mcc_all = matthews_corrcoef(pred_df['y_all_true'], pred_df['y_pred'])
    cf_matrix_known = confusion_matrix(pred_df['y_known_true'], pred_df['y_pred'])
    ax = sns.heatmap(cf_matrix_known, annot=True, fmt='g', cmap='Blues')
    ax.title.set_text('Confusion Matrix based on known data')
    tn_known, fp_known, fn_known, tp_known = cf_matrix_known.ravel()
    precision_known = tp_known / (tp_known + fp_known)
    recall_known = tp_known / (tp_known + fn_known)
    f1_known = 2 * (precision_known * recall_known) / (precision_known + recall_known)
    accuracy_known = (tp_known + tn_known) / (tp_known + tn_known + fp_known + fn_known)
    mcc_known = matthews_corrcoef(pred_df['y_known_true'], pred_df['y_pred'])
    # 'epoch', 'lr', 'known', 'unknown', 'train_ones', 'train_zeros', 'val_ones', 'val_zeros', 'tn_all', 'fp_all', 'fn_all', 'tp_all', 'tn_known', 'fp_known', 'fn_known', 'tp_known
    output_frame.loc[i] = [epochs, lr, len(known), len(unknown), len(training_ones), len(training_zeros), len(validation_ones), len(validation_zeros), tn_all, fp_all, fn_all, tp_all, tn_known, fp_known, fn_known, tp_known, precision_all, recall_all, f1_all, accuracy_all, mcc_all, precision_known, recall_known, f1_known, accuracy_known, mcc_known, time_elapsed]
    output_frame.to_csv('C:\\Users\\Anthony Sirico\\Documents\\GitHub\\PyGeo_Circuit_exp\\PyGeo_Circuit_exp\\imbalance_exp\\csv files\\final_output.csv')
    training_perc -= 0.2
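On the "better way of compiling data" point, one tidier pattern (a sketch of the idea only, not the original code; evaluate is a stand-in name for the question's accuracy helper) is to collect one dict per epoch and one per run, and only build DataFrames at the end, so no accumulator or function name is reused between iterations:

import pandas as pd

run_records = []  # one summary dict per training run
for i in range(1, 6):
    epoch_rows = []  # fresh per-epoch accumulator for this run
    for epoch in range(1, epochs + 1):
        train()
        epoch_rows.append({'epoch': epoch,
                           'train_acc': evaluate(training_loader),
                           'val_acc': evaluate(validation_loader)})
    pd.DataFrame(epoch_rows).to_csv(f'accuracy_{i}.csv', index=False)
    run_records.append({'run': i, 'final_val_acc': epoch_rows[-1]['val_acc']})
pd.DataFrame(run_records).to_csv('final_output.csv', index=False)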

Related

PyTorch custom function fails to optimize

I have converted my problem into something more tangible.
For each sample I want to fit a curve of the form y_t = β0(x) + β1(x)·t + β2(x)·t² for t = 0, …, A−1, where x is the input variables, and I want to find the betas, as they are useful in another setting. I know that in this simple code example one could just estimate y and then apply OLS to get the betas; however, for my real-life usage this is not applicable.
I use PyTorch to keep track of gradients and SGD to optimize my parameters. A simple FFN is used as an approximation for the functions f that produce the betas.
My code is written below (I'm relatively new to PyTorch and tensors):
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import copy
import torch.nn.functional as F
import matplotlib.pyplot as plt
# Make data
df = pd.date_range('2020-12-01', '2022-05-31', freq='H')
df = pd.DataFrame(df[0:len(df)-1], columns={'DateTime'})
np.random.seed(1)
A = 10
X = pd.DataFrame(np.random.uniform(0, 1, len(df)), columns={'X1'})
X['X2'] = np.random.normal(0, 1, len(df))
X['X3'] = np.random.normal(0, 1, len(df))
Y = pd.DataFrame()
for i in range(0, len(df)):
    Y[str(i)] = X['X1'][i] ** 2 + X['X1'][i] * X['X2'][i] * np.array(range(0, A)) + X['X1'][i] ** 2 * X['X2'][i] * np.array(range(0, A)) ** 2
Y = Y.T.reset_index(drop=True)
df = pd.concat([df, Y], axis=1)
# Setting
valid_start = '2021-07-01'
test_start = '2022-01-01'
Out = 2
X_array = X.copy()
Y_train = df[df['DateTime'].dt.date.astype(str) < valid_start].drop(columns='DateTime')
Y_valid = df[df['DateTime'].dt.date.astype(str) >= valid_start].drop(columns='DateTime')
Y_test = df[df['DateTime'].dt.date.astype(str) >= valid_start].drop(columns='DateTime')
Y_test_date = pd.DataFrame(df[df['DateTime'].dt.date.astype(str) >= valid_start]['DateTime'], columns={'DateTime'})
X_train = X_array[X_array.index.isin(Y_train.index)]
X_valid = X_array[X_array.index.isin(Y_valid.index)]
X_test = X_array[X_array.index.isin(Y_test.index)]
# Make cuda
torch.cuda.is_available()
torch.cuda.device_count()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
# make tensor
Y_train_tensor = torch.tensor(np.float32(Y_train)).to(device)
X_train_tensor = torch.tensor(np.float32(X_train)).to(device)
Y_valid_tensor = torch.tensor(np.float32(Y_valid)).to(device)
X_valid_tensor = torch.tensor(np.float32(X_valid)).to(device)
Y_test_tensor = torch.tensor(np.float32(Y_test)).to(device)
X_test_tensor = torch.tensor(np.float32(X_test)).to(device)
## Model
class Model(nn.Module):
    def __init__(self):
        # how many layers?
        super().__init__()
        self.fc1 = nn.Linear(X_train_tensor.shape[1], 4)
        self.out = nn.Linear(4, int(Out) + 1)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.out(x)
        return x
net = Model().to(device)
criterion = nn.MSELoss()
optimizer = optim.SGD(net.parameters(), lr=0.001)
Range_tensor = torch.tensor(np.float32(range(0, A))).to(device)
# Early stopping
Network_weight = [copy.deepcopy(net.out.weight)]
Epoch = 100
patience = 10
Valloss_Array = []
trigger_times = 0
last_loss = 1000000000000000
Valloss_Array.append(last_loss)
for epoch in range(Epoch):  # loop over the dataset multiple times
    x = X_train_tensor
    y = Y_train_tensor
    # zero the parameter gradients
    optimizer.zero_grad()
    # forward + backward + optimize
    outputs = net(x)
    y_hat = []
    for e in range(0, len(y)):
        y_hat_temp = outputs[e, 0] + outputs[e, 1] * Range_tensor + outputs[e, 2] * torch.pow(Range_tensor, 2)
        y_hat.append(y_hat_temp)
    y_hat = torch.stack(y_hat)
    loss = criterion(y_hat, y)
    loss.backward()
    optimizer.step()
    print('Running training loss is; ' + str(loss.item()))
    Network_weight.append([copy.deepcopy(net.fc1.weight), copy.deepcopy(net.out.weight)])
    ################## Validation ##################
    # break
    x = X_valid_tensor
    y = Y_valid_tensor
    outputs = net(x)
    y_hat = []
    for e in range(0, len(y)):
        y_hat_temp = outputs[e, 0] + outputs[e, 1] * Range_tensor + outputs[e, 2] * torch.pow(Range_tensor, 2)
        y_hat.append(y_hat_temp)
    y_hat = torch.stack(y_hat)
    Valloss = criterion(y_hat, y)
    if Valloss.item() >= min(Valloss_Array):
        trigger_times += 1
        if trigger_times >= patience:
            print('Early stopping!')
            break
    else:
        trigger_times = 0
        Best = [copy.deepcopy(net.fc1.weight), copy.deepcopy(net.out.weight)]
    Valloss_Array.append(Valloss.item())
# With best weights
net.fc1.weight = Best[0]
net.out.weight = Best[1]
outputs = net(X_test_tensor)
y_hat = []
for e in range(0, len(Y_test_tensor)):
    y_hat_temp = outputs[e, 0] + outputs[e, 1] * Range_tensor + outputs[e, 2] * torch.pow(Range_tensor, 2)
    y_hat.append(y_hat_temp)
y_hat = torch.stack(y_hat)
BestModelLoss = criterion(y_hat, Y_test_tensor)
Y_Prediction = pd.DataFrame(y_hat.cpu().detach().numpy(), index=Y_test.index, columns={0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
Y_test_date['SE'] = ((Y_Prediction - Y_test) ** 2).sum(axis=1)
Y_Prediction['DateTime'] = Y_test_date['DateTime']
Y_test['DateTime'] = Y_test_date['DateTime']
### Plot
MinDate = Y_test_date.loc[Y_test_date['SE'] == Y_test_date['SE'].min(), 'DateTime']
YMinForecast = np.array(Y_Prediction[(Y_Prediction['DateTime'] == MinDate.values[0])].drop(columns='DateTime'))
YMin = np.array(Y_test[(Y_test['DateTime'] == MinDate.values[0])].drop(columns='DateTime'))
MaxDate = Y_test_date.loc[Y_test_date['SE'] == Y_test_date['SE'].max(), 'DateTime']
YMaxForecast = np.array(Y_Prediction[(Y_Prediction['DateTime'] == MaxDate.values[0])].drop(columns='DateTime'))
YMax = np.array(Y_test[(Y_test['DateTime'] == MaxDate.values[0])].drop(columns='DateTime'))
MedianValue = Y_test_date['SE'].iloc[(Y_test_date['SE']-Y_test_date['SE'].median()).abs().argsort()[:2]].values[0]
MedDate = Y_test_date.loc[Y_test_date['SE'] == MedianValue, 'DateTime']
YMedForecast = np.array(Y_Prediction[(Y_Prediction['DateTime'] == MedDate.values[0])].drop(columns='DateTime'))
YMed = np.array(Y_test[(Y_test['DateTime'] == MedDate.values[0])].drop(columns='DateTime'))
# plot
plt.plot(range(0, y_hat.shape[1]), YMinForecast.reshape(-1), label='Forecast')
plt.plot(range(0, y_hat.shape[1]), YMin.reshape(-1), label='Actual')
plt.grid()
plt.title("Best performance")
plt.legend()
plt.show()
plt.plot(range(0, y_hat.shape[1]), YMaxForecast.reshape(-1), label='Forecast')
plt.plot(range(0, y_hat.shape[1]), YMax.reshape(-1), label='Actual')
plt.grid()
plt.title("Poor performance")
plt.legend()
plt.show()
plt.plot(range(0, y_hat.shape[1]), YMedForecast.reshape(-1), label='Forecast')
plt.plot(range(0, y_hat.shape[1]), YMed.reshape(-1), label='Actual')
plt.grid()
plt.title("Median performance")
plt.legend()
plt.show()
Increasing the number of hidden layers and neurons solved the problem (both for the simplification and the real problem).
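As a side note, the per-sample Python loop that assembles y_hat can be replaced by a single batched matrix product, which is both faster and easier to read. A sketch under the same shapes as above (outputs is [N, 3] and Range_tensor has length A):

# basis rows are t**0, t**1, t**2 over the horizon, shape [3, A]
basis = torch.stack([torch.ones_like(Range_tensor),
                     Range_tensor,
                     Range_tensor ** 2])
y_hat = outputs @ basis  # [N, 3] x [3, A] -> [N, A]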

[Theano] TypeError: cost must be a scalar

I am working on a research project that requires me to write a regularizer for a DNN.
import lasagne
from lasagne.nonlinearities import leaky_rectify, softmax
import theano, theano.tensor as T
import numpy as np
import sklearn.datasets, sklearn.preprocessing, sklearn.model_selection
import matplotlib.pyplot as plt
from tabulate import tabulate
import time
import math
#psi function that will be used in the penalty function
def psi(g, l):
    m = g.shape[1]
    C = (1 / T.pow(2, m)) * (1 / T.pow(math.pi, ((m - 1) / 2))) / (T.gamma((m + 1) / 2))
    logDens = T.log(C) + m * T.log(l) - l * T.sqrt(T.sum(g ** 2))
    dens = T.exp(logDens)
    return dens
#pstar function that will be used in the penalty function
def pStar(g, lambda1, lambda0, theta):
    psi1 = psi(g, lambda1)
    psi0 = psi(g, lambda0)
    ## if a coefficient is really large then both these will numerically be zero
    if theta * psi1 == 0 and (1 - theta) * psi0 == 0:
        p = 1
    else:
        p = (theta * psi1) / (theta * psi1 + (1 - theta) * psi0)
    return p
# Separable
def pen_S(l):
    theta = 0.5
    lambda1 = 1
    lambda0 = 12
    for j in range(len(l)):
        t = l[j]
        m = t.shape[1]
        n = t.shape[0].eval()
        cost = T.zeros((1, 1))
        for i in range(n):
            g = t[i]
            temp = -lambda1 * T.sum(g ** 2) + T.log(pStar(T.zeros((1, m)), lambda1, lambda0, theta) / pStar(g, lambda1, lambda0, theta))
            cost = cost + temp
    return cost
# Number of simulations
N_runs = 1
# Maximum number of epochs
max_epochs = 1500
# Define number of layers and number of neurons
H_layers = np.asarray([40, 20])
# Minibatch size
batch_size = 300
# Lasagne Regularizers to be tested
regularizers = [pen_S]
# Define the regularization factors for each algorithm
reg_factors = [10**-3.5]
# Define the names (for display purposes)
names = ['SSGL_Sep']
# Load the dataset (DIGITS)
digits = sklearn.datasets.load_digits()
X = digits.data
y = digits.target
# MNIST
#mnist = sklearn.datasets.fetch_mldata('MNIST original', data_home='C:/Users/ISPAMM/Downloads')
#X = mnist.data
#y = mnist.target
# Preprocessing (input)
scaler = sklearn.preprocessing.MinMaxScaler()
X = scaler.fit_transform(X)
# Output structures
tr_errors = np.zeros((len(regularizers), N_runs))
tst_errors = np.zeros((len(regularizers), N_runs))
tr_times = np.zeros((len(regularizers), N_runs))
tr_obj = np.zeros((len(regularizers), N_runs, max_epochs))
sparsity_weights = np.zeros((len(regularizers), N_runs, len(H_layers)+1))
sparsity_neurons = np.zeros((len(regularizers), N_runs, len(H_layers)+1))
# Define the input and output symbolic variables
input_var = T.matrix(name='X')
target_var = T.ivector(name='y')
# Utility function for minibatches
def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    assert len(inputs) == len(targets)
    if shuffle:
        indices = np.arange(len(inputs))
        np.random.shuffle(indices)
    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets[excerpt]
for k in np.arange(0, N_runs):
    print("Run ", k + 1, " of ", N_runs, "...\n", end="")
    # Split the data
    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, test_size=0.25)
    # Define the network structure
    network = lasagne.layers.InputLayer((None, X.shape[1]), input_var)
    for h in H_layers:
        network = lasagne.layers.DenseLayer(network, h, nonlinearity=leaky_rectify, W=lasagne.init.GlorotNormal())
    network = lasagne.layers.DenseLayer(network, len(np.unique(y)), nonlinearity=softmax, W=lasagne.init.GlorotNormal())
    params_original = lasagne.layers.get_all_param_values(network)
    params = lasagne.layers.get_all_params(network, trainable=True)
    # Define the loss function
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    # Define the test function
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)
    test_fn = theano.function([input_var, target_var], test_acc, allow_input_downcast=True)
    for r in np.arange(0, len(regularizers)):
        # Set to original parameters
        lasagne.layers.set_all_param_values(network, params_original)
        # Define the regularized loss function
        loss_reg = loss.mean() + reg_factors[r] * lasagne.regularization.regularize_network_params(network, regularizers[r])
        # Update function
        # updates_reg = lasagne.updates.nesterov_momentum(loss_reg, params, learning_rate=0.01)
        updates_reg = lasagne.updates.adam(loss_reg, params)
        # Training function
        train_fn = theano.function([input_var, target_var], loss_reg, updates=updates_reg, allow_input_downcast=True)
        # Train network
        print("\tTraining with ", names[r], " regularization, epoch: ", end="")
        start = time.time()
        for epoch in range(max_epochs):
            loss_epoch = 0
            batches = 0
            if np.mod(epoch, 10) == 0:
                print(epoch, "... ", end="")
            for batch in iterate_minibatches(X_train, y_train, batch_size, shuffle=True):
                input_batch, target_batch = batch
                loss_epoch += train_fn(input_batch, target_batch)
                batches += 1
            tr_obj[r, k, epoch] = loss_epoch / batches
        end = time.time()
        tr_times[r, k] = end - start
        print(epoch, ".")
        # Final test with accuracy
        print("\tTesting the network with ", names[r], " regularization...")
        tr_errors[r, k] = test_fn(X_train, y_train)
        tst_errors[r, k] = test_fn(X_test, y_test)
        # Check sparsity
        params_trained = lasagne.layers.get_all_param_values(network, trainable=True)
        sparsity_weights[r, k, :] = [1 - (x.round(decimals=3).ravel().nonzero()[0].shape[0] / x.size) for x in params_trained[0::2]]
        sparsity_neurons[r, k, :] = [x.round(decimals=3).sum(axis=1).nonzero()[0].shape[0] for x in params_trained[0::2]]
tr_obj_mean = np.mean(tr_obj, axis=1)
# Plot the average loss
plt.figure()
plt.title('Training objective')
for r in np.arange(0, len(regularizers)):
    plt.semilogy(tr_obj_mean[r, :], label=names[r])
plt.legend()
# Print the results
print(tabulate([['Tr. accuracy [%]'] + np.mean(tr_errors, axis=1).round(decimals=4).tolist(),
                ['Test. accuracy [%]'] + np.mean(tst_errors, axis=1).round(decimals=4).tolist(),
                ['Tr. times [secs.]'] + np.mean(tr_times, axis=1).round(decimals=4).tolist(),
                ['Sparsity [%]'] + np.mean(sparsity_weights, axis=1).round(decimals=4).tolist(),
                ['Neurons'] + np.mean(sparsity_neurons, axis=1).round(decimals=4).tolist()],
               headers=[''] + names))
pen_S(l) above is my regularizer, but when I run the code to train the network, I am prompted with 'TypeError: cost must be a scalar.' I think the output of pen_S is already a scalar, though.
Can anyone help me with this?
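For what it's worth, Theano requires the cost handed to T.grad (and hence to the update rules) to be a 0-dimensional tensor, and T.zeros((1, 1)) has ndim 2, so a cost accumulated into it stays a 1x1 matrix rather than a scalar. A minimal sketch of the check and the collapse:

import theano.tensor as T

cost = T.zeros((1, 1))    # ndim == 2: a 1x1 matrix, not a scalar
print(cost.ndim)          # prints 2
scalar_cost = cost.sum()  # .sum() reduces it to a 0-d tensor (ndim == 0)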

MobileNet always predicts the same label

I trained a mobilenet_v2 model to classify an 8-class image dataset. I used Google's pretrained parameters to fine-tune and retrain the net on my dataset; the accuracy improved quickly during training and reached 98% by the end.
But when I use the trained model to predict test images, it always outputs the same label.
I have tried vgg16 too, and it works well.
Here is my code:
inference
def inference(inputs, is_training=True):
    with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope(is_training=is_training)):
        net, endpoints = mobilenet_v2.mobilenet(input_tensor=inputs, num_classes=n_class, conv_defs=V2_18_DEF)
        print('mobilenet output', net.get_shape().as_list())
    return net
loss
def loss(logit, label):
    losses = []
    with tf.name_scope('LOSS'):
        class_loss = tf.nn.softmax_cross_entropy_with_logits(labels=label, logits=logit)
        class_loss = tf.reduce_mean(class_loss, axis=0)
        tf.summary.scalar('class_loss', class_loss)
        regularization_loss = tf.reduce_sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        total_loss = class_loss + regularization_loss
    return total_loss
train_step
def train_op(loss):
    train_step = tf.train.AdamOptimizer(1e-4).minimize(loss)
    return train_step
test
def test():
    inputs = tf.placeholder(name='inputs', shape=[None, 224, 224, 3], dtype=tf.float32)
    label_holder = tf.placeholder(name='label_holder', shape=[None, n_class], dtype=tf.float32)
    if net_type == 'mobile_v2':
        test_logits = inference(inputs, is_training=False)
    elif net_type == 'mobile_v1':
        test_logits = inference_mobile_v1(inputs, is_training=False)
    elif net_type == 'vgg16':
        test_logits = inference_vgg(inputs, is_training=False)
    predict = tf.nn.softmax(test_logits)
    predict_result = tf.argmax(predict, axis=1, output_type=tf.int32)
    true_result = tf.argmax(label_holder, axis=1, output_type=tf.int32)
    correct_predict = tf.equal(tf.argmax(predict, axis=1, output_type=tf.int32),
                               tf.argmax(label_holder, axis=1, output_type=tf.int32))
    accuracy = tf.reduce_mean(tf.cast(correct_predict, tf.float32))
    pos = 0
    acc = 0
    count = 0
    sess = tf.Session()
    ckpt_path = ''
    try:
        ckpt_path = sys.argv[3]
    except:
        ckpt = tf.train.get_checkpoint_state(output_path)
        if ckpt and ckpt.model_checkpoint_path:
            ckpt_path = ckpt.model_checkpoint_path
    sess.run(tf.global_variables_initializer())
    variables_to_restore = slim.get_variables_to_restore()
    for var in variables_to_restore:
        print(var.name)
    saver = tf.train.Saver(variables_to_restore)
    saver.restore(sess, ckpt_path)
    dataset = Dataset(classes, 0.8)
    test_data, test_label = dataset.get_test_data()
    while pos < len(test_data):
        start = pos
        end = min(pos + batch_size, len(test_data))
        batch_img = test_data[start:end]
        batch_label = test_label[start:end]
        batch_data = {}
        batch_data[inputs] = batch_img
        batch_data[label_holder] = batch_label
        batch_acc, p_result, t_result = sess.run([accuracy, predict_result, true_result], feed_dict=batch_data)
        print('batch_acc', batch_acc)
        print(p_result)
        print(t_result)
        acc += batch_acc
        pos = end
        count += 1
    acc = acc / count
    print('test acc', acc)
    sess.close()
train function
def train():
    inputs = tf.placeholder(name='inputs', shape=[None, 224, 224, 3], dtype=tf.float32)
    labels_placeholder = {}
    label_holder = tf.placeholder(name='label_holder', shape=[None, n_class], dtype=tf.float32)
    if net_type == 'mobile_v2':
        train_logits = inference(inputs)
    elif net_type == 'mobile_v1':
        train_logits = inference_mobile_v1(inputs, is_training=True)
    elif net_type == 'vgg16':
        train_logits = inference_vgg(inputs, is_training=True)
    loss_op = loss(train_logits, label_holder)
    predict = tf.nn.softmax(train_logits)
    print('predict shape', predict.get_shape().as_list())
    predict_result = tf.argmax(predict, axis=1, output_type=tf.int32)
    correct_predict = tf.equal(tf.argmax(predict, axis=1, output_type=tf.int32),
                               tf.argmax(label_holder, axis=1, output_type=tf.int32))
    accuracy = tf.reduce_mean(tf.cast(correct_predict, tf.float32))
    for var in tf.trainable_variables():
        tf.summary.histogram(var.name, var)
    train_step = train_op(loss_op)
    # pretrain restore
    all_variable = tf.trainable_variables()
    pretrain_vals = []
    reader = pywrap_tensorflow.NewCheckpointReader(pretrain_model_path)
    var_to_shape_map = reader.get_variable_to_shape_map()
    for var in all_variable:
        print(var.name)
        if 'Logits' in var.name:
            continue
        if var.name.split(':')[0] in var_to_shape_map:
            print('restore', var.name)
            pretrain_vals.append(var)
    pretrain_saver = tf.train.Saver(pretrain_vals)
    variables_to_restore = slim.get_variables_to_restore()
    train_saver = tf.train.Saver(variables_to_restore)
    # tensorboard
    sess = tf.Session()
    merge_op = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter('./tensorboard/train', sess.graph)
    # init
    print('init & restore')
    sess.run(tf.global_variables_initializer())
    pretrain_saver.restore(sess, pretrain_model_path)
    dataset = Dataset(classes, 0.8)
    data_amount = dataset.data_amount
    each_epoch = int(data_amount / batch_size)
    total_step = int(each_epoch * EPOCH)
    global_step = 0
    test_data, test_label = dataset.next_batch(80)
    for epoch in range(EPOCH):
        pos = 0
        count = 0
        acc = 0
        while pos < len(test_data):
            start = pos
            end = min(pos + batch_size, len(test_data))
            batch_img = test_data[start:end]
            batch_label = test_label[start:end]
            batch_data = {}
            batch_data[inputs] = batch_img
            batch_data[label_holder] = batch_label
            batch_acc, t_result, loss_val = sess.run([accuracy, predict_result, loss_op], feed_dict=batch_data)
            print('batch_acc', batch_acc, loss_val)
            print(t_result)
            acc += batch_acc
            pos = end
            count += 1
        acc = acc / count
        print('=====test_acc===', acc)
        for epoch_step in range(each_epoch):
            batch_img, batch_label = dataset.next_batch(batch_size)
            batch_data = {}
            batch_data[inputs] = batch_img
            batch_data[label_holder] = batch_label
            merge_str, loss_val, acc, p_result = sess.run([merge_op, loss_op, accuracy, predict_result], feed_dict=batch_data)
            sess.run(train_step, feed_dict=batch_data)
            print('loss %f, acc %f, global step %d, epoch %d, epoch_step %d' % (loss_val, acc, global_step, epoch, epoch_step))
            print(p_result)
            summary_writer.add_summary(merge_str, global_step=global_step)
            summary_writer.flush()
            global_step += 1
        save_path = os.path.join(output_path, 'model.ckpt')
        train_saver.save(sess, save_path, global_step)
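One detail worth checking in a setup like this (an assumption about the cause, not a confirmed diagnosis): with slim-built MobileNet models, batch-norm moving averages are only refreshed if the ops in tf.GraphKeys.UPDATE_OPS actually run with the train step. If they never run, training-mode accuracy can look fine while inference-mode predictions collapse to a single class. The usual TF1 coupling is sketched below:

def train_op(loss):
    # run batch-norm moving-average updates together with the optimizer step
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_step = tf.train.AdamOptimizer(1e-4).minimize(loss)
    return train_step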

How does one use Hermite polynomials with Stochastic Gradient Descent (SGD)?

I was trying to train a simple polynomial linear model with PyTorch using Hermite polynomials, since they seem to have a better-conditioned Hessian.
To do that I decided to use hermvander, since it gives the Vandermonde matrix with each entry being a Hermite term. I made my feature vectors the output of hermvander:
Kern_train = hermvander(X_train,Degree_mdl)
However, when I proceeded to train, I got NaN all the time. I suspected it could be a step-size issue, but I decided to use the step size suggested by this question, which already has my example working in R, so I thought there was no need to search for a step size. However, when I tried it, it did not work.
Does anyone have any idea what's going on?
The same error occurs in TensorFlow:
import pdb
import numpy as np
from numpy.polynomial.hermite import hermvander
import random
import tensorflow as tf
def get_batch(X, Y, M):
    N = len(Y)
    valid_indices = np.array(range(N))
    batch_indices = np.random.choice(valid_indices, size=M, replace=False)
    batch_xs = X[batch_indices, :]
    batch_ys = Y[batch_indices]
    return batch_xs, batch_ys
##
D0=1
logging_freq = 100
## SGD params
M = 5
eta = 0.1
#eta = lambda i: eta/(i**0.6)
nb_iter = 500*10
##
lb,ub = 0,1
freq_sin = 4 # 2.3
f_target = lambda x: np.sin(2*np.pi*freq_sin*x)
N_train = 10
X_train = np.linspace(lb,ub,N_train)
Y_train = f_target(X_train).reshape(N_train,1)
x_horizontal = np.linspace(lb,ub,1000).reshape(1000,1)
## degree of mdl
Degree_mdl = N_train-1
## Hermite
Kern_train = hermvander(X_train,Degree_mdl)
print(f'Kern_train.shape={Kern_train.shape}')
Kern_train = Kern_train.reshape(N_train,Kern_train.shape[1])
##
Kern_train_pinv = np.linalg.pinv( Kern_train )
c_pinv = np.dot(Kern_train_pinv, Y_train)
nb_terms = c_pinv.shape[0]
##
condition_number_hessian = np.linalg.cond(Kern_train)
##
graph = tf.Graph()
with graph.as_default():
    X = tf.placeholder(tf.float32, [None, nb_terms])
    Y = tf.placeholder(tf.float32, [None, 1])
    w = tf.Variable(tf.zeros([nb_terms, 1]))
    #w = tf.Variable( tf.truncated_normal([Degree_mdl,1],mean=0.0,stddev=1.0) )
    #w = tf.Variable( 1000*tf.ones([Degree_mdl,1]) )
    ##
    f = tf.matmul(X, w)  # [N,1] = [N,D] x [D,1]
    #loss = tf.reduce_sum(tf.square(Y - f))
    loss = tf.reduce_sum(tf.reduce_mean(tf.square(Y - f), 0))
    l2loss_tf = (1 / N_train) * 2 * tf.nn.l2_loss(Y - f)
    ##
    learning_rate = eta
    #global_step = tf.Variable(0, trainable=False)
    #learning_rate = tf.train.exponential_decay(learning_rate=eta, global_step=global_step, decay_steps=nb_iter/2, decay_rate=1, staircase=True)
    train_step = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(loss)
with tf.Session(graph=graph) as sess:
    Y_train = Y_train.reshape(N_train, 1)
    tf.global_variables_initializer().run()
    # Train
    for i in range(nb_iter):
        #if i % (nb_iter/10) == 0:
        if i % (nb_iter / 10) == 0 or i == 0:
            current_loss = sess.run(fetches=loss, feed_dict={X: Kern_train, Y: Y_train})
            print(f'tf: i = {i}, current_loss = {current_loss}')
        ## train
        batch_xs, batch_ys = get_batch(Kern_train, Y_train, M)
        sess.run(train_step, feed_dict={X: batch_xs, Y: batch_ys})
print(f'condition_number_hessian = {condition_number_hessian}')
print('\a')
Totally self-contained code in PyTorch:
import numpy as np
from numpy.polynomial.hermite import hermvander
import random
import torch
from torch.autograd import Variable
def vectors_dims_dont_match(Y, Y_):
    '''
    Checks that vector Y and Y_ have the same dimensions. If they don't
    then there might be an error that could be caused due to wrong broadcasting.
    '''
    DY = tuple(Y.size())
    DY_ = tuple(Y_.size())
    if len(DY) != len(DY_):
        return True
    for i in range(len(DY)):
        if DY[i] != DY_[i]:
            return True
    return False

def index_batch(X, batch_indices, dtype):
    '''
    returns the batch indexed/sliced batch
    '''
    if len(X.shape) == 1:  # i.e. dimension (M,) just a vector
        batch_xs = torch.FloatTensor(X[batch_indices]).type(dtype)
    else:
        batch_xs = torch.FloatTensor(X[batch_indices, :]).type(dtype)
    return batch_xs

def get_batch2(X, Y, M, dtype):
    '''
    get batch for pytorch model
    '''
    # TODO fix and make it nicer, there is a pytorch forum question
    X, Y = X.data.numpy(), Y.data.numpy()
    N = len(Y)
    valid_indices = np.array(range(N))
    batch_indices = np.random.choice(valid_indices, size=M, replace=False)
    batch_xs = index_batch(X, batch_indices, dtype)
    batch_ys = index_batch(Y, batch_indices, dtype)
    return Variable(batch_xs, requires_grad=False), Variable(batch_ys, requires_grad=False)

def get_sequential_lifted_mdl(nb_monomials, D_out, bias=False):
    return torch.nn.Sequential(torch.nn.Linear(nb_monomials, D_out, bias=bias))

def train_SGD(mdl, M, eta, nb_iter, logging_freq, dtype, X_train, Y_train):
    ##
    #pdb.set_trace()
    N_train, _ = tuple(X_train.size())
    #print(N_train)
    for i in range(1, nb_iter + 1):
        # Forward pass: compute predicted Y using operations on Variables
        batch_xs, batch_ys = get_batch2(X_train, Y_train, M, dtype)  # [M, D], [M, 1]
        ## FORWARD PASS
        y_pred = mdl.forward(batch_xs)
        ## Check vectors have same dimension
        if vectors_dims_dont_match(batch_ys, y_pred):
            raise ValueError('Your vectors don\'t have matching dimensions. It will lead to errors.')
        ## LOSS + Regularization
        batch_loss = (1 / M) * (y_pred - batch_ys).pow(2).sum()
        ## BACKWARD PASS
        batch_loss.backward()  # Use autograd to compute the backward pass. Now w will have gradients
        ## SGD update
        for W in mdl.parameters():
            delta = eta(i) * W.grad.data
            W.data.copy_(W.data - delta)
        ## train stats
        if i % (nb_iter / 10) == 0 or i == 0:
            #X_train_, Y_train_ = Variable(X_train), Variable(Y_train)
            X_train_, Y_train_ = X_train, Y_train
            current_train_loss = (1 / N_train) * (mdl.forward(X_train_) - Y_train_).pow(2).sum().data.numpy()
            print('\n-------------')
            print(f'i = {i}, current_train_loss = {current_train_loss}\n')
            print(f'eta(i)*W.grad.data = {eta(i) * W.grad.data}')
            print(f'W.grad.data = {W.grad.data}')
        ## Manually zero the gradients after updating weights
        mdl.zero_grad()
    final_sgd_error = current_train_loss
    return final_sgd_error
##
D0=1
logging_freq = 100
#dtype = torch.cuda.FloatTensor
dtype = torch.FloatTensor
## SGD params
M = 5
eta0 = 0.1
eta = lambda i: eta0 / (i ** 0.6)
nb_iter = 500*10
##
lb,ub = 0,1
freq_sin = 4 # 2.3
f_target = lambda x: np.sin(2*np.pi*freq_sin*x)
N_train = 10
X_train = np.linspace(lb,ub,N_train)
Y_train = f_target(X_train).reshape(N_train,1)
x_horizontal = np.linspace(lb,ub,1000).reshape(1000,1)
## degree of mdl
Degree_mdl = N_train-1
## Hermite
Kern_train = hermvander(X_train,Degree_mdl)
Kern_train = Kern_train.reshape(N_train, Kern_train.shape[1])
##
Kern_train_pinv = np.linalg.pinv( Kern_train )
c_pinv = np.dot(Kern_train_pinv, Y_train)
##
condition_number_hessian = np.linalg.cond(Kern_train)
## linear mdl to train with SGD
nb_terms = c_pinv.shape[0]
mdl_sgd = get_sequential_lifted_mdl(nb_monomials=nb_terms,D_out=1, bias=False)
mdl_sgd[0].weight.data.normal_(mean=0,std=0.001)
mdl_sgd[0].weight.data.fill_(0)
## Make polynomial Kernel
Kern_train_pt, Y_train_pt = Variable(torch.FloatTensor(Kern_train).type(dtype), requires_grad=False), Variable(torch.FloatTensor(Y_train).type(dtype), requires_grad=False)
final_sgd_error = train_SGD(mdl_sgd, M,eta,nb_iter,logging_freq ,dtype, Kern_train_pt,Y_train_pt)
## PRINT ERRORS
from plotting_utils import *
train_error_pinv = (1/N_train)*(np.linalg.norm(Y_train-np.dot(Kern_train,c_pinv))**2)
print('\n-----------------')
print(f'N_train={N_train}')
print(f'train_error_pinv = {train_error_pinv}')
print(f'final_sgd_error = {final_sgd_error}')
print(f'condition_number_hessian = {condition_number_hessian}')
print('\a')
Maybe it's a bit late, but you might have a look at this https://github.com/Orcuslc/OrthNet
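Another thing that may help (an assumption, not verified on this exact setup): high-degree Hermite basis values grow quickly in magnitude, so the columns of the Vandermonde matrix have very different scales, which easily drives plain SGD to NaN. Normalizing each column before training keeps the updates bounded, as in this sketch:

import numpy as np
from numpy.polynomial.hermite import hermvander

Kern_train = hermvander(X_train, Degree_mdl)    # shape [N_train, Degree_mdl + 1]
col_norms = np.linalg.norm(Kern_train, axis=0)  # per-feature scale
Kern_train_scaled = Kern_train / col_norms      # unit-norm columns for SGD
# a weight vector learned on the scaled features maps back as w_original = w_scaled / col_norms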

Why is RPROP not updating the training accuracy?

I need help with my machine learning code. I am trying to train my network using RPROP, and the training accuracy is not updating.
Here's the code:
import theano
from theano import *
import theano.tensor as T
from theano.ifelse import ifelse
import numpy as np
from random import random
from sklearn import datasets
from sklearn.model_selection import train_test_split
import math
#GETTING TEST DATA
def get_iris_data():
    """ Read the iris data set and split them into training and test sets """
    iris = datasets.load_iris()
    data = iris.data
    target = iris.target
    # Prepend the column of 1s for bias
    N, M = data.shape
    all_X = np.ones((N, M + 1))
    all_X[:, 1:] = data
    # Convert into one-hot vectors
    num_labels = len(np.unique(target))
    all_Y = np.eye(num_labels)[target]  # One liner trick!
    return train_test_split(all_X, all_Y, test_size=0.33)
#SETTING INITIAL WEIGHT VALUES
def init_weights(shape):
    """ Weight initialization """
    weights = np.asarray(np.random.randn(*shape) * 0.01, dtype=theano.config.floatX)
    return theano.shared(weights)

def feedforward(X, w1, w2):
    hidden = T.nnet.sigmoid(T.dot(X, w1))
    out = T.nnet.softmax(T.dot(hidden, w2))
    return out
def rprop(cost, params, learning_rate):
    """ Back-propagation """
    # RPROP variables
    updates = []
    gradients = T.grad(cost=cost, wrt=params)
    # Default values
    prevparams = params
    deltaMax = 50.
    deltaMin = math.exp(-6)
    deltas = -0.1 * np.ones(len(params))
    prevgradients = np.zeros(len(params))
    npos = 1.2
    nneg = 0.5
    # All values
    allvalues = zip(params, prevparams, gradients, deltas, prevgradients)
    for param, prevparam, gradient, delta, prevgradient in allvalues:
        polarity = T.sgn(gradient * prevgradient)
        prevdelta = delta
        if T.gt(polarity, 0):
            delta = T.minimum(prevdelta * npos, deltaMax)
            change = -T.sgn(gradient) * delta
            prevgradient = gradient
        elif T.lt(polarity, 0):
            delta = T.maximum(prevdelta * nneg, deltaMin)
            prevgradient = 0
            change = -prevgradient
        else:
            change = -T.sgn(gradient) * delta
            prevgradient = gradient
        updates.append((param, param - change * learning_rate))
    return updates
#MAIN FUNCTION
def main():
    # Initialization of variables and data
    train_X, test_X, train_Y, test_Y = get_iris_data()
    learning_rate = 0.01
    X = T.fmatrix()
    Y = T.fmatrix()
    # Set ANN network size
    in_size = train_X.shape[1]
    hid_size = 256
    out_size = train_Y.shape[1]
    # Set weights in between
    w1 = init_weights((in_size, hid_size))
    w2 = init_weights((hid_size, out_size))
    # Forward propagation function for neuron activation and transfer function
    yHat = feedforward(X, w1, w2)
    # Backpropagation for correction
    cost = T.mean(T.nnet.categorical_crossentropy(yHat, Y))
    params = [w1, w2]
    updates = rprop(cost, params, learning_rate)
    # Train and predict
    train = theano.function(inputs=[X, Y], outputs=cost, updates=updates, allow_input_downcast=True)
    pred_Y = T.argmax(yHat, axis=1)
    predict = theano.function(inputs=[X], outputs=pred_Y, allow_input_downcast=True)
    # Run SGD
    for iter in range(2000):
        for i in range(len(train_X)):
            train(train_X[i: i + 1], train_Y[i: i + 1])
        train_accuracy = np.mean(np.argmax(train_Y, axis=1) == predict(train_X))
        test_accuracy = np.mean(np.argmax(test_Y, axis=1) == predict(test_X))
        print("Iteration = %d, train accuracy = %.2f%%, test accuracy = %.2f%%"
              % (iter + 1, 100 * train_accuracy, 100 * test_accuracy))

if __name__ == '__main__':
    main()
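Note that a Python if/elif over Theano symbolic expressions like T.gt(polarity, 0) does not branch per element at graph run time; element-wise branching in Theano is normally written with T.switch. For reference, the stateful RPROP rule the code is aiming for (per-weight steps grow when the gradient keeps its sign and shrink when it flips) looks like this plain NumPy sketch, with illustrative names rather than the Theano version above:

import numpy as np

def rprop_step(w, grad, prev_grad, delta, npos=1.2, nneg=0.5,
               delta_min=1e-6, delta_max=50.0):
    sign_change = np.sign(grad) * np.sign(prev_grad)
    # grow the per-weight step when the gradient keeps its sign, shrink on a flip
    delta = np.where(sign_change > 0, np.minimum(delta * npos, delta_max), delta)
    delta = np.where(sign_change < 0, np.maximum(delta * nneg, delta_min), delta)
    grad = np.where(sign_change < 0, 0.0, grad)  # suppress the update after a flip
    w = w - np.sign(grad) * delta
    return w, grad, delta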
