I'm trying to implement a Bayesian neural network for genomic predictions. My X is a matrix that is scaled and gets normalized so that the values are between 0 and 1. The y is a vector of values that are again normalized so that the values are between 0 and 1.
The network seems to learn as seen here:
But, when I try to make predictions these look strange and seem to behave randomly. While the true values of y are distributed between 0 and 1. The predicted values are between ~ 0.4 - 0.6 and my R2 is negative. The MSE is around 0.02, what seems not to bad, but might be caused by the fact that the range of the predictions is quite narrow.
I'm a bit running out of ideas what could be wrong. Any suggestions are appreciated :).
I've also tried to predict the training data. That is also not working and I'm getting a negative R2.
X has the dimensions (5000,500) and y (5000,)
Increasing the number of hidden layers (up to 3) and units (upt to 128) doesn't change anything.
# Import necessary packages:
import sys
from os.path import join
import warnings
warnings.filterwarnings('ignore')
from IPython import display
import tensorflow as tf
from tensorflow import keras
import tensorflow_probability as tfp
import kerastuner as kt
from keras import backend as K
from keras import activations, initializers
from keras.layers import Layer
import tensorflow_docs as tfdocs
import tensorflow_docs.modeling
import tensorflow_docs.plots
import numpy as np
import numpy.ma as ma
import pandas as pd
import seaborn as sns
import matplotlib.pylab as plt
import time
import tempfile
import math
import statsmodels.api as sm
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import r2_score,mean_absolute_error
from sklearn.linear_model import LinearRegression, BayesianRidge
from sklearn.utils import shuffle
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, scale
from pandas_plink import read_plink
from pandas_plink import read_plink1_bin
from pandas_plink import get_data_folder
tfd = tfp.distributions
# Set random seed and start timer
np.random.seed(12345)
start = time.time()
### functions
def get_optimizer():
return tf.keras.optimizers.SGD()
def get_callbacks():
return [
#tfdocs.modeling.EpochDots(),
tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=1, patience=500, restore_best_weights=True),
]
def normalize_data(df):
return (df - df.min())/(df.max() - df.min())
def compile_model(model, optimizer=None):
if optimizer is None:
optimizer = get_optimizer()
model.compile(optimizer=optimizer,
loss=keras.losses.MeanSquaredError())
return model
def MSE(test,pred):
sqr_err = np.subtract(test,pred)**2
return sqr_err.mean()
# ## load & preprocess data
# ### load genotype data
G = np.genfromtxt("genotype.txt")
G[np.isnan(G)] = 0.
G = normalize_data(G)
print(G.mean())
print(G.var())
# ### load phenotype data
traits = np.genfromtxt("phenotype.txt")
traits = normalize_data(traits)
print(traits.mean())
print(traits.var())
# ### split training and validation set
train_X, test_X, train_y, test_y = train_test_split(G, traits, test_size = 0.2, random_state = 42)
X = np.concatenate((train_X, test_X), axis=0)
y = np.concatenate((train_y, test_y), axis=0)
# ### parameter definition
N = G.shape[0]
p = G.shape[1]
NUM_FOLDS = 5
kfold = KFold(n_splits=NUM_FOLDS, shuffle=True)
INPUT_SHAPE = X.shape[1]
OUTPUT_SHAPE = y.shape[0]
BATCH_SIZE = 32
STEPS_PER_EPOCH = math.ceil((X.shape[0]*(1-1/NUM_FOLDS)*0.8)/BATCH_SIZE)
MAX_EPOCHS = 5000
df = pd.DataFrame(columns = ['method','MSE','R2'])
histories = {}
# Specify the surrogate posterior over `keras.layers.Dense` `kernel` and `bias`.
def posterior_mean_field(kernel_size, bias_size=0, dtype=None):
n = kernel_size + bias_size
c = np.log(np.expm1(1.))
return tf.keras.Sequential([
tfp.layers.VariableLayer(2 * n, dtype=dtype),
tfp.layers.DistributionLambda(lambda t: tfd.Independent(
tfd.Normal(loc=t[..., :n],
scale=1e-5 + tf.nn.softplus(c + t[..., n:])),
reinterpreted_batch_ndims=1)),
])
# Specify the prior over `keras.layers.Dense` `kernel` and `bias`.
def prior_trainable(kernel_size, bias_size=0, dtype=None):
n = kernel_size + bias_size
return tf.keras.Sequential([
tfp.layers.VariableLayer(n, dtype=dtype),
tfp.layers.DistributionLambda(lambda t: tfd.Independent(
tfd.Normal(loc=t, scale=1),
reinterpreted_batch_ndims=1)),
])
def neg_log_likelihood(y_true, y_pred, sigma=1.0):
dist = tfp.distributions.Normal(loc=y_pred, scale=sigma)
return K.sum(-dist.log_prob(y_true))
#neg_log_likelihood = lambda y, p_y: -p_y.log_prob(y)
kl_loss_weight = 1.0 / STEPS_PER_EPOCH
histories = {}
fold_no = 1
for train, test in kfold.split(X, y):
model = tf.keras.Sequential([
keras.layers.InputLayer(input_shape=(INPUT_SHAPE,)),
tfp.layers.DenseVariational(units=32,
make_posterior_fn=posterior_mean_field,
make_prior_fn=prior_trainable,
kl_weight=kl_loss_weight,
activation='sigmoid'),
tfp.layers.DenseVariational(units=1,
make_posterior_fn=posterior_mean_field,
make_prior_fn=prior_trainable,
kl_weight=kl_loss_weight,
activation='sigmoid'),
tfp.layers.DistributionLambda(lambda t: tfd.Normal(loc=t, scale=1)),
])
model.compile(loss=neg_log_likelihood, optimizer=tf.keras.optimizers.Adam(lr=0.0001), metrics=['mse'])
history = model.fit(X[train], y[train],
validation_split = 0.2,
steps_per_epoch = STEPS_PER_EPOCH,
epochs=MAX_EPOCHS,
callbacks=get_callbacks(),
verbose=0)
histories['BNN2_'+str(fold_no)] = history
y_pred_list = []
for i in range(500):
y_pred = model.predict(X[test])
y_pred_list.append(y_pred)
y_preds = np.concatenate(y_pred_list, axis=1)
y_mean = np.mean(y_preds, axis=1)
m_err = MSE(y[test],y_mean)
r2_acc = r2_score(y[test],y_mean)
df = df.append({'MSE':m_err, 'R2':r2_acc, 'method':'BNN2'}, ignore_index=True)
fold_no = fold_no + 1
df.to_csv("results.csv")
Related
I've just recently started dabbling with Keras and I'm having a hell of a time trying to understand how to format my data to be used by Keras.
I'm currently trying to use a TimeDistributed ConvLSTM1D layer to predict stock prices. I've used a walk-forward approach to generate my dataset.
In creating the dataset, each data point comprises of 21 features for 5 days. I've arranged these data points into sequences of 10. The shapes for my test data and labels are as follows:
X.shape = (3467, 10, 5, 21)
Y.shape = (3467, 10)
From my understanding of a TimeDistributed(ConvLSTM1D) layer, I would need the following shape: (samples, time, rows, channels)
I assume samples should be 10 and time should be 5. I'm not clear on how I should reshape my array to account for rows and channels. Do I have 21 rows of 1 channel, or 1 row of 21 channels? Does it even matter?
Below is my code:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import tensorflow as tf
import sys
import warnings
from datetime import datetime, timedelta
warnings.simplefilter(action='ignore', category=FutureWarning)
from sklearn.model_selection import train_test_split
from keras.models import Sequential, model_from_json
from keras.layers import Conv2D, Conv1D, MaxPooling1D, Bidirectional,LSTM,Dropout,TimeDistributed,Flatten, ConvLSTM1D, ConvLSTM2D
from keras.layers import Dense, RepeatVector, LeakyReLU
from keras.layers import BatchNormalization
from keras.callbacks import TensorBoard
from talib import DEMA, ROCR, ATR, RSI, SMA, ADX, CCI, MACD, OBV, SAR, EMA, T3
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from matplotlib.ticker import AutoMinorLocator, FixedLocator
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif, f_regression
from sklearn import preprocessing
os.environ['CUDA_VISIBLE_DEVICES'] = '-1' #stop using GPU since there's a problem
def setup_TA(dataframe):
#Add these indicators
dataframe['DEMA']=DEMA(dataframe['Close'])
dataframe['ROCR']=ROCR(dataframe['Close'])
dataframe['ATR']=ATR(dataframe['High'], dataframe['Low'], dataframe['Close'])
dataframe['RSI']=RSI(dataframe['Close'])
dataframe['SMA']=SMA(dataframe['Close'])
dataframe['ADX']=ADX(dataframe['High'], dataframe['Low'], dataframe['Close'])
dataframe['CCI']=CCI(dataframe['High'], dataframe['Low'], dataframe['Close'])
dataframe['MACD'], dataframe['MACD Signal'], dataframe['MACD Hist'] = MACD(dataframe['Close'])
dataframe['OBV']=OBV(dataframe['Close'],dataframe['Volume'])
dataframe['SAR']=SAR(dataframe['High'], dataframe['Low'])
dataframe['EMA']=EMA(dataframe['Close'])
dataframe['T3']=T3(dataframe['Close'])
# Lets make everything relative to the close price as far as we can.
#Reduce these scales to between 0-1
dataframe['RSI'] = dataframe['RSI'] / 100
dataframe['CCI'] = dataframe['CCI'] / 100
dataframe['ADX'] = dataframe['ADX'] / 100
dataframe['SAR'] = dataframe['SAR'] / dataframe['Close']
dataframe['ATR'] = dataframe['ATR'] / dataframe['Close']
dataframe['OBV'] = dataframe['OBV'] / dataframe['Volume']
dataframe['Open'] = dataframe['Open'] / dataframe['Close']
dataframe['High'] = dataframe['High'] / dataframe['Close']
dataframe['Low'] = dataframe['Low'] / dataframe['Close']
dataframe['DEMA'] = dataframe['DEMA'] / dataframe['Close']
dataframe['SMA'] = dataframe['SMA'] / dataframe['Close']
dataframe['EMA'] = dataframe['EMA'] / dataframe['Close']
dataframe['T3'] = dataframe['T3'] / dataframe['Close']
dataframe['ROCR'] = dataframe['ROCR'] - 1
#Remove any rows with NaN
rows_with_nan=[]
for index, rows in dataframe.iterrows():
if rows.isnull().any():
rows_with_nan.append(index)
dataframe.drop(rows_with_nan,inplace=True)
dataframe=dataframe.reset_index(drop=True)
return dataframe
def feature_selection(dataframe):
selector = SelectKBest(score_func=f_regression, k='all')
selector.fit_transform(dataframe.drop(['Labels'], 1).to_numpy(), dataframe['Labels'].to_numpy())
sorted = (selector.scores_).argsort()[:]
print(sorted)
print(dataframe.columns[sorted])
print(selector.scores_)
print(list(dataframe))
#Open, High, Low, Close, Volumen, compound, msg count, DEMA, ROCR, ATR, RSI, SMA, ADX, CCI, MACD, MACD Signal, MACD Hist, OBV, SAR, EMA, T3
raw_data = pd.read_csv('./Test/test.csv', index_col=0)
ta_df = setup_TA(raw_data)
ta_df['msg count'] +=1
ta_df['Shifted Close'] = ta_df['Close'].shift(1)
ta_df['Shifted Volume'] = ta_df['Volume'].shift(1)
ta_df['Shifted msg count'] = ta_df['msg count'].shift(1)
ta_df['Close'] = np.log(ta_df['Close']/ta_df['Shifted Close'])
ta_df['Volume'] = np.log(ta_df['Volume']/ta_df['Shifted Volume'])
ta_df['msg count'] = np.log(ta_df['msg count']/ta_df['Shifted msg count'])
ta_df.drop(['Shifted Close', 'Shifted Volume', 'Shifted msg count'], 1, inplace=True)
ta_df = ta_df.drop(0,0).reset_index(drop=True)
ta_df['Close'] = (ta_df['Close'] -ta_df['Close'].mean())/ta_df['Close'].std()
ta_df['Volume'] = (ta_df['Volume'] - ta_df['Volume'].mean())/ta_df['Volume'].std()
ta_df['compound'] = (ta_df['compound'] - ta_df['compound'].mean())/ta_df['compound'].std()
ta_df['msg count'] = (ta_df['msg count'] - ta_df['msg count'].mean())/ta_df['msg count'].std()
ta_df['OBV'] = (ta_df['OBV'] - ta_df['OBV'].mean())/ta_df['OBV'].std()
ta_df['Labels'] = ta_df['Close'].shift(-1)
ta_df=ta_df[:-1]
clean_df = ta_df.drop(['Date','Labels'],1)
#create samples(5 day window, 10 previous weeks?)
X=[]
Y=[]
for k in range(len(clean_df)-10-5):
temp_x=[]
temp_y=[]
for i in range(10):
start_index=k+i
end_index = start_index + 4
temp2_x=[]
for j in range(start_index, end_index+1):
temp2_x.append(clean_df.xs(j))
temp_x.append(temp2_x)
temp_y.append(ta_df.iloc[end_index,ta_df.columns.get_loc('Labels')])
X.append(temp_x)
Y.append(temp_y)
pd.set_option('display.max_rows',None)
X=np.array(X)
Y=np.array(Y)
#X = X.reshape(X.shape[0],X.shape[1],X.shape[2],1,X.shape[3])
#Y = Y.reshape(Y.shape[0],Y.shape[1],1)
print(X.shape)
print(Y.shape)
train_X, test_X, train_Y, test_Y = train_test_split(X, Y, test_size=0.2, shuffle=False)
model = Sequential()
model.add(TimeDistributed(ConvLSTM1D(filters = 21, kernel_size=(3), padding='same', return_sequences=True, input_shape=(10,5,21,1))))
model.add(TimeDistributed(ConvLSTM1D(filters = 21, kernel_size=(3), padding='same', return_sequences=False)))
model.add(TimeDistributed((Flatten())))
model.add(TimeDistributed(Dense(21)))
model.add(Dense(1))
model.compile(loss='mse', optimizer='adam', metrics = ['mse'])
history = model.fit(train_X, train_Y, validation_data=(test_X, test_Y), epochs = 100, batch_size = 64, shuffle=False, verbose=1)
Hello I am new to building models in python and I am trying to learn because I need to train a model using Python and extract its weights and biases to build the model on FPGA
I was following this tutorial:
https://medium.com/#curiousily/human-activity-recognition-using-lstms-on-android-tensorflow-for-hackers-part-vi-492da5adef64
I have been trying to implement the same model in the previous link using Keras. However, when I tried to train the keras model the accuracy was 0.0905 eventhough it has the same structure as the tensorflow model.
import keras.layers
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from scipy import stats
from sklearn import metrics
import seaborn as sns
from keras.utils.vis_utils import plot_model
import pydot as py
RANDOM_SEED = 42
#Reading Dataset
columns = ['user', 'activity', 'timestamp', 'x_axis', 'y_axis', 'z_axis']
df = pd.read_csv('WISDM_ar_v1.1_raw.txt', header=None, names=columns)
df = df.dropna()
#data_preprocessing
N_TIME_STEPS = 200
N_FEATURES = 3
step = 20
segments = []
labels = []
for i in range(0, len(df) - N_TIME_STEPS, step):
xs = df['x_axis'].values[i:i + N_TIME_STEPS]
ys = df['y_axis'].values[i:i + N_TIME_STEPS]
zs = df['z_axis'].values[i:i + N_TIME_STEPS]
# Note that we take the most common activity and assign it as a label for the sequence.
label = stats.mode(df['activity'][i:i + N_TIME_STEPS])[0][0]
segments.append([xs, ys, zs])
labels.append(label)
#print(np.array(segments).shape)
#(54901,3,200)
reshaped_segments = np.array(segments, dtype=np.float32).reshape(-1, N_TIME_STEPS, N_FEATURES)
#print(reshaped_segments.shape)
#(54901,200,3)
# Labels one hot encoding
labels = np.array(pd.get_dummies(labels), dtype=np.float32)
#print(labels.shape)
#(54901,6)
X_train, X_test, y_train, y_test = train_test_split(reshaped_segments, labels, test_size=0.2, random_state=RANDOM_SEED)
N_CLASSES = 6
N_HIDDEN_UNITS = 64
model = Sequential()
model.add(
LSTM((N_HIDDEN_UNITS),input_shape=(N_TIME_STEPS,N_FEATURES),return_sequences=True,recurrent_activation='relu'))
model.add(LSTM(labels.shape[1],return_sequences=False,recurrent_activation='relu'))
print(model.summary())
opt = keras.optimizers.Adam(learning_rate=0.0025)
model.compile(loss= 'categorical_crossentropy',optimizer=opt,metrics=['categorical_accuracy'])
plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)
history = model.fit(X_train,y_train,epochs=50,batch_size=1024)
print(model.get_weights())
predictions = model.predict(X_test)
plt.plot(history.history['loss'])
plt.show()
categories = ['Downstairs', 'Jogging', 'Sitting', 'Standing', 'Upstairs', 'Walking']
max_test = np.argmax(y_test, axis=1)
max_predictions = np.argmax(predictions, axis=1)
confusion_matrix = metrics.confusion_matrix(max_test, max_predictions)
plt.figure(figsize=(16, 14))
sns.heatmap(confusion_matrix, xticklabels=categories, yticklabels=categories, annot=True, fmt="d");
plt.title("Confusion matrix")
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()
model.save('mymodel')
This is my Keras implemenation, if someone can guide me on what is the difference between both models or if I am missing something I would be very grateful
I'm working with some code that classifies the infamous dog vs cat image classification using a ResNet-18 model and I'd like to extend it to be able to classify for greater than two image categories (like dog vs cat vs hamster vs ....). In particular I've got 5 categories. I'm new at transfer learning and I'm not sure what I have to change in my code to make this work.
import torch
import numpy as np
import torch.nn.functional as F
from torch.nn import Linear
from torch.utils.data import DataLoader, random_split
from torch.optim import Adam
from torchvision.transforms import Compose, Resize, ToTensor
from torchvision.datasets import ImageFolder
from torchvision.models import resnet18
from matplotlib import pyplot as plt
import random
transform = Compose([Resize((128,128)), ToTensor()])
ds = ImageFolder("*Image_Folder*", transform=transform)
ds_train, ds_val = random_split(ds, [3250, 1073])
dl_train = DataLoader(ds_train, batch_size= 32, shuffle=True)
dl_val = DataLoader(ds_val, batch_size= len(ds_val), shuffle= True)
model = resnet18(pretrained=True)
model.requires_grad_(False)
model.fc = Linear(model.fc.in_features, 5)
X_val, y_val = next(dl_val.__iter__())
opt = torch.optim.Adam(model.parameters(), lr=0.001)
def accuracy(yy, y):
return torch.mean(1.0*(yy == y))
X_val.shape, y_val.shape
y_val = y_val.reshape(-1, 1).float()
for epoch in range(10):
losses = []
accs = []
losses_val = []
accs_val = []
model.train()
for X, y in dl_train:
y = y.reshape(-1, 1).float()
yy = torch.sigmoid(model(X))
loss = F.binary_cross_entropy(yy, y)
losses.append(loss.item())
loss.backward()
opt.step()
opt.zero_grad()
acc = accuracy(torch.round(yy), y)
accs.append(acc.item())
model.eval()
with torch.no_grad():
yy_val = torch.sigmoid(model(X_val))
loss_val = F.binary_cross_entropy(yy_val, y_val)
losses_val.append(loss_val.item())
acc_val = accuracy(torch.round(yy_val), y_val)
accs_val.append(acc_val.item())
print(f"Epoch {epoch}: t-loss = {np.mean(losses):.4f}, t-acc = {np.mean(accs):.4f}, v-loss = {loss_val:.4f}, v-acc = {acc_val:.4f}")
I believe the code is fine up to the for loop, however it could be something I need to add or alter. Currently the line loss = F.binary_cross_entropy(yy, y) is what's giving me an error ValueError: Using a target size (torch.Size([32, 1])) that is different to the input size (torch.Size([32, 5])) is deprecated. Please ensure they have the same size.
This is the data I'm working from: https://www.kaggle.com/alxmamaev/flowers-recognition
Binary Cross Entropy is a loss function designed for binary classification tasks.
In order to convert this model into one capable of 5-class classification, in addition to changing the final layer's width to 5, you need to change the loss function to a multinomial scorer e.g. CrossEntropyLoss().
I wrote a small
"Linear Regression Neural Network Tensorflow Keras Python program"
Input dataset is
y = mx + c straight line data.
Predicted y values are not correct and are giving horizontal line kind of
values, instead of a line with some slope.
I ran this program on Windows laptop with tensorflow, Keras and
Jupyter notebook.
What to do to fix this program please?
Thanks and best regards,
SSJ
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
n2 = 50
count = 20
n4 = n2 + count
p = 100
m = 10
c = 5
x = np.linspace(n2, n4, p)
y = m * x + c
x
y
plt.scatter(x,y)
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing
x_normalizer = preprocessing.Normalization(input_shape=[1,])
x_normalizer.adapt(x)
x_normalized = x_normalizer(x)
y_normalizer = preprocessing.Normalization(input_shape=[1,])
y_normalizer.adapt(y)
y_normalized = x_normalizer(y)
y_model = tf.keras.Sequential([
y_normalizer,
layers.Dense(1)
])
y_model.compile(optimizer='rmsprop', loss='mse', metrics = ['mae'])
y_hist = y_model.fit(x, y, epochs=100, verbose=0, validation_split = 0.2)
hist = pd.DataFrame(y_hist.history)
hist['epoch'] = y_hist.epoch
hist.head()
hist.tail()
xin = [51,53,59,64]
ypred = y_model.predict(xin)
ypred
plt.scatter(x, y)
plt.scatter(xin, ypred, color = 'r')
plt.grid(linestyle = '--')
Use StandardScaler instead of Normalization
Normalizer acts row-wise and StandardScaler column-wise.
Normalizer does not remove the mean and scale by deviation but scales
the whole row to unit norm.
Found here: Difference between StandardScaler and Normalizer
This is how you can process the data:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import StandardScaler
x = np.linspace(50, 70, 100).reshape(-1, 1)
y = 10 * x + 5
x_standard_scaler = StandardScaler().fit(x)
y_standard_scaler = StandardScaler().fit(y)
x_scaled = x_standard_scaler.transform(x)
y_scaled = y_standard_scaler.transform(y)
Remember that you need two separate scalers for x and y so don't use the same object for that. Also if you want to use that scaler to process new data for testing, save the scaler in some variable. A good practice is to not refit the scaler again on test data.
model = Sequential([
Dense(1, input_dim=1, activation='linear'),
])
model.compile(optimizer='rmsprop', loss='mse')
history = model.fit(x_scaled, y_scaled, epochs=1000, verbose=0, validation_split = 0.2).history
pd.DataFrame(history).plot()
plt.show()
As you can see the model is converging. Its worth to plot the loss history which helps to tell if your model is learning or not.
x_test = np.linspace(20, 100, 10).reshape(-1, 1)
y_test = 10 * x_test + 5
x_test_scaled = x_standard_scaler.transform(x_test)
y_test_scaled = y_standard_scaler.transform(y_test)
If you have a test data that you want to use for validation or just predict it, remember to use standard scaler again, but without fitting. It should be fitted on train data only in most cases.
y_test_pred_scaled = model.predict(x_test_scaled)
y_test_pred = y_standard_scaler.inverse_transform(y_test_pred_scaled)
plt.scatter(x_test, y_test, s=30, label='true')
plt.scatter(x_test, y_test_pred, s=15, label='pred')
plt.legend()
plt.show()
If you want to get your prediction rescaled back to its original range use inverse_transform. Notice that prediction on x_test after rescaling is very close to y_test.
I want to get a confusion matrix according to y_test and pred_test, but raise a question "At least one label specified must be in y_true",i don't know why
metrics.confusion_matrix(np.argmax(y_test,axis=1),pred_test)
y_test = [[0. 1. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 1.]
[0. 0. 0. 0. 1. 0.]
...
[0. 0. 0. 1. 0. 0.]
[0. 0. 1. 0. 0. 0.]
[0. 0. 1. 0. 0. 0.]]
pred_test = [1 4 5 ... 3 2 2]
np.argmax(y_test,axis=1) = [1 5 4 ... 3 2 2]
File "D:\Anaconda\lib\site-packages\sklearn\metrics\classification.py", line 259, in confusion_matrix
raise ValueError("At least one label specified must be in y_true")
ValueError: At least one label specified must be in y_true
I create a convolutional neural network. model and use cross validation for estimate, finally generate a confusion matrix. Now there are problems in generating confusion matrix.
The dataset is enter link description here.The complete code is as follows:
import matplotlib
#matplotlib.use('Agg')
import timing
from keras.layers import Input,Dense,Conv2D,MaxPooling2D,UpSampling2D,Flatten
from keras.models import Model
from keras import backend as K
from keras.utils.np_utils import to_categorical
import numpy as np
import pandas as pd
import seaborn as sns
from keras.models import Sequential# 导入Sequential
from keras.utils import np_utils, generic_utils
from keras.callbacks import LearningRateScheduler
import os
from keras.layers import Dropout
from keras.backend.tensorflow_backend import set_session
import tensorflow as tf
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.cross_validation import KFold, StratifiedKFold
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn import metrics
import time
from scipy import stats
from keras import optimizers
import matplotlib.pyplot as plt
from keras import regularizers
import keras
from keras.callbacks import TensorBoard
config = tf.ConfigProto(allow_soft_placement=True)
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
time1 = time.time()
class LossHistory(keras.callbacks.Callback):
def on_train_begin(self, logs={}):
self.losses = {'batch':[], 'epoch':[]}
self.accuracy = {'batch':[], 'epoch':[]}
self.val_loss = {'batch':[], 'epoch':[]}
self.val_acc = {'batch':[], 'epoch':[]}
def on_batch_end(self, batch, logs={}):
self.losses['batch'].append(logs.get('loss'))
self.accuracy['batch'].append(logs.get('acc'))
self.val_loss['batch'].append(logs.get('val_loss'))
self.val_acc['batch'].append(logs.get('val_acc'))
def on_epoch_end(self, batch, logs={}):
self.losses['epoch'].append(logs.get('loss'))
self.accuracy['epoch'].append(logs.get('acc'))
self.val_loss['epoch'].append(logs.get('val_loss'))
self.val_acc['epoch'].append(logs.get('val_acc'))
def loss_plot(self, loss_type):
iters = range(len(self.losses[loss_type]))
plt.figure()
# acc
plt.plot(iters, self.accuracy[loss_type], 'r', label='train acc')
# loss
plt.plot(iters, self.losses[loss_type], 'g', label='train loss')
if loss_type == 'epoch':
# val_acc
plt.plot(iters, self.val_acc[loss_type], 'b', label='val acc')
# val_loss
plt.plot(iters, self.val_loss[loss_type], 'k', label='val loss')
plt.grid(True)
plt.xlabel(loss_type)
plt.ylabel('acc-loss')
plt.legend(loc="center")
plt.show()
#plt.savefig('common.png')
#dataset
RANDOM_SEED = 42
def read_data(file_path):
column_names = ['user-id', 'activity', 'timestamp', 'x-axis', 'y-axis', 'z-axis']
m = pd.read_csv(file_path,names=column_names, header=None,sep=',')
return m
def feature_normalize(dataset):
mu = np.mean(dataset,axis=0)
sigma = np.std(dataset,axis=0)
return (dataset-mu)/sigma
dataset1 = read_data('ab.txt')
dataset = pd.DataFrame(dataset1)
dataset['x-axis'] = feature_normalize(dataset['x-axis'])
dataset['y-axis'] = feature_normalize(dataset['y-axis'])
dataset['z-axis'] = feature_normalize(dataset['z-axis'])
N_TIME_STEPS = 200
N_FEATURES = 3
step = 200
segments = []
labels = []
for i in range(0, len(dataset) - N_TIME_STEPS, step):
xs = dataset['x-axis'].values[i: i + N_TIME_STEPS]
ys = dataset['y-axis'].values[i: i + N_TIME_STEPS]
zs = dataset['z-axis'].values[i: i + N_TIME_STEPS]
label = stats.mode(dataset['activity'][i: i + N_TIME_STEPS])[0][0]
segments.append([xs, ys, zs])
labels.append(label)
print("reduced size of data", np.array(segments).shape)
reshaped_segments = np.asarray(segments,dtype=np.float32).reshape(-1,1, N_TIME_STEPS, 3)
print("Reshape the segments", np.array(reshaped_segments).shape)
#x_train1, x_val_test, y_train1, y_val_test = train_test_split(reshaped_segments, labels, test_size=0.25, random_state=RANDOM_SEED)
batch_size = 128
num_classes =6
def create_model():
input_shape = Input(shape=(1,200,3))
x = Conv2D(5, kernel_size=(1, 1), padding='valid')(input_shape)
x1 = keras.layers.concatenate([input_shape, x], axis=-1)
x = Conv2D(50, kernel_size=(1, 7),padding='valid',
kernel_initializer='glorot_uniform',
kernel_regularizer = keras.regularizers.l2(0.0015))(x1)
x = keras.layers.core.Activation('relu')(x)
x = MaxPooling2D(pool_size=(1, 2))(x)
x = Conv2D(50, kernel_size=(1, 7),padding='valid',kernel_initializer='glorot_uniform',
kernel_regularizer=keras.regularizers.l2(0.0015))(x)
x = keras.layers.core.Activation('relu')(x)
x = MaxPooling2D(pool_size=(1, 2))(x)
x = Flatten()(x)
x = Dropout(0.9)(x)
output = Dense(num_classes, activation='softmax',kernel_initializer='glorot_uniform',)(x)
model = Model(inputs=input_shape,outputs=output)
model.summary()
sgd = optimizers.SGD(lr=0.005,decay=1e-6,momentum=0.9,nesterov=True)
model.compile(loss=keras.losses.categorical_crossentropy,
optimizer=sgd,
metrics=['accuracy'])
return model
history = LossHistory()
epochs = 4000
#setting learning rate
def scheduler(epoch):
if epoch > 0.75 * epochs:
lr = 0.0005
elif epoch > 0.25 * epochs:
lr = 0.001
else:
lr = 0.005
return lr
scheduler = LearningRateScheduler(scheduler)
estimator = KerasClassifier(build_fn=create_model)
#divide dataset
scores = []
confusions = []
sign = ['DOWNSTAIRS','JOGGING','SITTING','STANDING','UPSTAIRS','WALKING']
encoder = LabelEncoder()
encoder_y = encoder.fit_transform(labels)
train_labels = to_categorical(encoder_y,num_classes=None)
#kfold = StratifiedKFold(reshaped_segments.shape[0],n_folds=10,shuffle=True,random_state=42)
kfold = StratifiedKFold(labels,n_folds=3,shuffle=True,random_state=42)
for train_index,test_index in kfold:
print(test_index)
x_train, x_test = reshaped_segments[train_index], reshaped_segments[test_index]
y_train, y_test = train_labels[train_index], train_labels[test_index]
estimator.fit(x_train,y_train,callbacks=[scheduler,history],epochs=10,batch_size=128,verbose=0)
scores.append(estimator.score(x_test,y_test))
print(y_test)
print(type(y_test))
pred_test = estimator.predict(x_test)
print(pred_test)
print(np.argmax(y_test,axis=1))
confusions.append(metrics.confusion_matrix(np.argmax(y_test,axis=1),pred_test,sign))
matrix = [[0,0,0,0,0,0],[0,0,0,0,0,0],[0,0,0,0,0,0],[0,0,0,0,0,0],[0,0,0,0,0,0],[0,0,0,0,0,0]]
for i in np.arange(n_folds-1):
for j in len(confusions[0]):
for k in len(confusions[0][0]):
matrix[j][k] = matrix[j][k] + confusions[i][j][k] + confusions[i+1][j][k]
model.save('model.h5')
model.save_weights('my_model_weights.h5')
print('score:',scores)
scores = np.mean(scores)
print('mean:',scores)
plt.figure(figsize=(16,14))
sns.heatmap(matrix, xticklabels=sign, yticklabels=sign, annot=True, fmt="d");
plt.title("CONFUSION MATRIX : ")
plt.ylabel('True Label')
plt.xlabel('Predicted label')
plt.savefig('cmatrix.png')
plt.show();
The error isn't in your main code but rather in the definition of sign. When you define sign as
sign = ['DOWNSTAIRS','JOGGING','SITTING','STANDING','UPSTAIRS','WALKING']
the system cannot read your labels as it is looking for the labels 0,1,2,3,4,5 as what the error was trying to say i.e. it could not find any labels in sign in y_pred.
changing sign to
sign = [1,2,3,4,5]
should fix the error. As for what you do now , its rather simple just map your result as this array and then during the actual predictions(Deployment) just swap out the numeric values for the labels.