Speech Emotion Recognition model predicting wrong classes - python

I have created a model to predict emotion from a voice sample; the model is built from the code below.
There are 8 emotions in total:
neutral, calm, happy, sad, angry, fear, disgust, surprised
I first extracted the features of each and every voice sample and put them in a dataframe, then loaded
them one by one into X (and the labels into Y), then split the data as shown below:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Dropout, Flatten, Dense
from tensorflow.keras.callbacks import ReduceLROnPlateau

x_train, x_test, y_train, y_test = train_test_split(X, Y, random_state=0, shuffle=True)

scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# add a channel dimension for the Conv1D layers
x_train = np.expand_dims(x_train, axis=2)
x_test = np.expand_dims(x_test, axis=2)

model = Sequential()
model.add(Conv1D(256, kernel_size=5, strides=1, padding='same', activation='relu', input_shape=(x_train.shape[1], 1)))
model.add(MaxPooling1D(pool_size=5, strides=2, padding='same'))
model.add(Conv1D(256, kernel_size=5, strides=1, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=5, strides=2, padding='same'))
model.add(Conv1D(128, kernel_size=5, strides=1, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=5, strides=2, padding='same'))
model.add(Dropout(0.2))
model.add(Conv1D(64, kernel_size=5, strides=1, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=5, strides=2, padding='same'))
model.add(Flatten())
model.add(Dense(units=32, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(units=8, activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

rlrp = ReduceLROnPlateau(monitor='loss', factor=0.4, verbose=0, patience=2, min_lr=1e-7)
history = model.fit(x_train, y_train, batch_size=64, epochs=75, validation_data=(x_test, y_test), callbacks=[rlrp])
I got 89% accuracy in total.
But when I try to predict an emotion, even with a voice sample from the training set, it always gives
"surprised" as the predicted emotion.
The prediction code is below:
import numpy as np
import pandas as pd
from tensorflow import keras
from sklearn.preprocessing import StandardScaler

class livepredictions:
    def __init__(self, path, file):
        self.path = path
        self.file = file

    def loadmodel(self):
        self.loaded_model = keras.models.load_model(self.path)
        return self.loaded_model.summary()

    def makepredictions(self):
        scaler = StandardScaler()
        x = extract_features(self.file)
        Features = pd.DataFrame(x)
        x = np.expand_dims(x, axis=0)
        scaler.fit(x)
        x = scaler.transform(x)
        x = np.expand_dims(x, axis=2)
        # predict_classes exists on Sequential models up to TF 2.5;
        # later versions use np.argmax(model.predict(x), axis=1)
        predictions = self.loaded_model.predict_classes(x)
        print(predictions)
        print("Prediction is", " ", self.convertclasstoemotion(predictions))

    @staticmethod
    def convertclasstoemotion(pred):
        label_conversion = {'0': 'neutral',
                            '1': 'calm',
                            '2': 'happy',
                            '3': 'sad',
                            '4': 'angry',
                            '5': 'fear',
                            '6': 'disgust',
                            '7': 'surprised'}
        for key, value in label_conversion.items():
            if int(key) == pred:
                label = value
        return label
What am I doing wrong?
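For reference, the usual pattern is to persist the scaler fitted on the training data and reuse it at prediction time, rather than fitting a new StandardScaler on the single sample being predicted (fitting on one row makes its mean the row itself, so every feature is transformed to zero). A minimal sketch of that pattern, assuming joblib is available; extract_features is the question's own helper:
import joblib
import numpy as np

# at training time, right after scaler.fit_transform(x_train):
joblib.dump(scaler, 'scaler.pkl')

# at prediction time, reuse the stored training statistics instead of refitting:
scaler = joblib.load('scaler.pkl')
x = extract_features(file)                       # same extract_features as above
x = scaler.transform(np.expand_dims(x, axis=0))  # transform only, no fit
x = np.expand_dims(x, axis=2)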

Related

ValueError: Input 0 of layer sequential_4 is incompatible with the layer: : expected min_ndim=3, found ndim=2. Full shape received: (None, 5000)

I am working on a project where I need to segregate sleep data and its labels,
but I am stuck with the error mentioned above.
As I am new to machine learning, I would be grateful if someone could help me resolve this issue.
I have implemented the model using the following code:
import numpy as np
import tensorflow as tf

EEG_training_data = EEG_training_data.reshape(EEG_training_data.shape[0], EEG_training_data.shape[1], 1)
print(EEG_training_data.shape)  # (5360, 5000, 1)
EEG_validation_data = EEG_validation_data.reshape(EEG_validation_data.shape[0], EEG_validation_data.shape[1], 1)
print(EEG_validation_data.shape)  # (1396, 5000, 1)

label_class = np.unique(EEG_training_label)
num_classes = label_class.size  # num_classes = 5

# define the model using CNN
model = tf.keras.Sequential()
model.add(tf.keras.layers.Conv1D(filters=64, kernel_size=16, activation='relu', batch_input_shape=(None, 5000, 1)))  # input_shape=(5000, 1)
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.MaxPool1D(8, padding='same'))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(16, activation='relu'))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(num_classes, activation='softmax'))

# summary of the model defined
model.summary()

# define loss function
model.compile(
    loss='categorical_crossentropy',  # 'sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=[tf.keras.metrics.FalseNegatives(), tf.keras.metrics.FalsePositives(), 'accuracy'])

# one-hot encoding
y_train_hot = tf.keras.utils.to_categorical(EEG_training_label, num_classes)
print('New y_train shape: ', y_train_hot.shape)  # (5360, 5)
y_valid_hot = tf.keras.utils.to_categorical(EEG_validation_label, num_classes)
print('New y_valid shape: ', y_valid_hot.shape)  # (1396, 5)

# apply fit on data
model_history = model.fit(
    x=EEG_training_data,
    y=y_train_hot,
    batch_size=32,
    epochs=5,
    validation_data=(EEG_validation_data, y_valid_hot),
)

model_prediction = model.predict(EEG_testing_data)
predicted_matrix = tf.math.confusion_matrix(labels=EEG_testing_label.argmax(axis=1), predictions=model_prediction.argmax(axis=1)).numpy()
print(predicted_matrix)
I'm not experiencing your issue with the code you have provided. Try executing the following code; it should work as expected. If it does, double-check that the shapes of all your data (EEG_training_data etc.) match the ones below:
import tensorflow as tf
import numpy as np

EEG_training_data = np.ones((5360, 5000, 1))
EEG_validation_data = np.ones((1396, 5000, 1))
EEG_training_label = np.random.randint(5, size=5360)
EEG_validation_label = np.random.randint(5, size=1396)

label_class = np.unique(EEG_training_label)
num_classes = label_class.size
print(num_classes)  # prints 5

# define the model using CNN
model = tf.keras.Sequential()
model.add(tf.keras.layers.Conv1D(filters=64, kernel_size=16, activation='relu', batch_input_shape=(None, 5000, 1)))  # input_shape=(5000, 1)
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.MaxPool1D(8, padding='same'))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(16, activation='relu'))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(num_classes, activation='softmax'))

# summary of the model defined
model.summary()

# define loss function
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=[tf.keras.metrics.FalseNegatives(), tf.keras.metrics.FalsePositives(), 'accuracy'])

# one-hot encoding
y_train_hot = tf.keras.utils.to_categorical(EEG_training_label, num_classes)
print('New y_train shape: ', y_train_hot.shape)  # (5360, 5)
y_valid_hot = tf.keras.utils.to_categorical(EEG_validation_label, num_classes)
print('New y_valid shape: ', y_valid_hot.shape)  # (1396, 5)

# apply fit on data
model_history = model.fit(x=EEG_training_data,
                          y=y_train_hot,
                          batch_size=32,
                          epochs=5,
                          validation_data=(EEG_validation_data, y_valid_hot),
                          )

model_prediction = model.predict(EEG_validation_data)
# the integer labels can be used directly here; argmax is only needed for one-hot labels
predicted_matrix = tf.math.confusion_matrix(labels=EEG_validation_label, predictions=model_prediction.argmax(axis=1)).numpy()
print(predicted_matrix)
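If the error from the title does appear, it usually means the reshape shown in the question was never applied and the inputs are still 2-D, e.g. (5360, 5000) instead of (5360, 5000, 1). A small guard along these lines (a sketch, assuming the variables above):
# Conv1D expects (batch, timesteps, channels); add a channel axis if it is missing
if EEG_training_data.ndim == 2:
    EEG_training_data = EEG_training_data[..., np.newaxis]
if EEG_validation_data.ndim == 2:
    EEG_validation_data = EEG_validation_data[..., np.newaxis]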

Unknown entries in loss dictionary: Only expected following keys ['conv1d_4', 'conv1d_4']

I have a model with multiple outputs. I want to assign different labels to each of the loss functions and metrics. The code is as given below:
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Conv1D

input_img = Input(shape=(n_states, n_features))
x = Conv1D(32, kernel_size=5, activation='relu', padding='same')(input_img)
x = Conv1D(32, kernel_size=5, activation='relu', padding='same')(x)
x = Conv1D(32, kernel_size=5, activation='relu', padding='same')(x)
decoded = Conv1D(n_outputs, kernel_size=3, activation='linear', padding='same')(x)

model = Model(inputs=input_img, outputs=[decoded, decoded])
model.compile(loss={'regression': 'mean_squared_error',
                    'diffusion': 'mean_absolute_error'},
              loss_weights={'regression': 1.0,
                            'diffusion': 0.5},
              optimizer='adam',
              metrics={'regression': coeff_determination,
                       'diffusion': coeff_determination})
model.summary()

history_callback = model.fit(x_train,
                             {'regression': y_train, 'diffusion': y_train},
                             batch_size=batch_size,
                             epochs=epochs,
                             validation_data=(x_valid, {'regression': y_valid, 'diffusion': y_valid}),
                             verbose=1)
If I run the above model, I get an error of unknown entries in the loss dictionary. Specifically, the error is: Unknown entries in loss dictionary: ['diffusion', 'regression']. Only expected following keys: ['conv1d_4', 'conv1d_4'].
How can I give a different name to each of the loss functions? Thank you.
You need to match the names of your outputs with the loss dictionary keys. Here, you didn't name your outputs, so they default to conv1d_4 in the name space. Try:
decoded1 = Conv1D(n_outputs, kernel_size=3, activation='linear',
                  padding='same', name='diffusion')(x)
decoded2 = Conv1D(n_outputs, kernel_size=3, activation='linear',
                  padding='same', name='regression')(x)
I doubled your output because I don't think you can apply two different losses to the same output.
Here's a minimal example of matching output/loss dictionary keys:
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Dense
import numpy as np

x_train = np.random.rand(1000, 10)
y_train = np.random.rand(1000)

inputs = Input(shape=(10,))
x = Dense(32, activation='relu')(inputs)
out1 = Dense(1, name='first_output')(x)
out2 = Dense(1, name='second_output')(x)
model = Model(inputs=inputs, outputs=[out1, out2])

model.compile(loss={'first_output': 'mean_squared_error',
                    'second_output': 'mean_absolute_error'},
              optimizer='adam')

history_callback = model.fit(x_train,
                             {'first_output': y_train, 'second_output': y_train},
                             batch_size=8, epochs=1)
Notice that loss dictionary keys match the output keys. The same should be done with metrics, loss weights, validation data, etc.
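The same dictionary-key pattern extends to metrics and loss weights. A sketch of the compile call above with those added ('mae' is a stand-in metric, not from the original answer):
model.compile(loss={'first_output': 'mean_squared_error',
                    'second_output': 'mean_absolute_error'},
              loss_weights={'first_output': 1.0, 'second_output': 0.5},
              metrics={'first_output': ['mae'], 'second_output': ['mae']},
              optimizer='adam')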

Unable to predict right values from my Regression Model

I am predicting head poses to get output in the form of roll, pitch, and yaw using the model in the code below, but the result is not accurate. The dataset has a total of 5500 images.
What the values should be:
Roll: 0.67°
Pitch: -4.89°
Yaw: 22.57°
Values from my model:
Roll: 356.10°
Pitch: 1036.82°
Yaw: 532.35°
import pickle as pkl
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping

# opening the dataset
a, b, c, d = pkl.load(open('samples.pkl', 'rb'))

# concatenating the 2 arrays
x = np.concatenate((a, b), axis=0)
y = np.concatenate((c, d), axis=0)

# assigning degrees to roll, pitch and yaw
roll, pitch, yaw = y[:, 0], y[:, 1], y[:, 2]

# test and train division
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=2)
x_val, x_test, y_val, y_test = train_test_split(x_test, y_test, test_size=0.2, random_state=2)
print(x_train.shape, y_train.shape)
print(x_val.shape, y_val.shape)
print(x_test.shape, y_test.shape)

# standardizing the test and train sets
std = StandardScaler()
std.fit(x_train)
x_train = std.transform(x_train)
x_val = std.transform(x_val)
x_test = std.transform(x_test)

BATCH_SIZE = 64
EPOCHS = 100

# model
model = Sequential()
model.add(Dense(units=20, activation='relu', kernel_regularizer='l2', input_dim=x.shape[1]))
model.add(Dense(units=10, activation='relu', kernel_regularizer='l2'))
model.add(Dense(units=3, activation='linear'))
print(model.summary())

# compiling the model
callback_list = [EarlyStopping(monitor='val_loss', patience=25)]
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy'])
hist = model.fit(x=x_train, y=y_train, validation_data=(x_val, y_val), batch_size=BATCH_SIZE, epochs=EPOCHS, callbacks=callback_list)
model.save('model.h5')
First of all, predicting head poses from images is not a basic regression problem; it is, in fact, a computer vision problem that needs a computer vision-based approach.
Your model is a dummy MLP that will never perform well on this problem.
I'm giving you some pointers; you still need to figure out which one works through experimentation.
Instead of using an MLP, start with a ConvNet. Here's a simple CNN for regression:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Activation, MaxPooling2D, Dropout, Flatten, Dense

model = Sequential()
model.add(Conv2D(32, (3, 3), padding='same', input_shape=x_train.shape[1:]))
model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(3))
Get rid of the Flatten and use global pooling if the model overfits.
Try to normalize your output: instead of blindly regressing, scale the roll and pitch to the 0-1 range using min-max normalization, and use a sigmoid in the last layer in that case; see the sketch after these pointers.
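A minimal sketch of those last two pointers, assuming y_train holds the angle targets in degrees (the scaling variables are illustrative, not from the original post):
import numpy as np
from tensorflow.keras.layers import GlobalAveragePooling2D, Dropout, Dense

# min-max scale each target column (roll, pitch, yaw) to the 0-1 range
y_min, y_max = y_train.min(axis=0), y_train.max(axis=0)
y_train_scaled = (y_train - y_min) / (y_max - y_min)

# head: global pooling instead of Flatten + Dense(512), sigmoid to match the 0-1 targets
# (append after the conv blocks above, in place of the original head)
model.add(GlobalAveragePooling2D())
model.add(Dropout(0.5))
model.add(Dense(3, activation='sigmoid'))

# at inference, undo the scaling to get degrees back
y_pred_degrees = model.predict(x_test) * (y_max - y_min) + y_min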
Here's a paper to get the basic idea and start with a baseline model: https://aip.scitation.org/doi/pdf/10.1063/1.4982509

Keras Face Recognition model bad accuracy

I have been struggling for some days to create my own model for face recognition with Keras in Python. I will walk you through my setup and give you my code; I'm sure there is something I'm not doing right, but I can't tell what. My dataset is composed of 97 people with an average of 10 photos per person; the total number of photos is 1106. The dataset is in a folder "faces", and the pictures for each person are in folders named from 0 to 96. Here is the code I am using to process and split my data into train, validation and test sets, and to create my CNN using Keras. Any help or suggestions are much appreciated, thank you!
import os
import cv2
import numpy as np
from imutils import paths
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from tensorflow.keras import regularizers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Activation, BatchNormalization, MaxPooling2D, Dropout, Flatten, Dense
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import RMSprop

channels = 3
rows = 50
cols = 50
classes = 97

data = []
labels = []
images = sorted(list(paths.list_images("faces")))
for image in images:
    img = cv2.imread(image)
    img = cv2.resize(img, (rows, cols)).flatten()
    data.append(img)
    label = image.split(os.path.sep)[-2]
    labels.append(label)

data = np.array(data, dtype="float32") / 255.0
labels = [int(i) for i in labels]
labels = np.array(labels)

xtrain, testX, ytrain, testY = train_test_split(data, labels, test_size=0.3, random_state=13)
trainX, validX, trainY, validY = train_test_split(xtrain, ytrain, test_size=0.2, random_state=14)

lb = LabelBinarizer()
trainY = lb.fit_transform(trainY)
validY = lb.transform(validY)
testY = lb.transform(testY)

trainX = trainX.astype("float32")
validX = validX.astype("float32")
testX = testX.astype("float32")
trainY = trainY.astype("float32")
validY = validY.astype("float32")
testY = testY.astype("float32")

trainX = trainX.reshape([trainX.shape[0], cols, rows, channels])
validX = validX.reshape([validX.shape[0], cols, rows, channels])
testX = testX.reshape([testX.shape[0], cols, rows, channels])

weight_decay = 1e-4
model = Sequential()
model.add(Conv2D(32, (3, 3), padding='same', kernel_regularizer=regularizers.l2(weight_decay), input_shape=trainX.shape[1:]))
model.add(Activation('elu'))
model.add(BatchNormalization())
model.add(Conv2D(32, (3, 3), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
model.add(Activation('elu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.2))
model.add(Conv2D(64, (3, 3), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
model.add(Activation('elu'))
model.add(BatchNormalization())
model.add(Conv2D(64, (3, 3), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
model.add(Activation('elu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.3))
model.add(Conv2D(128, (3, 3), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
model.add(Activation('elu'))
model.add(BatchNormalization())
model.add(Conv2D(128, (3, 3), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
model.add(Activation('elu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.4))
model.add(Flatten())
model.add(Dense(classes))
model.add(Activation('softmax'))

datagen = ImageDataGenerator(horizontal_flip=True,
                             featurewise_center=False,
                             samplewise_center=False,
                             featurewise_std_normalization=False,
                             samplewise_std_normalization=False,
                             zca_whitening=False,
                             rotation_range=20,
                             width_shift_range=0.2,
                             height_shift_range=0.2,
                             vertical_flip=False)
datagen.fit(trainX)

optim = RMSprop(lr=0.001, decay=1e-6)
#sgd = SGD(lr=0.01, momentum=0.9)
model.compile(loss='categorical_crossentropy', optimizer=optim, metrics=['accuracy'])
model.fit_generator(datagen.flow(trainX, trainY, batch_size=64),
                    steps_per_epoch=trainX.shape[0],
                    epochs=50,
                    validation_data=(validX, validY),
                    verbose=1)

score = model.evaluate(testX, testY, batch_size=64, verbose=1)
print("Test score: ", score[0])
print("Test accuracy: ", score[1])

model_json = model.to_json()
open('face_architecture.json', 'w').write(model_json)
model.save_weights('face_weights.h5', overwrite=True)
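Note that steps_per_epoch in the fit_generator call above counts batches per epoch, not samples, so passing trainX.shape[0] makes each epoch roughly 64 times longer than intended. A sketch of the usual computation:
# steps_per_epoch = number of batches needed to cover the training set once
steps = int(np.ceil(trainX.shape[0] / 64))
model.fit_generator(datagen.flow(trainX, trainY, batch_size=64),
                    steps_per_epoch=steps,
                    epochs=50,
                    validation_data=(validX, validY),
                    verbose=1)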

TypeError: Input 'y' of 'Equal' Op has type float32 that does not match type int32 of argument 'x'

I'm pretty new to Keras and LSTMs. I've been trying to train my model on sequences to predict the future price of a stock with the code below, but the error above keeps popping up.
I have tried changing the dtypes of both x_data and y_data with .astype(np.float16). However, every time I am returned the TypeError stating that I have a float32 type.
If it helps, here are the shapes of my data:
xtrain.shape: (32, 24, 67), ytrain.shape: (32, 24, 1), xtest.shape: (38, 67), ytest.shape: (38, 1)
Does anyone have any idea what might be wrong? I've been stuck on this for a while. It would be great if someone could give me a hint.
import time
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, BatchNormalization, Dense
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint

y_data = y_data.to_numpy().astype(np.float32)
x_data = main_df.to_numpy().astype(np.float32)
num_x_signals = x_data.shape[1]
num_y_signals = y_data.shape[1]

# SPLIT TRAIN TEST DATA
ratio = 0.85
train_ratio = int(ratio * len(x_data))
x_train = x_data[0:train_ratio]
x_test = x_data[train_ratio:]
y_train = y_data[0:train_ratio]
y_test = y_data[train_ratio:]

# GENERATE RANDOM SEQUENCES
batch_size = 32
sequence_length = 24
EPOCHS = 50

def batch_generator(x_train, y_train, batch_size, sequence_length, num_x_signals, num_y_signals, num_train):
    while True:
        x_shape = (batch_size, sequence_length, num_x_signals)
        x_batch = np.zeros(shape=x_shape).astype(np.float32)
        y_shape = (batch_size, sequence_length, num_y_signals)
        y_batch = np.zeros(shape=y_shape).astype(np.float32)
        for i in range(batch_size):
            idx = np.random.randint(num_train - sequence_length)
            x_batch[i] = x_train[idx:idx + sequence_length]
            y_batch[i] = y_train[idx:idx + sequence_length]
        yield (x_batch, y_batch)

generator = batch_generator(x_train, y_train, batch_size, sequence_length, num_x_signals, num_y_signals, train_ratio)
xtrain, ytrain = next(generator)
xtest, ytest = (np.expand_dims(x_test, axis=0),
                np.expand_dims(y_test, axis=0))

# LSTM MODEL
model = Sequential()
model.add(LSTM(32, input_shape=(None, num_x_signals), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.15))
model.add(BatchNormalization())
model.add(LSTM(128))
model.add(Dropout(0.18))
model.add(BatchNormalization())
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1, activation='softmax'))

opt = tf.keras.optimizers.Adam(lr=0.001, decay=1e-6)
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy']
)

name_of_file = f"{to_predict}-{sequence_length}-{future_predict}-{int(time.time())}"
tensorboard = TensorBoard(log_dir="logs/{}".format(name_of_file))

filepath = "LSTM_Final-{epoch:02d}-{val_acc:.3f}"
checkpoint = ModelCheckpoint("models/{}.model".format(filepath),
                             monitor='val_acc', verbose=1, save_best_only=True, mode='max')  # saves only the best ones

history = model.fit(
    xtrain, ytrain,
    epochs=EPOCHS,
    validation_data=(xtest, ytest),
    callbacks=[tensorboard, checkpoint]
)

score = model.evaluate(xtest, ytest, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
model.save("models/{}".format(name_of_file))
I found this issue had to do with the loss function specified.
My code:
import tensorflow as tf
from tensorflow import keras

model = tf.keras.Sequential([
    keras.layers.Dense(64, activation=tf.nn.relu, input_shape=[3]),
    keras.layers.Dense(64, activation=tf.nn.relu),
    keras.layers.Dense(1)
])

# I changed the loss function from 'sparse_categorical_crossentropy' to 'mean_squared_error'
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy'])

X = train_dataset.to_numpy()
y = train_labels.to_numpy()
model.fit(X, y, epochs=5)
X shape was (920, 3) with dtype float64; y shape was (920, 1) with dtype float64.
My problem was the loss function passed to model.compile. I took 'sparse_categorical_crossentropy' from an image recognition example, while what I was building here was a neural network for house price prediction.
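To make the distinction concrete, here is a small illustrative sketch (not from the original answer): sparse_categorical_crossentropy expects integer class ids and a softmax output with one unit per class, while a regression model uses a linear output with mean_squared_error.
import numpy as np
import tensorflow as tf

# classification: integer labels in [0, num_classes), softmax over num_classes units
clf = tf.keras.Sequential([tf.keras.layers.Dense(10, activation='softmax', input_shape=[3])])
clf.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
clf.fit(np.random.rand(32, 3), np.random.randint(10, size=32), epochs=1, verbose=0)

# regression: continuous targets, one linear unit, mean squared error
reg = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=[3])])
reg.compile(optimizer='adam', loss='mean_squared_error')
reg.fit(np.random.rand(32, 3), np.random.rand(32), epochs=1, verbose=0)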
