OneHotEncoding for LSTM Categorical Sequence in TensorFlow

OneHotEncoding for LSTM Categorical Sequence in TensorFlow - python

From a set of categories labelled by numbers I am predicting the next category in the sequence. I have modeled this on a text generator (hence the random titles!).
I created a number for each category so it could be interpreted by keras and tensorflow as numerical information assigning these numbers through the enumerate function. It threw up an error suggesting I should use OneHotEncoding for outputs. I don't know how to proceed.
I have sampled what OneHotEncoding of the information would look like but I don't know how to work this into the body of the code going forward/ conversely how to change my code so that the input without OneHotEncoding works.
I don't think I understand M/c learning well enough just yet, I am teaching myself.
import numpy as np
from numpy import array
from numpy import argmax
import tensorflow as tf
import keras
from keras.utils import to_categorical
from keras.utils import np_utils
from keras.layers import LSTM
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import Input, Dense
from keras.layers import TimeDistributed
from keras.models import Model
data= ['10001426', '10001426','10001426','5121550', '5431000', '10001426', '10001426', '10001466','10001426','5121550', '10001426', '10001426', '10001426','10001426','5431000', '10001426', '10001426', '10001466','10001426','5121550', '5431000', '10001426', '10001426', '10001466','10001426','5121550', '5431000', '10001426', '10001426', '10001466','10001426','5121550', '5431000', '10001426', '10001426', '10001466','10001426','5121550']
data= array(data)
chars=['10001426','5121550','5431000','10001466']
chars= array(chars)
"""
#OneHotEncode - turns the category into an encoded array
encoded = to_categorical(data)
print(encoded)
encoded2 = to_categorical(chars)
print(encoded2)
#Invert OneHotEncode
inverted = argmax(encoded[0])
print inverted
inverted2 = argmax(encoded[0])
print inverted2
"""
#Parameters
SEQ_LENGTH = 2 # Learn in steps of 2
VOCAB_SIZE = len(chars) #numer of features - how many categories of fault
#Prepare training data
ix_to_char={ix:char for ix, char in enumerate(chars)}
char_to_ix={char:ix for ix, char in enumerate(chars)}
X= np.zeros((len(data)/SEQ_LENGTH, SEQ_LENGTH, VOCAB_SIZE))
y= np.zeros((len(data)/SEQ_LENGTH, SEQ_LENGTH, VOCAB_SIZE))
for i in range((len(data)/SEQ_LENGTH)):
if (i+1)*SEQ_LENGTH<len(data):
X_sequence = data[(i)*SEQ_LENGTH:(i+1)*SEQ_LENGTH]
X_sequence_ix=[char_to_ix[value] for value in X_sequence]
input_sequence= np.zeros((SEQ_LENGTH, VOCAB_SIZE))
print ((i+1)*SEQ_LENGTH, len(data))
print input_sequence
for j in range(SEQ_LENGTH):
input_sequence[j][X_sequence_ix[j]]=1.
X[i]=input_sequence
y_sequence = data[i*SEQ_LENGTH+1:(i+1)*(SEQ_LENGTH+1)]
y_sequence_ix = [char_to_ix[value] for value in y_sequence]
target_sequence= np.zeros((SEQ_LENGTH, VOCAB_SIZE))
for j in range(SEQ_LENGTH):
if (i+1)*(SEQ_LENGTH+1)<(SEQ_LENGTH):
target_sequence[j][y_sequence_ix[j]]=1
y[i]=target_sequence
print y[i]
#Create the network
HIDDEN_DIM=1
LAYER_NUM= 1
model = Sequential()
model.add(LSTM(HIDDEN_DIM, input_shape=(None, VOCAB_SIZE),
return_sequences=True))
for i in range(LAYER_NUM-1):
model.add(LSTM(HIDDEN_DIM, return_sequences=True))
model.add(Activation('softmax'))
model.compile(loss="categorical_crossentropy",optimizer="rmsprop")
#Train the network
nb_epoch = 0
BATCH_SIZE = 5
GENERATE_LENGTH = 7
while True:
print ('\n\n')
model.fit(X,y,batch_size=BATCH_SIZE,verbose=1, epochs=1)
nb_epoch +=1
generate_text(model, GENERATE_LENGTH)
if nb_epoch %5==0:
model.save_weights('checkpoint_{}_epoch_{}.hdf5'.format(HIDDEN_DIM, nb_epoch))
model.summary()

You forgot that your final layer should have an output of size VOCAB_SIZE. You could either do this by adding a special Dense layer:
for i in range(LAYER_NUM-1):
model.add(LSTM(HIDDEN_DIM, return_sequences=True))
model.add(Dense(VOCAB_SIZE))
model.add(Activation('softmax'))
model.compile(loss="categorical_crossentropy",optimizer="rmsprop")
or by setting appropriate output from last LSTM layer (I will skip code for this part as it's a little bit tedious).

Related

ValueError: Exception encountered when calling layer 'sequential' (type Sequential)

I am writing an AI that will predict its own one by one - for example, an array [22,1,456,2] is given, and it will predict [33,1,455,3].
But when I run this code:
from music21 import converter, instrument, note, chord
import tensorflow as tf
from scipy.interpolate import UnivariateSpline
from tensorflow import keras
import numpy as np
from sklearn.model_selection import train_test_split
from keras_preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, LSTM
import tensorflow as tf
import numpy as np
notes = []
file = "C:\\Users\\User\\Desktop\\titanic_guitar.mid"
# Получаем все ноты и аккорды из файла
midi = converter.parse(file)
parts = instrument.partitionByInstrument(midi)
if parts:
notes_to_parse = parts.parts[0].recurse()
else:
notes_to_parse = midi.flat.notes
for element in notes_to_parse:
if isinstance(element, note.Note):
# Добавляем "ноты, типа ля2-до3"
notes.append(str(element.pitch))
elif isinstance(element, chord.Chord):
# Добавляем аккорды
notes.append('.'.join(str(n) for n in element.pitches))
print(notes)
note_to_int = {note: number for number, note in enumerate(sorted(set(notes)))}
for i in range(len(notes)):
notes[i] = note_to_int.get(notes[i])
print(notes)
X = np.expand_dims(notes, axis = 0)
print(X)
y = np.array([1,1,1])
y = y.reshape(1,-1)
model = Sequential()
model.add(LSTM(4, return_sequences=False, input_shape=(None, X.shape[1])))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy',
optimizer='adam',
metrics=['accuracy']
)
r = model.fit(X,
y,
epochs=5
)
I am getting this error:
ValueError: Exception encountered when calling layer 'sequential' (type Sequential).
Input 0 of layer "lstm" is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: (None, 168)
What is the problem?
Do you have working options?

reshape your data by using
.resape(-1,1)

Data preparation for neural network in Python

I want to learn how to prepare data for training samples in python. I found a simple example of a neural network that predicts the stock price. At the moment I am not interested in the accuracy of training the network, but I am interested in how to take any data and prepare it for submission to the neural network.
As an example, I took these stocks over the past 5 years. As planned, the neural network accepts data for the last 50 days as input and predicts the course for the next 5 days. To do this, I read the .csv file, processed the data in such a way that after the transformation I got two dataframes, the first one is responsible for the input data, and the second for the output.
The problem is, no matter what I do, I keep getting errors and so I cannot complete the training. What am I doing wrong? The code is shown below:
import matplotlib.pylab as plt
import torch
import random
import numpy as np
import pandas as pd
import sklearn
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import normalize
import pandas_profiling as pprf
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense, BatchNormalization, LeakyReLU
from tensorflow.keras.layers import Activation, Input, MaxPooling1D, Dropout
from tensorflow.keras.layers import AveragePooling1D, Conv1D, Flatten
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.optimizers import Adam, RMSprop, SGD
from tensorflow.keras.utils import plot_model
from IPython.display import display, Image
random.seed(0)
np.random.seed(0)
torch.manual_seed(0)
torch.backends.cudnn.deterministic = True
data = pd.read_csv('F:\\YNDX_ME.csv')[::]
data = data.drop('Date',axis=1)
data = data.drop('Adj Close',axis=1)
data = data.drop(np.where(data['Volume'] == 0)[0])
data = data.reset_index(drop=True)
#profiler = pprf.ProfileReport(data)
#profiler.to_file(r'F:\profiling.html')
days_edu = 50
days_pred = 5
df_edu_list = []
for i in range(len(data.index)-days_edu-days_pred+1):
df_temp = []
for j in range(days_edu):
df_temp.extend(data.loc[i+j,:].tolist())
df_edu_list.append(df_temp)
df_edu_out_list = []
for i in range(len(data.index)-days_edu-days_pred+1):
df_temp = []
for j in range(5):
df_temp.extend(data.loc[i+j+days_edu,:].tolist())
df_edu_out_list.append(df_temp)
df_edu_train = pd.DataFrame(df_edu_list[:int(len(df_edu_list)*0.8)])
df_edu_val = pd.DataFrame(df_edu_list[int(len(df_edu_list)*0.8):])
df_edu_train_out = pd.DataFrame(df_edu_out_list[:int(len(df_edu_out_list)*0.8)])
df_edu_val_out = pd.DataFrame(df_edu_out_list[int(len(df_edu_out_list)*0.8):])
df_edu_train = normalize(df_edu_train.values)
df_edu_val = normalize(df_edu_val.values)
df_edu_train_out = normalize(df_edu_train_out.values)
df_edu_val_out = normalize(df_edu_val_out.values)
df_edu_train = np.expand_dims(df_edu_train,axis=0)
df_edu_train_out = np.expand_dims(df_edu_train_out,axis=0)
model = Sequential()
model.add(Conv1D(filters=32, kernel_size=5, padding="same", strides=1, input_shape= (959,250),data_format='channels_first'))
model.add(Conv1D(32, 5))
model.add(Dropout(0.3))
model.add(Conv1D(16, 5))
model.add(Dropout(0.3))
model.add(Flatten())
model.add(Dense(250, activation='relu'))
model.add(Dense(25, activation=None))
optimizer = Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999, amsgrad=False)
model.compile(optimizer=optimizer, loss='mae', metrics=['accuracy'])
EPOCHS = 1000
model.fit(df_edu_train, df_edu_train_out, epochs=EPOCHS)
Error:
InvalidArgumentError: Conv2DCustomBackpropFilterOp only supports NHWC.
[[node gradient_tape/sequential/conv1d/Conv1D/Conv2DBackpropFilter
(defined at C:\Users\nick0\anaconda3\lib\site-packages\keras\optimizer_v2\optimizer_v2.py:464)
]] [Op:__inference_train_function_1046]
Errors may have originated from an input operation.
Input Source operations connected to node gradient_tape/sequential/conv1d/Conv1D/Conv2DBackpropFilter:
In[0] sequential/conv1d/Conv1D/ExpandDims (defined at C:\Users\nick0\anaconda3\lib\site-packages\keras\layers\convolutional.py:231)
In[1] gradient_tape/sequential/conv1d/Conv1D/ShapeN:
In[2] gradient_tape/sequential/conv1d/Conv1D/Reshape:
Update:
Changed data_format = 'channels_first' to data_format = 'channels_last'. The training began, but as I understood, the training took place on the entire training set, i.e. the neural network just thought that there was one example and it was trained on it specifically. How to make the neural network take each line in turn? is each line essentially a separate example?

The accuracy problem of hand sign gestures recognition with using CNN in Python

Im working on my senior project in my university and I have only 2 days to fix this problem.I created a hand gesture recognition with using CNN in Python.I used 78000 images with 50x50px values.But I got stuck in the last part of my model.I can not improve my accuracy.When I start to train the data with 100 epochs,the first 15 epochs show 0,039 accuracy and it is horrible,because of that I'm not waiting the end of the train.Maybe it happens because of the values of conv2d or pooling because I don't know how to put the correct values into conv2d,pooling etc.
I'm new and I could not fix the problem.If you help me,I will be grateful for you
The code I wrote is given below;
from keras.models import Sequential
from keras.layers import Convolution2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dense
from keras.layers import Dropout
from keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf
import pickle
import cv2
import os
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from PIL import Image
from numpy import asarray
DATADIR = "asl_alphabet_train"
CATEGORIES = ["A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z"]
X_train = []
y_train = []
X_test=[]
y_test=[]
IMG_SIZE=50
def create_training_data():
for category in CATEGORIES:
path = os.path.join(DATADIR,category) # create path to dogs and cats
class_num = CATEGORIES.index(category) # get the classification (0 or a 1).
for img in tqdm(os.listdir(path)): # iterate over each image per dogs and cats
try:
img_array = cv2.imread(os.path.join(path,img)) # convert to array
#new_array = cv2.resize(img_array, (28, 50 )) # resize to normalize data size
X_train.append(img_array) # add this to our trainingdata
# add this to our X_train
y_train.append(class_num) # add this to our X_train
except Exception as e: # in the interest in keeping the output clean...
pass
create_training_data()
X_train = asarray(X_train)
y_train = asarray(y_train)
"""
nsamples, nx, ny = X_train.shape
X_train = X_train.reshape((nsamples,nx*ny))
"""
X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.2,random_state=0)
N = y_train.size
M = y_train.max()+1
resultArray = np.zeros((N,M),int)
idx = (np.arange(N)*M) + y_train
resultArray.ravel()[idx] = 1
y_train=resultArray
classifier=Sequential()
#convolution step
classifier.add(Convolution2D(filters=96, input_shape=(50,50,3), kernel_size=(11,11), padding='valid',activation="relu"))
#pooling step
classifier.add(MaxPooling2D(pool_size=(2,2)))
#convolution step
classifier.add(Convolution2D(filters=256,kernel_size=(11,11),padding="valid",activation="relu"))
#pooling step
classifier.add(MaxPooling2D(pool_size=(2,2)))
classifier.add(Convolution2D(filters=384,kernel_size=(3,3),padding="valid",activation="relu"))
classifier.add(MaxPooling2D(pool_size=(2,2)))
#flatten step
classifier.add(Flatten())
#Dense(Fully connected step)
classifier.add(Dense(output_dim=128,activation="relu"))
#Dropout to decrease the possibility of overfitting
classifier.add(Dropout(0.5))
#Dense to determine the output
classifier.add(Dense(output_dim=26,activation="softmax"))
#compile step
classifier.compile(optimizer="adam",loss="categorical_crossentropy",metrics=["accuracy"])
enter code here
classifier.fit(X_train,y_train,epochs=100,batch_size=32)
filename="CNN_TEST.sav"
pickle.dump(classifier, open(filename, 'wb'))
y_pred=classifier.predict(X_test)
print(y_pred)

Would recommend the following :
1) Reduce the kernel size in the first two convolutional layers of your model.
2) I believe the MaxPooling layer is not necessary after every convolutional layer. Do verify this.
3) A DropOut of 0.5 could drop out a large number of essential neurons, you might want to lower that.
4) Vary the number of epochs and see how your model performs each time.
Plot "train accuracy vs val accuracy" and "train loss vs val loss" at each attempt and see if your model overfits or underfits.

skopt's gp_minimize() function raises ValueError: array must not contain infs or NaNs

I am currently using the skopt (scikit-optimize) package for hyperparameter tuning of a neural network (I am trying to minimize -1* accuracy). It seems to run fine (and successfully prints to the console) for several iterations before it raises Value Error: array must not contain infs or NaNs.
What are some possible causes of this? My data does not contain infs or NaNs and neither do my search parameter ranges. The neural network code is quite long, so for brevity, I will paste the relevant sections:
Imports:
import pandas as pd
import numpy as np
from skopt import gp_minimize
from skopt.utils import use_named_args
from skopt.space import Real, Categorical, Integer
from tensorflow.python.framework import ops
from sklearn.model_selection import train_test_split
import tensorflow
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, Dropout, MaxPooling1D, Flatten
from keras import backend as K
Creation of search parameters:
dim_num_filters_L1 = Integer(low=1, high=50, name='num_filters_L1')
#dim_kernel_size_L1 = Integer(low=1, high=70, name='kernel_size_L1')
dim_activation_L1 = Categorical(categories=['relu', 'linear', 'softmax'], name='activation_L1')
dim_num_filters_L2 = Integer(low=1, high=50, name='num_filters_L2')
#dim_kernel_size_L2 = Integer(low=1, high=70, name='kernel_size_L2')
dim_activation_L2 = Categorical(categories=['relu', 'linear', 'softmax'], name='activation_L2')
dim_num_dense_nodes = Integer(low=1, high=28, name='num_dense_nodes')
dim_activation_L3 = Categorical(categories=['relu', 'linear', 'softmax'], name='activation_L3')
dim_dropout_rate = Real(low = 0, high = 0.5, name = 'dropout_rate')
dim_learning_rate = Real(low=1e-4, high=1e-2, name='learning_rate')
dimensions = [dim_num_filters_L1,
#dim_kernel_size_L1,
dim_activation_L1,
dim_num_filters_L2,
#dim_kernel_size_L2,
dim_activation_L2,
dim_num_dense_nodes,
dim_activation_L3,
dim_dropout_rate,
dim_learning_rate,
]
Function that creates all models that will be tested:
def create_model(num_filters_L1, #kernel_size_L1,
activation_L1,
num_filters_L2, #kernel_size_L2,
activation_L2,
num_dense_nodes, activation_L3,
dropout_rate,
learning_rate):
input_shape = (X_train.shape[1], 1)
model = Sequential()
model.add(Conv1D(num_filters_L1, kernel_size = 40, activation = activation_L1, input_shape = input_shape))
model.add(MaxPooling1D(pool_size=2))
model.add(Conv1D(num_filters_L2, kernel_size=20, activation=activation_L2))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(num_dense_nodes, activation = activation_L3))
model.add(Dropout(dropout_rate))
model.add(Dense(y_train.shape[1], activation='linear'))
adam = tensorflow.keras.optimizers.Adam(learning_rate = learning_rate)
model.compile(optimizer=adam, loss='mean_squared_error', metrics=['accuracy'])
return model
Define fitness function:
#use_named_args(dimensions=dimensions)
def fitness(num_filters_L1, #kernel_size_L1,
activation_L1,
num_filters_L2, #kernel_size_L2,
activation_L2,
num_dense_nodes, activation_L3,
dropout_rate,
learning_rate):
model = create_model(num_filters_L1, #kernel_size_L1,
activation_L1,
num_filters_L2, #kernel_size_L2,
activation_L2,
num_dense_nodes, activation_L3,
dropout_rate,
learning_rate)
history_opt = model.fit(x=X_train,
y=y_train,
validation_data=(X_val,y_val),
shuffle=True,
verbose=2,
epochs=10
)
#return the validation accuracy for the last epoch.
accuracy_opt = model.evaluate(X_test,y_test)[1]
# Print the classification accuracy:
print("Experimental Model Accuracy: {0:.2%}".format(accuracy_opt))
# Delete the Keras model with these hyper-parameters from memory:
del model
# Clear the Keras session, otherwise it will keep adding new models to the same TensorFlow graph each time we create model with a different set of hyper-parameters.
K.clear_session()
ops.reset_default_graph()
# the optimizer aims for the lowest score, so return negative accuracy:
return -accuracy # or sum(RMSE)?
Run hyperparameter search:
gp_result = gp_minimize(func=fitness,
dimensions=dimensions)
print("best accuracy was " + str(round(gp_result.fun *-100,2))+"%.")

Your activation function is not converging in a random acquisition function call. I encountered this problem and removed 'relu' function from search space.

LSTM Keras confusion

#enumaris thank you for your answer. I'll try to explain my approach a bit:
I pushed the video frames through resnet model and got fature shapes of (k, 2048). I have the data into train/validation and test folders. Then I was writing this script:
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Activation, Dropout, Dense
import tensorflow as tf
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import argparse
import random
import cv2
import os
dataTrain = []
labelsTrain = []
# Prepare the Training Data. The .txt files contain name of the name of the
#file and the label which is 0,1,or 2 based on which class the video belongs
#to (nameVideo.npy 0)
with open('D:...\Data\/train_files.txt') as f:
trainingList = f.readlines()
for line in trainingList:
npyFiles = line.split( )
loadTrainingData = np.load(npyFiles[0])
dataTrain.append(loadTrainingData)
labelsTrain.append(npyFiles[1])
dataNp = np.array(dataTrain, dtype=object)
labelsNp = np.array(labelsTrain, dtype=object)
f.close()
dataVal = []
labelsVal = []
# Prepare the Validation Data
with open('D:\...\Data\/val_files.txt') as f:
valList = f.readlines()
for line in valList:
npyValFiles = line.split( )
loadValData = np.load(npyValFiles[0])
dataVal.append(loadValData)
labelsVal.append(npyValFiles[1])
f.close()
print(len(dataVal))
model = Sequential()
model.add(LSTM(32,
batch_input_shape=(None, None, 1),
return_sequences=True))
model.add(LSTM(32, return_sequences=True))
model.add(LSTM(32))
model.add(Dense(10, activation='softmax'))
model.compile(loss='mean_absolute_error',
optimizer='adam',
metrics=['accuracy'])
model.summary()
history = model.fit(dataTrain, labelsTrain,
epochs=10,
validation_data=(dataVal, labelsVal))
Which results in the following error:
ValueError: Error when checking model input: the list of Numpy arrays that you are passing to your model is not the size the model expected. Expected to see 1 array(s), but instead got the following list of 3521 arrays.

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

OneHotEncoding for LSTM Categorical Sequence in TensorFlow - python

Related

ValueError: Exception encountered when calling layer 'sequential' (type Sequential)

Data preparation for neural network in Python

The accuracy problem of hand sign gestures recognition with using CNN in Python

skopt's gp_minimize() function raises ValueError: array must not contain infs or NaNs

LSTM Keras confusion

Categories

Resources