LSTM Keras confusion - python

#enumaris thank you for your answer. I'll try to explain my approach a bit:
I pushed the video frames through resnet model and got fature shapes of (k, 2048). I have the data into train/validation and test folders. Then I was writing this script:
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Activation, Dropout, Dense
import tensorflow as tf
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import argparse
import random
import cv2
import os
dataTrain = []
labelsTrain = []
# Prepare the Training Data. The .txt files contain name of the name of the
#file and the label which is 0,1,or 2 based on which class the video belongs
#to (nameVideo.npy 0)
with open('D:...\Data\/train_files.txt') as f:
trainingList = f.readlines()
for line in trainingList:
npyFiles = line.split( )
loadTrainingData = np.load(npyFiles[0])
dataTrain.append(loadTrainingData)
labelsTrain.append(npyFiles[1])
dataNp = np.array(dataTrain, dtype=object)
labelsNp = np.array(labelsTrain, dtype=object)
f.close()
dataVal = []
labelsVal = []
# Prepare the Validation Data
with open('D:\...\Data\/val_files.txt') as f:
valList = f.readlines()
for line in valList:
npyValFiles = line.split( )
loadValData = np.load(npyValFiles[0])
dataVal.append(loadValData)
labelsVal.append(npyValFiles[1])
f.close()
print(len(dataVal))
model = Sequential()
model.add(LSTM(32,
batch_input_shape=(None, None, 1),
return_sequences=True))
model.add(LSTM(32, return_sequences=True))
model.add(LSTM(32))
model.add(Dense(10, activation='softmax'))
model.compile(loss='mean_absolute_error',
optimizer='adam',
metrics=['accuracy'])
model.summary()
history = model.fit(dataTrain, labelsTrain,
epochs=10,
validation_data=(dataVal, labelsVal))
Which results in the following error:
ValueError: Error when checking model input: the list of Numpy arrays that you are passing to your model is not the size the model expected. Expected to see 1 array(s), but instead got the following list of 3521 arrays.

Related

Value Error problem for machine learning model

I got a ValueError when using TensorFlow to create a model. I have tried debugging and checked the shapes from my model. I don't understand how or what this error means but I've narrowed the problem down to the Conv2D layer as causing the error. I also tried changing hyperparamters (i.e., batch size, microbatches, etc.).
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
!pip install tensorflow-privacy
import numpy as np
import tensorflow as tf
from tensorflow_privacy import *
import tensorflow_privacy
from matplotlib import pyplot as plt
import pylab as pl
import numpy as np
import pandas as pd
from tensorflow.keras.models import Model
from tensorflow.keras import datasets, layers, models, losses
from tensorflow.keras import backend as bke
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l1, l2, l1_l2 #meaning of norm
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
batch_size = 8
epochs = 4
microbatches = 8
inChannel = 1
kr = 0#1e-5
num_kernels=8
drop_perc=0.25
dim = 1
l2_norm_clip = 1.5
noise_multiplier = 1.3
learning_rate = 0.25
latent_dim = 0
def print_datashape():
print('genotype data: ', genotype_data.shape)
print('phenotype data: ', single_pheno.shape)
genotype_data = tf.random.uniform([4276, 28220],0,255,)
phenotype_data = tf.random.uniform([4276, 20],0,255,)
genotype_data = genotype_data.numpy()
phenotype_data = phenotype_data.numpy()
small_geno = genotype_data
single_pheno = phenotype_data[:, 1]
print_datashape()
df = small_geno
min_max_scaler = preprocessing.MinMaxScaler()
df = min_max_scaler.fit_transform(df)
scaled_pheno = min_max_scaler.fit_transform(single_pheno.reshape(-1,1)).reshape(-1)
feature_size= df.shape[1]
df = df.reshape(-1, feature_size, 1, 1)
print("df: ", df.shape)
print("scaled: ", scaled_pheno.shape)
# split train to train and valid
train_data,test_data,train_Y,test_Y = train_test_split(df, scaled_pheno, test_size=0.2, random_state=13)
train_X,valid_X,train_Y,valid_Y = train_test_split(train_data, train_Y, test_size=0.2, random_state=13)
def print_shapes():
print('train_X: {}'.format(train_X.shape))
print('train_Y: {}'.format(train_Y.shape))
print('valid_X: {}'.format(valid_X.shape))
print('valid_Y: {}'.format(valid_Y.shape))
input_shape= (feature_size, dim, inChannel)
predictor = tf.keras.Sequential()
predictor.add(layers.Conv2D(num_kernels, (5,1), padding='same', strides=(12, 1), activation='relu',input_shape= input_shape))
predictor.add(layers.AveragePooling2D(pool_size=(2,1)))
predictor.add(layers.Dropout(drop_perc))
predictor.add(layers.Flatten())
predictor.add(layers.Dense(int(feature_size / 4), activation='relu'))
predictor.add(layers.Dropout(drop_perc))
predictor.add(layers.Dense(int(feature_size / 10), activation='relu'))
predictor.add(layers.Dropout(drop_perc))
predictor.add(layers.Dense(1))
optimizer = DPKerasAdamOptimizer(learning_rate=learning_rate, l2_norm_clip=l2_norm_clip, noise_multiplier=noise_multiplier, num_microbatches=microbatches)
# compile
predictor.compile(loss='mse', optimizer=optimizer, metrics=['mse'])
#summary
predictor.summary()
print_shapes()
predictor.fit(train_X, train_Y,batch_size=batch_size,epochs=epochs,verbose=1, validation_data=(valid_X, valid_Y))
ValueError: Dimension size must be evenly divisible by 8 but is 1 for '{{node Reshape}} = Reshape[T=DT_FLOAT, Tshape=DT_INT32](mean_squared_error/weighted_loss/value, Reshape/shape)' with input shapes: [], [2] and with input tensors computed as partial shapes: input[1] = [8,?].

ValueError: Exception encountered when calling layer 'sequential' (type Sequential)

I am writing an AI that will predict its own one by one - for example, an array [22,1,456,2] is given, and it will predict [33,1,455,3].
But when I run this code:
from music21 import converter, instrument, note, chord
import tensorflow as tf
from scipy.interpolate import UnivariateSpline
from tensorflow import keras
import numpy as np
from sklearn.model_selection import train_test_split
from keras_preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, LSTM
import tensorflow as tf
import numpy as np
notes = []
file = "C:\\Users\\User\\Desktop\\titanic_guitar.mid"
# Получаем все ноты и аккорды из файла
midi = converter.parse(file)
parts = instrument.partitionByInstrument(midi)
if parts:
notes_to_parse = parts.parts[0].recurse()
else:
notes_to_parse = midi.flat.notes
for element in notes_to_parse:
if isinstance(element, note.Note):
# Добавляем "ноты, типа ля2-до3"
notes.append(str(element.pitch))
elif isinstance(element, chord.Chord):
# Добавляем аккорды
notes.append('.'.join(str(n) for n in element.pitches))
print(notes)
note_to_int = {note: number for number, note in enumerate(sorted(set(notes)))}
for i in range(len(notes)):
notes[i] = note_to_int.get(notes[i])
print(notes)
X = np.expand_dims(notes, axis = 0)
print(X)
y = np.array([1,1,1])
y = y.reshape(1,-1)
model = Sequential()
model.add(LSTM(4, return_sequences=False, input_shape=(None, X.shape[1])))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy',
optimizer='adam',
metrics=['accuracy']
)
r = model.fit(X,
y,
epochs=5
)
I am getting this error:
ValueError: Exception encountered when calling layer 'sequential' (type Sequential).
Input 0 of layer "lstm" is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: (None, 168)
What is the problem?
Do you have working options?
reshape your data by using
.resape(-1,1)

Data preparation for neural network in Python

I want to learn how to prepare data for training samples in python. I found a simple example of a neural network that predicts the stock price. At the moment I am not interested in the accuracy of training the network, but I am interested in how to take any data and prepare it for submission to the neural network.
As an example, I took these stocks over the past 5 years. As planned, the neural network accepts data for the last 50 days as input and predicts the course for the next 5 days. To do this, I read the .csv file, processed the data in such a way that after the transformation I got two dataframes, the first one is responsible for the input data, and the second for the output.
The problem is, no matter what I do, I keep getting errors and so I cannot complete the training. What am I doing wrong? The code is shown below:
import matplotlib.pylab as plt
import torch
import random
import numpy as np
import pandas as pd
import sklearn
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import normalize
import pandas_profiling as pprf
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense, BatchNormalization, LeakyReLU
from tensorflow.keras.layers import Activation, Input, MaxPooling1D, Dropout
from tensorflow.keras.layers import AveragePooling1D, Conv1D, Flatten
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.optimizers import Adam, RMSprop, SGD
from tensorflow.keras.utils import plot_model
from IPython.display import display, Image
random.seed(0)
np.random.seed(0)
torch.manual_seed(0)
torch.backends.cudnn.deterministic = True
data = pd.read_csv('F:\\YNDX_ME.csv')[::]
data = data.drop('Date',axis=1)
data = data.drop('Adj Close',axis=1)
data = data.drop(np.where(data['Volume'] == 0)[0])
data = data.reset_index(drop=True)
#profiler = pprf.ProfileReport(data)
#profiler.to_file(r'F:\profiling.html')
days_edu = 50
days_pred = 5
df_edu_list = []
for i in range(len(data.index)-days_edu-days_pred+1):
df_temp = []
for j in range(days_edu):
df_temp.extend(data.loc[i+j,:].tolist())
df_edu_list.append(df_temp)
df_edu_out_list = []
for i in range(len(data.index)-days_edu-days_pred+1):
df_temp = []
for j in range(5):
df_temp.extend(data.loc[i+j+days_edu,:].tolist())
df_edu_out_list.append(df_temp)
df_edu_train = pd.DataFrame(df_edu_list[:int(len(df_edu_list)*0.8)])
df_edu_val = pd.DataFrame(df_edu_list[int(len(df_edu_list)*0.8):])
df_edu_train_out = pd.DataFrame(df_edu_out_list[:int(len(df_edu_out_list)*0.8)])
df_edu_val_out = pd.DataFrame(df_edu_out_list[int(len(df_edu_out_list)*0.8):])
df_edu_train = normalize(df_edu_train.values)
df_edu_val = normalize(df_edu_val.values)
df_edu_train_out = normalize(df_edu_train_out.values)
df_edu_val_out = normalize(df_edu_val_out.values)
df_edu_train = np.expand_dims(df_edu_train,axis=0)
df_edu_train_out = np.expand_dims(df_edu_train_out,axis=0)
model = Sequential()
model.add(Conv1D(filters=32, kernel_size=5, padding="same", strides=1, input_shape= (959,250),data_format='channels_first'))
model.add(Conv1D(32, 5))
model.add(Dropout(0.3))
model.add(Conv1D(16, 5))
model.add(Dropout(0.3))
model.add(Flatten())
model.add(Dense(250, activation='relu'))
model.add(Dense(25, activation=None))
optimizer = Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999, amsgrad=False)
model.compile(optimizer=optimizer, loss='mae', metrics=['accuracy'])
EPOCHS = 1000
model.fit(df_edu_train, df_edu_train_out, epochs=EPOCHS)
Error:
InvalidArgumentError: Conv2DCustomBackpropFilterOp only supports NHWC.
[[node gradient_tape/sequential/conv1d/Conv1D/Conv2DBackpropFilter
(defined at C:\Users\nick0\anaconda3\lib\site-packages\keras\optimizer_v2\optimizer_v2.py:464)
]] [Op:__inference_train_function_1046]
Errors may have originated from an input operation.
Input Source operations connected to node gradient_tape/sequential/conv1d/Conv1D/Conv2DBackpropFilter:
In[0] sequential/conv1d/Conv1D/ExpandDims (defined at C:\Users\nick0\anaconda3\lib\site-packages\keras\layers\convolutional.py:231)
In[1] gradient_tape/sequential/conv1d/Conv1D/ShapeN:
In[2] gradient_tape/sequential/conv1d/Conv1D/Reshape:
Update:
Changed data_format = 'channels_first' to data_format = 'channels_last'. The training began, but as I understood, the training took place on the entire training set, i.e. the neural network just thought that there was one example and it was trained on it specifically. How to make the neural network take each line in turn? is each line essentially a separate example?

Using generator in Python to feed into Keras model.fit_generator

I am learning how to use generator in Python and feed it into Keras model.fit_generator.
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten
import pandas as pd
import os
import cv2
class Generator:
def __init__(self,path):
self.path = path
def gen(self, feat, labels):
i=0
while (True):
im = cv2.imread(feat[i],0)
im = im.reshape(28,28,1)
yield im,labels[i]
i+=1
if __name__ == "__main__":
input_dir = './mnist'
output_file = 'dataset.csv'
filename = []
label = []
for root,dirs,files in os.walk(input_dir):
for file in files:
full_path = os.path.join(root,file)
filename.append(full_path)
label.append(os.path.basename(os.path.dirname(full_path)))
data = pd.DataFrame(data={'filename': filename, 'label':label})
data.to_csv(output_file,index=False)
feat = data.iloc[:,0]
labels = pd.get_dummies(data.iloc[:,1]).as_matrix()
image_gen = Generator(input_dir)
# #create model
model = Sequential()
model.add(Conv2D(64, kernel_size=3, activation="relu", input_shape=(28,28,1)))
model.add(Conv2D(32, kernel_size=3, activation="relu"))
model.add(Flatten())
model.add(Dense(2, activation="softmax"))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit_generator(image_gen.gen(filename,labels), steps_per_epoch=5 ,epochs=5, verbose=1)
I have 2 subfolders inside ./mnist folder, corresponding to each class in my dataset.
I created a Dataframe that contains the path of each image and the label (which is the name of the corresponding subfolder).
I created Generator class that loads the image whose path is written in the DataFrame.
It gave me error:
ValueError: Error when checking input: expected conv2d_1_input to have 4 dimensions, but got array with shape (28, 28, 1)
Could anyone please help?
And also, is it the correct way to implement generator in general?
Thanks!
I think answers to your questions can be found in Keras documentation.
In terms of the input shape, Conv2D layers expects 4-dimensional input, but you explicitly reshape to (28,28,1) in your generator, so 3 dimensions. On the Conv2D info and the input format, see this documentation.
In terms of the generator itself, Keras documentation provides an example with generator being a function, the same is discussed in Python Wiki. But your particular implementation seem to work, at least for the first iteration, if you get to the point of feeding the data into the convolution layer.

word embedding example in keras predicts different results on each run

I am following the pretrained_word_embeddings and is saving the model using the following piece of code
print('Saving model to disk ...')
model.save('/home/data/pretrained-model.h5'')
I am then loading the pretrained model using
pretrained_model = load_model('/home/data/pretrained-model.h5')
Later the following piece of code for predicting on a different text altogether
predict_texts = [] # list of text samples
for predict_name in sorted(os.listdir(PREDICT_TEXT_DATA_DIR)):
predict_path = os.path.join(PREDICT_TEXT_DATA_DIR, predict_name)
if os.path.isdir(predict_path):
for predict_fname in sorted(os.listdir(predict_path)):
if predict_fname.isdigit():
predict_fpath = os.path.join(predict_path, predict_fname)
if sys.version_info < (3,):
f = open(predict_fpath)
else:
f = open(predict_fpath, encoding='latin-1')
predict_text = f.read()
i = predict_text.find('\n\n') # skip header
if 0 < i:
predict_text = predict_text[i:]
predict_texts.append(predict_text)
f.close()
print('Found %s texts.' % len(predict_texts))
tokenizer.fit_on_texts(predict_texts)
predict_sequences = tokenizer.texts_to_sequences(predict_texts)
predict_data = pad_sequences(predict_sequences, maxlen=MAX_SEQUENCE_LENGTH)
print('Shape of predict data tensor:', predict_data.shape)
x_predict = predict_data
y_predict = pretrained_model.predict(x_predict)
max_val = np.argmax(y_predict)
print('Category it belongs to : ',max_val)
The problem that I am facing now is that each time I run this above piece of code, max_val is always a different value.
How do I make predictions consistent please ?
I think you should predict one by one, not merge all texts for all files.
The following code I tested is OK:
from __future__ import print_function
import os
import sys
import numpy as np
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from keras.layers import Dense, Input, GlobalMaxPooling1D
from keras.layers import Conv1D, MaxPooling1D, Embedding
from keras.models import Model
from keras.models import load_model
from keras.preprocessing.text import text_to_word_sequence
MAX_SEQUENCE_LENGTH = 1000
MAX_NB_WORDS = 20000
EMBEDDING_DIM = 100
model = load_model('embedding.h5')
PREDICT_TEXT_DATA_DIR = 'predict_data'
predict_path = os.path.join(PREDICT_TEXT_DATA_DIR, '1.txt')
f = open(predict_path, encoding='utf-8')
predict_text = f.read()
f.close()
texts=[predict_text]
# finally, vectorize the text samples into a 2D integer tensor
tokenizer = Tokenizer(num_words=MAX_NB_WORDS)
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
x_predict = pad_sequences(sequences, maxlen=MAX_SEQUENCE_LENGTH)
print('Shape of predict data tensor:', x_predict.shape)
y_predict = model.predict(x_predict)
max_val = np.argmax(y_predict)
print('Category it belongs to : ',max_val)

Categories

Resources