I built a simple RNN model that learns to fit a single wave file.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM,Dropout,Dense
from tensorflow.keras.layers import SimpleRNN
import librosa
import librosa.display
import numpy as np
# Names the snippet uses but never imported.
from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import Adam

# Load the clip (resampled to 22.05 kHz) and take its short-time Fourier transform.
a, sr = librosa.load("A.wav", sr=22050)
rawdata = librosa.stft(a, n_fft=512, hop_length=512 // 4, window='hann')
# librosa.stft returns complex values; SimpleRNN/Dense work on real tensors,
# so train on the magnitude spectrogram.
rawdata = np.abs(rawdata)
rawdata = rawdata.transpose()  # [Freq, Frame] => [Frame, Freq]
n_in = rawdata.shape[1]  # values per frame; also the size of the network output

input_len = 10  # number of past frames used to predict the next one
# NOTE: `input` shadows the builtin; the name is kept because the follow-up
# multi-file snippet fills the same two lists.
input = []
target = []
for i in range(0, len(rawdata) - input_len):
    input.append(rawdata[i:i + input_len])  # window of input_len frames
    target.append(rawdata[i + input_len])   # the frame right after the window
X = np.array(input)
Y = np.array(target)

# Separate 8:2 for training and test; shuffle=False keeps the time order.
x, val_x, y, val_y = train_test_split(X, Y, test_size=int(X.shape[0] * 0.2), shuffle=False)

n_hidden = 512
epoch = 100
model = Sequential()
model.add(SimpleRNN(n_hidden, input_shape=(input_len, n_in), return_sequences=False))
model.add(Dense(n_hidden, activation="linear"))
model.add(Dense(n_in, activation="linear"))
opt = Adam(learning_rate=0.001)  # `lr` is a deprecated alias of learning_rate
model.compile(loss='mse', optimizer=opt)
model.summary()
history = model.fit(x, y, epochs=epoch, batch_size=10, validation_data=(val_x, val_y))
OK it works fine.
It learns the one wave file A.wav
However how can I learn multiple wave files??
B.wav C.wav
For example,
If I use model.fit() multiple times for each wav, does this model remember the past learning??
Yes, the model keeps what it learned in previous calls: you can call fit() multiple times and each call continues training from the current weights. For small batches of data, however, it is better to use model.train_on_batch, which is a simpler version of fit.
you can also modify your code to add other wav file feature to data.
# second way
input_len = 10  # number of past frames used to predict the next one
# NOTE: `input` shadows the builtin; the name is kept because the rest of the
# original script builds X from it.
input = []
target = []
for f in ['A.wav', 'B.wav', 'C.wav']:
    a, sr = librosa.load(f, sr=22050)
    # Short-time Fourier transform of the current file.
    rawdata = librosa.stft(a, n_fft=512, hop_length=512 // 4, window='hann')
    # librosa.stft returns complex values; keep the magnitude so the network
    # trains on real numbers.
    rawdata = np.abs(rawdata)
    rawdata = rawdata.transpose()  # [Freq, Frame] => [Frame, Freq]
    # Windows are cut inside this loop, so none of them spans two recordings.
    for i in range(0, len(rawdata) - input_len):
        input.append(rawdata[i:i + input_len])  # window of input_len frames
        target.append(rawdata[i + input_len])   # the frame right after the window
Related
I'm trying to create a neural network for a classification problem: given audio clips of me talking and audio clips of other people talking, it should classify who is speaking. But when I train it, it gives me weird accuracy and loss values.
Here is my code.
'''
This is only to read the data and pass it into an array
1. Get the Audio data, my voice so we can visualize it into an array.
2. Build an ANN with the data already into an array. classification problem
3. Real time predictor using pyaudio and trained model
'''
from sklearn.model_selection import train_test_split
from tensorflow.python.keras.layers.core import Dropout
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
import numpy as np
from scipy.io import wavfile
from pathlib import Path
import os
# cut audio to make the same sizes, shape and length
def trim_wav(originalWavPath, newWavPath, start, new):
    '''
    Copy the [start, new) second range of a wav file into another wav file.

    :param originalWavPath: the path to the source wav file
    :param newWavPath: output wav file * can be same path as original
    :param start: start of the kept range, in seconds
    :param new: end of the kept range, in seconds (this is an end time, not a
        duration; the parameter name is kept for backward compatibility)
    :return: None
    '''
    sampleRate, waveData = wavfile.read(originalWavPath)
    # Convert the time bounds to sample indices.
    startSample = int(start * sampleRate)
    endSample = int(new * sampleRate)
    # Slicing clamps at the end of the data, so a short file is copied as is.
    wavfile.write(newWavPath, sampleRate, waveData[startSample:endSample])
### DATASET
# Every clip is cut to the same 0-5 s range so that all rows of X can have
# the same length.
CLIP_SECONDS = 5
# (folder, label) pairs: 1 = my voice, 0 = anybody else.
sources = (
    ('Voiceclassification/Data/me/', 1),
    ('Voiceclassification/Data/other/', 0),
)
clips = []
labels = []
for folder, label in sources:
    for path in Path(os.path.abspath(folder)).rglob('*.wav'):
        filename = str(path)
        # Trim in place (the original wrote back to the same path as well).
        trim_wav(filename, filename, 0, CLIP_SECONDS)
        samplerate, data = wavfile.read(filename)
        if data.ndim > 1:
            data = data.mean(axis=1)  # down-mix multi-channel audio to one channel
        # Keep EVERY clip; the original kept only the last file of each folder.
        clips.append(data.astype(np.float32))
        labels.append(label)

### ADAPTING THE DATA FOR THE MODEL
# One row per clip, one class label per row. The original used the other
# speaker's raw samples as y, which is not a label at all.
# NOTE(review): assumes all files share one sample rate - confirm.
n_samples = min(len(clip) for clip in clips)  # truncate to the shortest clip
X = np.stack([clip[:n_samples] for clip in clips])
y = np.array(labels).reshape(-1, 1)

### Training the model
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=0)

# Performing feature scaling (fit on the training split only)
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

### Creating the ANN
ann = tf.keras.models.Sequential()
# First hidden layer of the ann
ann.add(tf.keras.layers.Dense(units=6, activation="relu"))
# Use the public tf.keras Dropout instead of the private tensorflow.python.keras one.
ann.add(tf.keras.layers.Dropout(0.05))
# Second one
ann.add(tf.keras.layers.Dense(units=6, activation="relu"))
ann.add(tf.keras.layers.Dropout(0.05))
# Output layer: a single sigmoid unit giving the probability of label 1
ann.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile our neural network
ann.compile(optimizer="adam",
            loss="binary_crossentropy",
            metrics=['accuracy'])

# Fit ANN
ann.fit(x_train, y_train, batch_size=1024, epochs=100)
ann.save('Models/voiceclassification.model')
does anyone know if there is anything wrong with my code that makes the acc very low?
#MarkLavin, your answer was correct. I'm the same person who asked the question (this is my other, real account). I tried what #MarkLavin told me to do and it worked; I modified my code as follows.
### DATASET
import glob
import random

data = []
labels = []
# Build the path with os.path.join so it also works outside Windows, and
# match only wav files (wavfile.read cannot parse anything else).
data_root = os.path.abspath(os.path.join("Voiceclassification", "Data"))
audio_files = [f for f in glob.glob(os.path.join(data_root, "**", "*.wav"), recursive=True)
               if not os.path.isdir(f)]
random.shuffle(audio_files)

# All voices: mine and the others
for path in audio_files:
    # Trim in place so every clip covers the same 0-5 s range.
    trim_wav(path, path, 0, 5)
    samplerate, data_array = wavfile.read(path)
    # The original called data_array.reshape(-1, 1) here and discarded the
    # result (reshape returns a new array), so the call is dropped.
    data.append(data_array)
    # The parent folder name is the class: "me" -> 1, anything else -> 0.
    label = os.path.basename(os.path.dirname(path))
    labels.append([1 if label == "me" else 0])

### ADAPTING THE DATA FOR THE MODEL
# NOTE(review): X stays a Python list of per-clip arrays, as in the original;
# stack it into one array if all clips share a length.
X = data  # all voices data
y = np.array(labels)  # data label: 1 is me, 0 is other
With labels for y and all the (randomly shuffled) data for X, the results are 100% and 90% accuracy. Thank you so much #MarkLavin :)
I know there are several questions about this here, but I haven't found one which fits exactly my problem.
I'm trying to fit an LSTM with data from Pandas DataFrames but getting confused about the format I have to provide them.
I created a small code snipped which shall show you what I try to do:
import pandas as pd, tensorflow as tf, random
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
# Two random target series over 300 days.
targets = pd.DataFrame(index=pd.date_range(start='2019-01-01', periods=300, freq='D'))
targets['A'] = [random.random() for _ in range(len(targets))]
targets['B'] = [random.random() for _ in range(len(targets))]
# 300 random feature columns on the same index.
features = pd.DataFrame(index=targets.index)
for i in range(len(features)):
    features[str(i)] = [random.random() for _ in range(len(features))]

# An LSTM needs 3D input (samples, time steps, features); passing the 2D
# DataFrame raised "expected ndim=3, found ndim=2". Here every row becomes
# one sample made of a single time step.
n_samples, n_features = features.shape
x = features.to_numpy().reshape(n_samples, 1, n_features)
y = targets.to_numpy()

model = Sequential()
# input_shape describes ONE sample, (time steps, features), not the whole table.
model.add(LSTM(units=targets.shape[1], input_shape=(1, n_features)))
model.compile(optimizer='adam', loss='mae')
model.fit(x, y, batch_size=10, epochs=10)
this results to:
ValueError: Input 0 of layer sequential is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: [10, 300]
which I expect relates to the dimensions of the features DataFrame provided. I guess that once fixed this the next error would mention the targets DataFrame.
As far as I understand, 'units' parameter of my first layer defines the output dimensionality of this model. The inputs have to have a 3D shape, but I don't know how to create them out of the 2D world of the Data Frames.
I hope you can help me understanding the reshape mechanism in Python and how to use them in combination with Pandas DataFrames. (I'm quite new to Python and came from R)
Thanks in advance
Let's look at a few popular ways in which LSTMs are used.
Many to Many
Example: You have a sentence (composed of words in sequence). Give these sequence of words you would like to predict the Parts of speech (POS) of each word.
So you have n words and you feed one word per timestep to the LSTM. Each LSTM timestep (also called LSTM unrolling) will produce an output. Each word is represented by a set of features, normally word embeddings. So the input to the LSTM is of size batch_size X time_steps X features.
Keras code:
# Imports the snippet relies on but does not show.
import numpy as np
from tensorflow import keras

# Many to many: one prediction per time step.
inputs = keras.Input(shape=(10, 3))  # (time_steps, features)
# input_shape is not repeated on the LSTM: keras.Input already fixes it.
# return_sequences=True keeps the output of every time step.
lstm = keras.layers.LSTM(8, return_sequences=True)(inputs)
# TimeDistributed applies the same Dense classifier to each time step.
outputs = keras.layers.TimeDistributed(keras.layers.Dense(5, activation='softmax'))(lstm)
model = keras.Model(inputs=inputs, outputs=outputs)
model.compile(loss='categorical_crossentropy', optimizer='adam')

# Dummy data: 4 sequences of 10 steps with 3 features, and random 0/1 targets.
X = np.random.randn(4, 10, 3)
y = np.random.randint(0, 2, size=(4, 10, 5))
model.fit(X, y, epochs=2)
print(model.predict(X).shape)
Many to One
Example: Again you have a sentence (composed of words in sequence). Give these sequence of words you would like to predict sentiment of the sentence if it is positive or negative.
Keras code
# Imports the snippet relies on but does not show.
import numpy as np
from tensorflow import keras

# Many to one: a single prediction for the whole sequence.
inputs = keras.Input(shape=(10, 3))  # (time_steps, features)
# input_shape is not repeated on the LSTM: keras.Input already fixes it.
# return_sequences=False keeps only the output of the last time step.
lstm = keras.layers.LSTM(8, return_sequences=False)(inputs)
outputs = keras.layers.Dense(5, activation='softmax')(lstm)
model = keras.Model(inputs=inputs, outputs=outputs)
model.compile(loss='categorical_crossentropy', optimizer='adam')

# Dummy data: 4 sequences of 10 steps with 3 features, and random 0/1 targets.
X = np.random.randn(4, 10, 3)
y = np.random.randint(0, 2, size=(4, 5))
model.fit(X, y, epochs=2)
print(model.predict(X).shape)
Many to multi-headed
Example: You have a sentence (composed of words in sequence). Give these sequence of words you would like to predict sentiment of the sentence as well the author of the sentence.
This is multi-headed model where one head will predict the sentiment and another head will predict the author. Both the heads share the same LSTM backbone.
Keras code
# Imports the snippet relies on but does not show.
import numpy as np
from tensorflow import keras

# Multi-headed: two output layers share one LSTM backbone.
inputs = keras.Input(shape=(10, 3))  # (time_steps, features)
# input_shape is not repeated on the LSTM: keras.Input already fixes it.
lstm = keras.layers.LSTM(8, return_sequences=False)(inputs)
output_A = keras.layers.Dense(5, activation='softmax')(lstm)
output_B = keras.layers.Dense(5, activation='softmax')(lstm)
model = keras.Model(inputs=inputs, outputs=[output_A, output_B])
# The single loss given here is applied to both outputs.
model.compile(loss='categorical_crossentropy', optimizer='adam')

# Dummy data: 4 sequences of 10 steps with 3 features, one target per head.
X = np.random.randn(4, 10, 3)
y_A = np.random.randint(0, 2, size=(4, 5))
y_B = np.random.randint(0, 2, size=(4, 5))
model.fit(X, [y_A, y_B], epochs=2)
y_hat_A, y_hat_B = model.predict(X)
print(y_hat_A.shape, y_hat_B.shape)
What you are looking for is Many to Multi head model where your predictions for A will be made by one head and another head will make predictions for B
The input data for the LSTM has to be 3D.
If you print the shapes of your DataFrames you get:
targets : (300, 2)
features : (300, 300)
The input data has to be reshaped into (samples, time steps, features). This means that the inputs and the targets must contain the same number of samples.
You need to set a number of time steps for your problem, in other words, how many samples will be used to make a prediction.
For example, if you have 300 days and 2 features the time step can be 3. So that three days will be used to make one prediction (you can choose this arbitrarily). Here is the code for reshaping your data (with a few more changes):
import pandas as pd
import numpy as np
import tensorflow as tf
import random
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
# Two random daily series, 'A' and 'B', covering 300 days from 2019-01-01.
day_index = pd.date_range(start='2019-01-01', periods=300, freq='D')
data = pd.DataFrame(
    {column: [random.random() for _ in day_index] for column in ('A', 'B')},
    index=day_index,
)

# Window length: how many consecutive rows form one input sample.
time_steps = 3

# Continue with a plain numpy array; it is easier to slice into 3D windows.
data = data.to_numpy()
def make_x_y(ts, data):
    """
    Slice a series into sliding windows and their next-step targets.

    Parameters
    ts : int
        Window length, in rows of data.
    data : numpy array
        The series, with time along the first axis.

    This function creates two arrays, x and y.
    x is the input data and y is the target data: x[i] is data[i:i + ts] and
    y[i] is data[i + ts]. Both are empty when data has ts rows or fewer.
    """
    # One window per start position that still leaves a following row as target.
    starts = range(len(data) - ts)
    x = [data[start:start + ts] for start in starts]
    y = [data[start + ts] for start in starts]
    return np.array(x), np.array(y)
# Cut the series into windows (x) and next-step targets (y).
x, y = make_x_y(time_steps, data)
print(x.shape, y.shape)

nodes = 100  # width of the LSTM layer
out_size = 2  # number of values predicted per sample, one per series

# A single LSTM layer followed by a fully connected output layer.
model = Sequential([
    LSTM(units=nodes, input_shape=x.shape[1:]),
    Dense(out_size),
])
model.compile(optimizer='adam', loss='mae')
model.fit(x, y, batch_size=10, epochs=10)
Well, just to close this issue, I would like to share a solution I have worked on in the meantime. The class TimeseriesGenerator in tf.keras.... made it quite easy for me to provide the data in the right shape to an LSTM model.
from keras.preprocessing.sequence import TimeseriesGenerator
import numpy as np
window_size = 7    # number of consecutive rows in each input window
batch_size = 8     # windows per batch
sampling_rate = 1  # step between the rows taken inside a window

# The three generators differ only in the data they wrap.
generator_options = dict(length=window_size,
                         sampling_rate=sampling_rate,
                         batch_size=batch_size)
train_gen = TimeseriesGenerator(X_train.values, y_train.values, **generator_options)
valid_gen = TimeseriesGenerator(X_valid.values, y_valid.values, **generator_options)
test_gen = TimeseriesGenerator(X_test.values, y_test.values, **generator_options)
There are many other ways on implementing generators e.g. using the more_itertools which provides the function windowed, or making use of tensorflow.Dataset and its function window.
For me the TimeseriesGenerator was sufficient to feed the tests I did.
In case you would like to see an example modeling the DAX based on some stocks I'm sharing a notebook on Github.
Currently I can train a LSTM network using one csv file based on this tutorial: https://machinelearningmastery.com/how-to-develop-lstm-models-for-time-series-forecasting/
This code generate sliding windows where the last n_steps of the features are saved to predict the actual target (similar to this: Keras LSTM - feed sequence data with Tensorflow dataset API from the generator):
#%% Import
import pandas as pd
import tensorflow as tf
from tensorflow.python.keras.models import Sequential, model_from_json
from tensorflow.python.keras.layers import LSTM
from tensorflow.python.keras.layers import Dense
# for path
import pathlib
import os
#%% Define functions
# Function to split multivariate input data into samples according to the number of timesteps (n_steps) used for the prediction ("sliding window")
def split_sequences(sequences, n_steps):
    """Split a 2D array into sliding windows for supervised training.

    :param sequences: 2D array with time along the rows; every column but the
        last is a feature and the last column is the target
    :param n_steps: number of rows per window (expected to be >= 1)
    :return: tuple (X, y). X[i] holds the feature columns of rows
        i .. i + n_steps - 1 and y[i] is the target value of the LAST row of
        that window. Both are empty when there are fewer than n_steps rows.
    """
    # Local import: the original called a bare `array` that the snippet never imports.
    import numpy as np

    X, y = list(), list()
    # A window starting at row i ends (exclusive) at i + n_steps; stop once
    # that would run past the data.
    for i in range(len(sequences) - n_steps + 1):
        end_ix = i + n_steps
        X.append(sequences[i:end_ix, :-1])
        y.append(sequences[end_ix - 1, -1])
    return np.array(X), np.array(y)
# Set source files
# NOTE(review): dir_of_file is not defined anywhere in this snippet - it must be set before this line.
csv_train_path = os.path.join(dir_of_file, 'SimulationData', 'SimulationTrainData', 'SimulationTrainData001.csv')
# Load data (first column is parsed as dates and used as the index)
df_train = pd.read_csv(csv_train_path, header=0, parse_dates=[0], index_col=0)
#%% Select features and target
# The last entry is the target; split_sequences treats the last column as y.
features_targets_considered = ['Fz1', 'Fz2', 'Fz3', 'Fz4', 'Fz5', 'Fz_res']
n_features = len(features_targets_considered)-1 # subtract the target
features_targets_train = df_train[features_targets_considered]
# "Convert" to array
train_values = features_targets_train.values
# Set number of previous timesteps, which are considered to predict
n_steps = 100
# Convert into input windows of shape (n_steps x n_features) = (100 x 5) and one output value per window
X, y = split_sequences(train_values, n_steps)
# NOTE(review): test_values is not defined anywhere in this snippet - presumably built from a test csv like train_values; confirm.
X_test, y_test = split_sequences(test_values, n_steps)
#%% Define model
# Three stacked LSTM layers; the first two return full sequences so the next LSTM receives 3D input.
model = Sequential()
model.add(LSTM(200, activation='relu', return_sequences=True, input_shape=(n_steps, n_features)))
model.add(LSTM(200, activation='relu', return_sequences=True))
model.add(LSTM(200, activation='relu'))
# Single regression output
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse', metrics=['mae'])
#%% Fit model
# X_test / y_test are not used by this call.
history = model.fit(X, y, epochs=200, verbose=1)
I now want to expand this example to efficiently train the network with different csv files. In the data folder I have the files 'SimulationTrainData001.csv', 'SimulationTrainData002.csv', ..., 'SimulationTrainData300.csv' (about 14 GB).
To achieve this, I tried to adapt the code of this input pipeline example: https://www.tensorflow.org/guide/data#consuming_sets_of_files, which works to a certain extent. I can list the training files in the folder with this change:
# Set source folders
# Build the folder path directly as a pathlib.Path (no os.path.join round trip).
csv_train_path = pathlib.Path(dir_of_file, 'SimulationData', 'SimulationTrainData')
#%% Show five example files from training folder
list_ds = tf.data.Dataset.list_files(str(csv_train_path / '*'))
for f in list_ds.take(5):
    print(f.numpy())
One problem is, that in the example the files are pictures of flowers and not time series values and I do not know at which point I can use the split_sequences(sequences, n_steps) function to create the sliding windows to provide the necessary data format to train the LSTM network.
Also, as far as I know, it would be better for the training process, if the generated windows of the different files would be shuffled. I could use the split_sequences(sequences, n_steps) function on every csv file (to generate X_test , y_test) and join the result in one big variable or file and shuffle the windows, but I do not think this is an efficient way and it also had to be redone if n_steps will be changed.
If somebody could suggest a (established) method or example to preprocess my data, I would be very thankful.
You can use the TimeSeriesGenerator after consuming those sets of files.
Here is the reference link.
As per the documentation:
'''
This class takes in a sequence of data-points gathered at equal intervals, along with time-series parameters such as stride, length of history, etc., to produce batches for training/validation.
'''
Provided examples for both univariate & multiple variate scenario
Univariate Example:
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, LSTM
import numpy as np
import tensorflow as tf
# define dataset
series = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
# reshape to [10, 1]
n_features = 1
series = series.reshape((len(series), n_features))
# define generator: every sample is n_input consecutive values, the target is the next value
n_input = 2
generator = TimeseriesGenerator(series, series, length=n_input, batch_size=8)
# create model
model = Sequential()
model.add(LSTM(100, activation='relu', input_shape=(n_input, n_features)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')
# fit model: Model.fit accepts the generator directly; fit_generator is deprecated
model.fit(generator, steps_per_epoch=1, epochs=500, verbose=1)
# sample prediction
inputs = np.array([9, 10]).reshape((1, n_input, n_features))
result = model.predict(inputs, verbose=0)
print(result)
Multi-variate Example
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, LSTM
import numpy as np
import tensorflow as tf
# define dataset
in_seq1 = np.array([10, 20, 30, 40, 50, 60, 70, 80, 90, 100])
in_seq2 = np.array([15, 25, 35, 45, 55, 65, 75, 85, 95, 105])
# reshape series to column vectors
in_seq1 = in_seq1.reshape((len(in_seq1), 1))
in_seq2 = in_seq2.reshape((len(in_seq2), 1))
# horizontally stack columns: one row per time step, one column per series
dataset = np.hstack((in_seq1, in_seq2))
# define generator: every sample is n_input consecutive rows, the target is the next row
n_features = dataset.shape[1]
n_input = 2
generator = TimeseriesGenerator(dataset, dataset, length=n_input, batch_size=8)
# define model
model = Sequential()
model.add(LSTM(100, activation='relu', input_shape=(n_input, n_features)))
model.add(Dense(2))
model.compile(optimizer='adam', loss='mse')
# fit model: Model.fit accepts the generator directly; fit_generator is deprecated
model.fit(generator, steps_per_epoch=1, epochs=500, verbose=1)
# make a one step prediction out of sample
inputs = np.array([[90, 95], [100, 105]]).reshape((1, n_input, n_features))
result = model.predict(inputs, verbose=1)
print(result)
Note: All of these were simulated using Google Colaboratory
I'm trying to create a speaker recognition system that takes sound files from any movie, trains a neural network on MFCC (sound feature) vectors extracted from those files, and then tells me which speaker is talking in another sound file.
So that's what I did -
Created MFCC vector for each speaker and put it on an array named X ( speaker can be more than one time )
Created Output number for each speaker
Created this model with tensorflow -
Dense Layer(512, 'relu')
Dropout (0.3)
Dense Layer(256, 'relu')
Dense Layer(128, 'relu')
Flatten
Dense Layer(length of outputs, 'relu')
Then I trained the model and finally checked my results, but, as I said, unfortunately my results are not good enough: only ~45% accuracy :(
I add my full code and my data base, notice that my data base can make some mistakes for example take voice of leonard and call it sheldon because it base on the srt file of the movie and the srt file have sometimes mistakes.
My Full Code :
import python_speech_features
import scipy.io.wavfile as wav
import numpy as np
from os import listdir
import os
import shutil
from os.path import isfile, join
from random import shuffle
from matplotlib import pyplot
from tqdm import tqdm
import tensorflow as tf
from collections import Counter

win_len = 0.04  # MFCC analysis window, in seconds
step = win_len / 2  # hop between windows (50 % overlap)
nfft = 2048

AUDIO_DIR = "FinalAudios"
REJECT_DIR = "TooLowSamples"
MIN_CLIPS_PER_SPEAKER = 60  # speakers with fewer clips are dropped


def _speaker_of(filename):
    """Return the speaker name: the text before the first '_' , cut at the first space."""
    return filename.split("_")[0].split(" ")[0]


for TestNum in tqdm(range(5)):  # We check it several times
    X = []  # inputs
    Y = []  # outputs
    onlyfiles = [f for f in listdir(AUDIO_DIR) if isfile(join(AUDIO_DIR, f))]  # Files in dir

    # Count the clips per speaker once, then keep (in order of first
    # appearance) only the speakers with enough clips.
    clip_counts = Counter(_speaker_of(f) for f in onlyfiles)
    names = [name for name, count in clip_counts.items() if count >= MIN_CLIPS_PER_SPEAKER]
    print(names)  # print it

    # The class label of a speaker is its position in `names`.
    label_of = {name: index for index, name in enumerate(names)}

    for f in onlyfiles:  # for all the files
        f_speaker = _speaker_of(f)
        if f_speaker in label_of:
            fs, audio = wav.read(join(AUDIO_DIR, f))  # read the file
            try:
                # compute MFCC
                mfcc_feat = python_speech_features.mfcc(audio, samplerate=fs, winlen=win_len,
                                                        winstep=step, nfft=nfft, appendEnergy=False)
            except IndexError:
                # The original skipped such files silently; still skip, but say so.
                print("skipping (MFCC failed):", f)
                continue
            # Create output + inputs: the MFCC matrix flattened row by row.
            X.append(np.asarray(mfcc_feat).ravel())
            Y.append(label_of[f_speaker])
        else:
            # Too few clips for this speaker: move the file out of the data set.
            # Paths are built with join so this is not tied to Windows separators.
            os.makedirs(REJECT_DIR, exist_ok=True)
            shutil.move(join(AUDIO_DIR, f), join(REJECT_DIR, f))

    # ------------------- RANDOMIZATION ------------------- #
    Z = list(zip(X, Y))
    shuffle(Z)  # shuffle X and Y together so the test split is random
    X, Y = zip(*Z)
    X = list(X)
    Y = list(Y)

    # First 100 samples for test, last 100 of the rest for validation.
    # NOTE(review): np.asarray needs every clip to yield the same number of
    # MFCC values (i.e. equally long audio) - confirm.
    x_test = np.asarray(X[:100])
    y_test = np.asarray(Y[:100])
    x_train = np.asarray(X[100:])
    y_train = np.asarray(Y[100:])
    x_val = x_train[-100:]
    y_val = y_train[-100:]
    x_train = x_train[:-100]
    y_train = y_train[:-100]

    # Append a trailing axis of size 1 (same result as the original reshape).
    # NOTE(review): with this axis the Dense layers act on every MFCC value
    # separately until Flatten - confirm this is intended.
    x_train = x_train[..., np.newaxis]
    x_test = x_test[..., np.newaxis]
    x_val = x_val[..., np.newaxis]

    # -------------- OUR TENSOR FLOW NEURAL NETWORK MODEL -------------- #
    model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(512, activation='relu'),
        tf.keras.layers.Dropout(0.3),
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(len(names), activation='softmax'),
    ])
    # Labels are integer class indices, hence the sparse loss.
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    print("fitting")
    history = model.fit(x_train, y_train, epochs=4, validation_data=(x_val, y_val))
    print("testing")
    results = model.evaluate(x_test, y_test)
    print(results)

    # Loss curves; the second curve is the validation loss.
    pyplot.plot(history.history['loss'], label='train')
    pyplot.plot(history.history['val_loss'], label='test')
    pyplot.legend()
    pyplot.show()
My data set - https://filebin.net/ajho6kgzx66xayyn
Note : I tried also convolution layers but it worked even worse
I'm using the tf.keras API in TensorFlow2. I have 100,000 images or so that are saved as TFRecords (128 images per record). Each record has an input image, target image, and frame index. I can't find a clean way to keep the frame index with the prediction.
Here is an example, except I build a dataset with NumPy arrays instead of reading from TFRecords:
import tensorflow as tf
from tensorflow import keras
import numpy as np
# Synthetic stand-in for the TFRecord data: inputs, noisy targets and frame ids.
n_frames = 10000
x = np.random.random(n_frames).astype(np.float32)
noise = np.random.random(n_frames).astype(np.float32) * 0.1
y = x + noise
idx = np.arange(n_frames, dtype=np.uint16)
np.random.shuffle(idx)  # frame order is random in the TFRecord files
records = tf.data.Dataset.from_tensor_slices((x, y, idx))


def drop_frame_index(features, target, frame_index):
    """Keep only the (input, target) pair; the frame index is discarded."""
    return features, target


# Pretend `records` came from TFRecord files: strip the index, then batch.
ds = records.map(drop_frame_index).batch(32)

# Linear regression: one Dense unit on a one-value input.
x = keras.Input(shape=(1,))
y_hat = keras.layers.Dense(units=1)(x)
model = keras.Model(inputs=x, outputs=y_hat)
model.compile(optimizer='sgd', loss='mse')
history = model.fit(ds, epochs=5)

# Predict a single batch.
model.predict(ds, steps=1)
Short of reading through the dataset again to extract the indices (which is prone to error), is there a clean way to keep prediction correspondence with image index? In TF1.x it was straightforward. But I'd like to take advantage of clean Keras compile(), fit(), predict() API in TF2.
Ok, was thinking too hard, pretty easy actually. Just add index to dataset when you are making predictions, and pull out indices as you are iterating through batches:
import numpy as np
import tensorflow as tf
from tensorflow import keras
def build_dataset(mode):
    """Build the batched toy dataset for training or for prediction.

    :param mode: 'train' returns shuffled (x, y) pairs; any other value
        returns unshuffled (x, idx) pairs, so every input keeps its frame index
    :return: a tf.data.Dataset batched in groups of 32
    """
    # Re-seeding makes both modes regenerate exactly the same x values.
    np.random.seed(1)
    x = np.random.random(10000).astype(np.float32)
    # Drawn in both modes, as in the original, so the global RNG state after
    # the call does not depend on `mode`.
    y = x + np.random.random(10000).astype(np.float32) * 0.1
    if mode == 'train':
        ds = tf.data.Dataset.from_tensor_slices((x, y))
        ds = ds.shuffle(128)
    else:
        # uint16 is enough for the 10000 frame indices used here.
        idx = np.arange(10000, dtype=np.uint16)
        ds = tf.data.Dataset.from_tensor_slices((x, idx))
    ds = ds.batch(32)
    return ds
# build and train simple linear regression model
x_tf = keras.Input(shape=(1,))
yhat_tf = keras.layers.Dense(1)(x_tf)
model = keras.Model(x_tf, yhat_tf)
model.compile(optimizer='sgd', loss='mse')
ds = build_dataset('train')
history = model.fit(ds, epochs=5)

# predict 1 batch: each batch carries its inputs together with their indices,
# so predictions can be matched to frames by position.
ds = build_dataset('predict')
# take(1) replaces the original loop-and-break; a separate name avoids
# overwriting x_tf, the model's input tensor.
for x_batch, indices_tf in ds.take(1):
    yhat_np = model.predict(x_batch)