I have a Python script for pre-processing audio, and it takes frame length, frame step, and FFT length as command-line arguments. I can run the code with a single value for each of these arguments. Is there a way to run the script with multiple values of the arguments? For example, get the output for FFT lengths of 128, 256, and 512 instead of just one value.
The code for pre-processing is as follows:
import numpy as np
import pandas as pd
import tensorflow as tf
from scipy.io import wavfile
import os
import time
import pickle
import random
import argparse
import configlib
from configlib import config as C
import mfccwithpaddingandcmd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import MultiLabelBinarizer
from tensorflow import keras
from tensorflow.python.keras import Sequential
from tensorflow.keras.layers import Dense,Conv2D,MaxPooling2D,Flatten,Dropout,BatchNormalization,LSTM,Lambda,Reshape,Bidirectional,GRU
from tensorflow.keras.callbacks import TensorBoard
start = time.time()
classes = ['blinds','fan','light','music','tv']
#dire = r"/mnt/beegfs/home/gehani/test_speech_command/"
parser = configlib.add_parser("Preprocessing config")
parser.add_argument("-dir","--dire", metavar="", help="Directory for the audio files")
def pp():
data_list=[] #To save paths of all the audio files.....all audio files in list format in data_list
#data_list-->folder-->files in folder
for index,label in enumerate(classes):
class_list=[]
if label=='silence': #creating silence folder and storing 1sec noise audio files
silence_path = os.path.join(C["dire"],'silence')
if not os.path.exists(silence_path):
os.mkdir(silence_path)
silence_stride = 2000
#sample_rate = 16000
folder = os.path.join(C["dire"],'_background_noise_') #all silence are kept in the background_noise folder
for file_ in os.listdir(folder):
if '.wav' in file_:
load_path = os.path.join(folder,file_)
sample_rate,y = wavfile.read(load_path)
for i in range(0,len(y)-sample_rate,silence_stride):
file_path = "silence/{}_{}.wav".format(file_[:-4],i)
y_slice = y[i:i+sample_rate]
wavfile.write(os.path.join(C["dire"],file_path),sample_rate,y_slice)
class_list.append(file_path)
else:
folder = os.path.join(C["dire"],label)
for file_ in os.listdir(folder):
file_path = '{}/{}'.format(label,file_) #Ex: up/c9b653a0_nohash_2.wav
class_list.append(file_path)
random.shuffle(class_list) #To shuffle files
data_list.append(class_list) #if not a silence file then just append to the datalist
X = []
Y = []
preemphasis = 0.985
print("Feature Extraction Started")
for i,class_list in enumerate(data_list): #datalist = all files, class list = folder name in datalist, sample = path to the audio file in that particular class list
for j,samples in enumerate(class_list): #samples are of the form classes_name/audio file
if(samples.endswith('.wav')):
sample_rate,audio = wavfile.read(os.path.join(C["dire"],samples))
if(audio.size<sample_rate):
audio = np.pad(audio,(sample_rate-audio.size,0),mode="constant")
#print("****")
#print(sample_rate)
#print(preemphasis)
#print(audio.shape)
coeff = mfccwithpaddingandcmd.mfcc(audio,sample_rate,preemphasis) # 0.985 = preemphasis
#print("****")
#print(coeff)
#print("****")
X.append(coeff)
#print(X)
if(samples.split('/')[0] in classes):
Y.append(samples.split('/')[0])
elif(samples.split('/')[0]=='_background_noise_'):
Y.append('silence')
#print(len(X))
#print(len(Y))
#X= coefficient array and Y = name of the class
A = np.zeros((len(X),X[0].shape[0],X[0][0].shape[0]),dtype='object')
for i in range(0,len(X)):
A[i] = np.array(X[i]) #Converting list X into array A
end1 = time.time()
print("Time taken for feature extraction:{}sec".format(end1-start))
MLB = MultiLabelBinarizer() # one hot encoding for converting labels into binary form
MLB.fit(pd.Series(Y).fillna("missing").str.split(', '))
Y_MLB = MLB.transform(pd.Series(Y).fillna("missing").str.split(', '))
MLB.classes_ #Same like classes array
print(Y_MLB.shape)
pickle_out = open("A_all.pickle","wb") #Writes array A to a file A.pickle
pickle.dump(A, pickle_out) #pickle is the file containing the extracted features
pickle_out.close()
pickle_out = open("Y_all.pickle","wb")
pickle.dump(Y_MLB, pickle_out)
pickle_out.close()
pickle_in = open("Y_all.pickle","rb")
Y = pickle.load(pickle_in)
X = tf.keras.utils.normalize(X)
X_train,X_valtest,Y_train,Y_valtest = train_test_split(X,Y,test_size=0.2,random_state=37)
X_val,X_test,Y_val,Y_test = train_test_split(X_valtest,Y_valtest,test_size=0.5,random_state=37)
print(X_train.shape,X_val.shape,X_test.shape,Y_train.shape,Y_val.shape,Y_test.shape)
if __name__ == "__main__":
configlib.parse(save_fname="last_arguments.txt")
print("Running with configuration:")
configlib.print_config()
pp()
The code for MFCC is as follows:
import tensorflow as tf
import scipy.io.wavfile as wav
import numpy as np
import matplotlib.pyplot as plt
import pickle
import argparse
import configlib
from configlib import config as C
# Configuration arguments
parser = configlib.add_parser("MFCC config")
parser.add_argument("-fl","--frame_length", type=int, default=400, metavar="", help="Frame Length")
parser.add_argument("-fs","--frame_step", type=int, default=160, metavar="", help="Frame Step")
parser.add_argument("-fft","--fft_length", type=int, default=512, metavar="", help="FFT length")
#args = parser.parse_args()
def Preemphasis(signal,pre_emp):
return np.append(signal[0],signal[1:]-pre_emp*signal[:-1])
def Paddinggg(framelength,framestep,samplerate):
frameStart = np.arange(0,samplerate,framestep)
frameEnd = frameStart + framelength
padding = min(frameEnd[(frameEnd > samplerate)]) - samplerate
return padding
def mfcc(audio,sample_rate,pre_emp):
audio = np.pad(audio,(Paddinggg(C["frame_length"],C["frame_step"],sample_rate),0),mode='reflect')
audio = audio.astype('float32')
#Normalization
audio = tf.keras.utils.normalize(audio)
#Preemphasis
audio = Preemphasis(audio,pre_emp)
stfts = tf.signal.stft(audio,C["frame_length"],C["frame_step"],C["fft_length"],window_fn=tf.signal.hann_window)
spectrograms = tf.abs(stfts)
num_spectrogram_bins = stfts.shape[-1]
lower_edge_hertz, upper_edge_hertz, num_mel_bins = 0.0, sample_rate/2.0, 32
linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(num_mel_bins, num_spectrogram_bins, sample_rate, lower_edge_hertz,upper_edge_hertz)
mel_spectrograms = tf.tensordot(spectrograms, linear_to_mel_weight_matrix, 1)
mel_spectrograms.set_shape(spectrograms.shape[:-1].concatenate(linear_to_mel_weight_matrix.shape[-1:]))
# Compute a stabilized log to get log-magnitude mel-scale spectrograms.
log_mel_spectrograms = tf.math.log(mel_spectrograms + 1e-6)
# Compute MFCCs from log_mel_spectrograms and take the first 13.
return log_mel_spectrograms
print("End")
And the code for configlib is as follows:
from typing import Dict, Any
import logging
import pprint
import sys
import argparse
# Logging for config library
logger = logging.getLogger(__name__)
# Our global parser that we will collect arguments into
parser = argparse.ArgumentParser(description=__doc__, fromfile_prefix_chars="#")
# Global configuration dictionary that will contain parsed arguments
# It is also this variable that modules use to access parsed arguments
config:Dict[str, Any] = {}
def add_parser(title: str, description: str = ""):
"""Create a new context for arguments and return a handle."""
return parser.add_argument_group(title, description)
def parse(save_fname: str = "") -> Dict[str, Any]:
"""Parse given arguments."""
config.update(vars(parser.parse_args()))
logging.info("Parsed %i arguments.", len(config))
# Optionally save passed arguments
if save_fname:
with open(save_fname, "w") as fout:
fout.write("\n".join(sys.argv[1:]))
logging.info("Saving arguments to %s.", save_fname)
return config
def print_config():
"""Print the current config to stdout."""
pprint.pprint(config)
I use the following command to run my Python file:
python3.7 preprocessingwithpaddingandcmd.py -fl 1103 -fs 88 -fft 512 -dir /mnt/beegfs/home/gehani/appliances_audio_one_channel
Should I write a shell script, or does Python have an option for this?
EDIT 1
I tried using
parser.add_argument('-fft', '--fft_length', type=int, default=[], nargs=3)
to get the FFT length from the command line, and used the command
run preprocessingwithpaddingandcmd -dir filepath -fl 1765 -fs 1102 -fft 512 218 64
to run it. But, it gives me this error: ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
Can anyone please help?
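For reference, one minimal way to sweep several FFT lengths without changing the preprocessing code itself is to call the script repeatedly from a small wrapper. This is only a sketch: it assumes the script name and flags from the command shown above and a hypothetical list of FFT values.
import subprocess

# Hypothetical sweep values; frame length and frame step stay fixed here.
fft_lengths = [128, 256, 512]

for fft in fft_lengths:
    subprocess.run(
        ["python3.7", "preprocessingwithpaddingandcmd.py",
         "-fl", "1103", "-fs", "88", "-fft", str(fft),
         "-dir", "/mnt/beegfs/home/gehani/appliances_audio_one_channel"],
        check=True,  # stop the sweep if one run fails
    )
Note that the script writes fixed output names (A_all.pickle, Y_all.pickle), so each run would overwrite the previous results unless the files are renamed or moved between runs.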
I found you can do it with an MFCC feature-extraction library.
You can write your own MFCC extraction, or just limit the window length and the number of cepstral coefficients, which is enough for simple tasks unless you need logarithmic scales, where you can use a target matrix (convolution) or something similar.
The logarithm comes in when you use the FFT or an alternative derivation, but MFCC is only a feature-extraction step; a sample is provided below.
[ Sample ]:
from python_speech_features import mfcc
from python_speech_features import logfbank
import scipy.io.wavfile as wav
import tensorflow as tf
import matplotlib.pyplot as plt
(rate,sig) = wav.read("F:\\temp\\Python\\Speech\\temple_of_love-sisters_of_mercy.wav")
mfcc_feat = mfcc(signal=sig, samplerate=rate, winlen=0.025, winstep=0.01, numcep=13, nfilt=26, nfft=512, lowfreq=0, highfreq=None, preemph=0.97, ceplifter=22, appendEnergy=True)
fbank_feat = logfbank(sig,rate)
plt.plot( mfcc_feat[50:42000,0] )
plt.xlabel("sample")
plt.show()
plt.close()
input('...')
Related
In the code below I get the following error:
import sys
import esig
import numpy as np
from preprocessing import preprocess_signature
from hyperparameters import hyperparameters
esig.set_backend("iisignature")
np.set_printoptions(threshold=sys.maxsize)
def calculate_signatures(split_stream, hps = hyperparameters):
depth = hps.depth
signatures = []
for stream in split_stream:
np_stream = np.array(stream)
sig = esig.stream2sig(np_stream, depth)
sig = preprocess_signature(sig, hps)
signatures.append(sig)
return signatures
from preprocessing import preprocess_signature
ImportError: cannot import name 'preprocess_signature' from 'preprocessing' (c:\Users\dmitr\Desktop\project work\preprocessing.py)
However when I change the code to be the following, it works as intended:
import sys
import esig
import numpy as np
import preprocessing
from hyperparameters import hyperparameters
esig.set_backend("iisignature")
np.set_printoptions(threshold=sys.maxsize)
def calculate_signatures(split_stream, hps = hyperparameters):
depth = hps.depth
signatures = []
for stream in split_stream:
np_stream = np.array(stream)
sig = esig.stream2sig(np_stream, depth)
sig = preprocessing.preprocess_signature(sig, hps)
signatures.append(sig)
return signatures
There are no typos; I am literally copy-pasting the name of the file and the function. There are no dependency cycles either.
Can someone please explain what is going wrong?
I have a PyTorch model that I exported to ONNX and converted to a TensorRT engine with the following command:
trtexec --onnx=model.onnx --batch=400 --saveEngine=model.trt
All of this works, but how do I now load this model.trt in python and run the inference?
The official documentation has a lot of examples. The basic steps to follow are:
ONNX parser: takes a trained model in ONNX format as input and populates a network object in TensorRT
Builder: takes a network in TensorRT and generates an engine that is optimized for the target platform
Engine: takes input data, performs inference, and emits the inference output
Logger: object associated with the builder and engine to capture errors, warnings and other information during the build and inference phases
An example for the engine is:
import tensorrt as trt
import pycuda.autoinit
import pycuda.driver as cuda
from onnx import ModelProto
import onnx
import numpy as np
import matplotlib.pyplot as plt
from time import time
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
trt_runtime = trt.Runtime(TRT_LOGGER)
batch_size = 1
explicit_batch = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
inp_shape = [batch_size, 3, 1024, 1024] # the shape I was using
def build_engine(onnx_path, shape = inp_shape):
with trt.Builder(TRT_LOGGER) as builder,builder.create_builder_config() as config,\
builder.create_network(explicit_batch) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
if builder.platform_has_fast_fp16:
builder.fp16_mode = True
builder.max_workspace_size = (1 << 30)
#builder.max_workspace_size = (3072 << 20)
#profile = builder.create_optimization_profile()
#config.max_workspace_size = (3072 << 20)
#config.add_optimization_profile(profile)
print("parsing")
with open(onnx_path, 'rb') as model:
print("onnx found")
if not parser.parse(model.read()):
print("parse failed")
for error in range(parser.num_errors):
print(parser.get_error(error))
#parser.parse(model.read())
last_layer = network.get_layer(network.num_layers - 1)
# Check if last layer recognizes it's output
if not last_layer.get_output(0):
# If not, then mark the output using TensorRT API
network.mark_output(last_layer.get_output(0))
network.get_input(0).shape = shape
engine = builder.build_cuda_engine(network)
return engine
def save_engine(engine, file_name):
buf = engine.serialize()
with open(file_name, 'wb') as f:
f.write(buf)
def load_engine(trt_runtime, plan_path):
with open(plan_path, 'rb') as f:
engine_data = f.read()
engine = trt_runtime.deserialize_cuda_engine(engine_data)
return engine
if __name__ == "__main__":
onnx_path = "./path/to/your/model.onnx"
engine_name = "./path/to/engine.plan"
model = ModelProto()
with open(onnx_path, "rb") as f:
model.ParseFromString(f.read())
d0 = model.graph.input[0].type.tensor_type.shape.dim[1].dim_value
d1 = model.graph.input[0].type.tensor_type.shape.dim[2].dim_value
d2 = model.graph.input[0].type.tensor_type.shape.dim[3].dim_value
shape = [batch_size , d0, d1 ,d2]
print(shape)
print("trying to build engine")
engine = build_engine(onnx_path,shape)
save_engine(engine,engine_name)
print("finished")
Follow this page for another example and information.
Found an answer based on this tutorial.
import numpy as np
import tensorrt as trt
import pycuda.driver as cuda

dev = cuda.Device(0)
ctx = dev.make_context()
try:
    TRT_LOGGER = trt.Logger(trt.Logger.INFO)
    with open("model.trt", 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
        engine = runtime.deserialize_cuda_engine(f.read())
        with engine.create_execution_context() as context:
            # get sizes of input and output and allocate memory required for input data and for output data
            for binding in engine:
                if engine.binding_is_input(binding):  # we expect only one input
                    input_shape = engine.get_binding_shape(binding)
                    input_size = trt.volume(input_shape) * engine.max_batch_size * np.dtype(np.float32).itemsize  # in bytes
                    device_input = cuda.mem_alloc(input_size)
                else:  # and one output
                    output_shape = engine.get_binding_shape(binding)
                    # create page-locked memory buffers (i.e. won't be swapped to disk)
                    host_output = cuda.pagelocked_empty(trt.volume(output_shape) * engine.max_batch_size, dtype=np.float32)
                    device_output = cuda.mem_alloc(host_output.nbytes)

            stream = cuda.Stream()
            host_input = np.array(batch, dtype=np.float32, order='C')
            cuda.memcpy_htod_async(device_input, host_input, stream)
            context.execute_async(bindings=[int(device_input), int(device_output)], stream_handle=stream.handle)
            cuda.memcpy_dtoh_async(host_output, device_output, stream)
            stream.synchronize()
            # postprocess results
            output_data = host_output.reshape(engine.max_batch_size, output_shape[0]).T
finally:
    ctx.pop()
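One detail to note: batch in the snippet above is not defined there; it is assumed to be a NumPy array already laid out to match the engine's input binding. A minimal sketch of that preparation, with a purely hypothetical shape (in practice, use the input_shape read from the engine):
import numpy as np

# Hypothetical input: replace the shape with the engine's actual input binding shape.
batch = np.random.rand(1, 3, 1024, 1024).astype(np.float32)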
I am trying to adapt some code to better suit my needs. The code currently takes all the files from a folder and runs image recognition on them, but I need to pass a single image that I specify from the command line.
Here is the code I am using:
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
# import the necessary packages
import json
import os
import random
import cv2 as cv
import keras.backend as K
import numpy as np
import scipy.io
from utils import load_model
if __name__ == '__main__':
img_width, img_height = 224, 224
model = load_model()
model.load_weights('models/model.96-0.89.hdf5')
cars_meta = scipy.io.loadmat('devkit/cars_meta')
class_names = cars_meta['class_names'] # shape=(1, 196)
class_names = np.transpose(class_names)
test_path = 'data/test/'
test_images = [f for f in os.listdir(test_path) if
os.path.isfile(os.path.join(test_path, f)) and f.endswith('.jpg')]
num_samples = 1
samples = random.sample(test_images, num_samples)
results = []
for i, image_name in enumerate(samples):
filename = os.path.join(test_path, image_name)
print('Start processing image: {}'.format(filename))
bgr_img = cv.imread(filename)
bgr_img = cv.resize(bgr_img, (img_width, img_height), cv.INTER_CUBIC)
rgb_img = cv.cvtColor(bgr_img, cv.COLOR_BGR2RGB)
rgb_img = np.expand_dims(rgb_img, 0)
preds = model.predict(rgb_img)
prob = np.max(preds)
class_id = np.argmax(preds)
text = ('Predict: {}, prob: {}'.format(class_names[class_id][0][0], prob))
results.append({'label': class_names[class_id][0][0], 'prob': '{:.4}'.format(prob)})
cv.imwrite('images/{}_out.png'.format(i), bgr_img)
print(results)
with open('results.json', 'w') as file:
json.dump(results, file, indent=4)
K.clear_session()
Is there a way that I can just pass a single image, instead of an entire folder, and do so from the command line?
sys.argv is a list in Python that contains the command-line arguments passed to the script.
Note that sys.argv[0] is the name of the script itself!
So the easiest way to do this is with the built-in sys.argv list. For example, if my script is called foo.py, I can pass arguments to it from the command line like so:
python foo.py "C:\user\myimg.jpeg"
and in foo.py I can get the passed argument like so:
import sys

if __name__ == '__main__':
    for arg in sys.argv[1:]:
        # do work with arg! arg is a string type!
        with open(arg) as fp:
            pass  # .... do work ....
argparse module
The following code is a Python program that takes a list of integers and produces either the sum or the max:
import argparse
parser = argparse.ArgumentParser(description='Process some integers.')
parser.add_argument('integers', metavar='N', type=int, nargs='+',
                    help='an integer for the accumulator')
parser.add_argument('--sum', dest='accumulate', action='store_const',
                    const=sum, default=max,
                    help='sum the integers (default: find the max)')
args = parser.parse_args()
print(args.accumulate(args.integers))
Calling it from the command line:
python foo.py 1 2 3 4
output: 4
python foo.py 1 2 3 4 --sum
10
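Applied to the image-recognition script above, a minimal sketch (the argument name is hypothetical; adapt it to your own code) would replace the random sampling with a single path taken from the command line:
import argparse
import os

parser = argparse.ArgumentParser(description='Classify a single image.')
parser.add_argument('image', help='path to a single .jpg image')
args = parser.parse_args()

# Process only the file given on the command line instead of sampling data/test/.
test_path = os.path.dirname(args.image) or '.'
samples = [os.path.basename(args.image)]
The existing loop over samples then runs on just that one file.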
I'm new to Python.
My professor has given me a piece of code to help process some imagery, but it only works on one image at a time because an input and an output have to be specified for every run. Usually I would use import os or glob, but argparse is new to me and my usual methods do not work.
I need to edit this so it runs over a list of '.hdf' files, with each output named the same as its input except for a '_Processed.hdf' suffix.
Code below:
# Import the numpy library
import numpy
# Import the GDAL library
from osgeo import gdal
# Import the GDAL/OGR spatial reference library
from osgeo import osr
# Import the HDF4 reader.
import pyhdf.SD
# Import the system library
import sys
# Import the python Argument parser
import argparse
import pprint
import rsgislib
def creatGCPs(lat_arr, lon_arr):
y_size = lat_arr.shape[0]
x_size = lat_arr.shape[1]
print(x_size)
print(y_size)
gcps = []
for y in range(y_size):
for x in range(x_size):
gcps.append([x, y, lon_arr[y,x], lat_arr[y,x]])
return gcps
def run(inputFile, outputFile):
hdfImg = pyhdf.SD.SD(inputFile)
#print("Available Datasets")
pprint.pprint(hdfImg.datasets())
#print("Get Header Attributes")
#attr = hdfImg.attributes(full=1)
#pprint.pprint(attr)
rsgisUtils = rsgislib.RSGISPyUtils()
wktStr = rsgisUtils.getWKTFromEPSGCode(4326)
#print(wktStr)
lat_arr = hdfImg.select('Latitude')[:]
long_arr = hdfImg.select('Longitude')[:]
sel_dataset_arr = hdfImg.select('Optical_Depth_Land_And_Ocean')[:]
gcplst = creatGCPs(lat_arr, long_arr)
y_size = lat_arr.shape[0]
x_size = lat_arr.shape[1]
min_lat = numpy.min(lat_arr)
max_lat = numpy.max(lat_arr)
min_lon = numpy.min(long_arr)
max_lon = numpy.max(long_arr)
lat_res = (max_lat-min_lat)/float(y_size)
lon_res = (max_lon-min_lon)/float(x_size)
driver = gdal.GetDriverByName( "KEA" )
metadata = driver.GetMetadata()
dst_ds = driver.Create( outputFile, x_size, y_size, 1, gdal.GDT_Float32 )
dst_ds.GetRasterBand(1).WriteArray(sel_dataset_arr)
gcp_list = []
for gcp_arr in gcplst:
gcp = gdal.GCP(int(gcp_arr[2]), int(gcp_arr[3]), int(0), gcp_arr[0], gcp_arr[1])
gcp_list.append(gcp)
dst_ds.SetGCPs(gcp_list, wktStr)
dst_ds = None
if __name__ == '__main__':
parser = argparse.ArgumentParser()
# Define the argument for specifying the input file.
parser.add_argument("-i", "--input", type=str, required=True, help="Specify the input image file.")
# Define the argument for specifying the output file.
parser.add_argument("-o", "--output", type=str, required=True, help="Specify the output image file.")
args = parser.parse_args()
run(args.input, args.output)
From the argparse docs here, you can simply add a nargs='*' to the argument definitions. However, be sure to give the input and output files in the same order...
Also, you can use the pathlib.Path object, which is now standard in Python >=3.4, to play with file names.
So with an added from pathlib import Path at the top, the last part of your code becomes:
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # Define the argument for specifying the input file.
    parser.add_argument("-i", "--input", nargs='*', type=str, required=True, help="Specify the input image file.")
    args = parser.parse_args()
    for input in args.input:
        output = Path(input).stem + '_Processed.hdf'
        run(input, output)
Here, args.input is now a list of strings, so we iterate over it. The .stem attribute returns the file name without its final extension; I find it cleaner than something like input[:-4], which only works for a specific extension length...
This works well with glob patterns in a standard Linux shell (I don't know about other cases).
Ex.: calling python this_script.py -i Image_* processes every file whose name begins with "Image_".
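As a quick illustration of .stem with a hypothetical file name, note that it strips only the final suffix:
from pathlib import Path

p = Path('Image_001.hdf')
print(p.stem)                     # Image_001
print(p.stem + '_Processed.hdf')  # Image_001_Processed.hdf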
You can use the nargs='+' option, and since you're only going to have one required argument, I'd recommend not using --input as an option; simply run the script as script_name.py input_file1 input_file2 input_file3 ...:
import os.path

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('input', nargs='+', help="Specify the input image file.")
    args = parser.parse_args()
    for filename in args.input:
        root, ext = os.path.splitext(filename)
        run(filename, ''.join((root, '_Processed', ext)))
This code creates TFRecords, TensorFlow's standard input format, for storing audio and labels taken from video samples. The resulting file is then given as input for training a neural network.
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
import menpo
import tensorflow as tf
import numpy as np
import os
from io import BytesIO
from pathlib import Path
from moviepy.editor import VideoFileClip
from menpo.visualize import progress_bar_str, print_progress
from moviepy.audio.AudioClip import AudioArrayClip
root_dir = Path('/home/user/Desktop/PROJECT/Multimodal-Emotion-Recognition-master/RECOLA') #Where RECOLA is located
portion_to_id = dict(
train = [1], # 25
valid = [70, 71],
test = [80, 81] # 54, 53
) #samples taken
def get_samples(subject_id): #location of arousal and valence files and appropriate video sample
arousal_label_path = root_dir / 'ratings_individual/arousal/{}.csv'.format(subject_id)
valence_label_path = root_dir / 'ratings_individual/valence/{}.csv'.format(subject_id)
clip = VideoFileClip(str(root_dir /"Video_recordings_MP4/{}.mp4".format(subject_id)))
subsampled_audio = clip.audio.set_fps(16000)
audio_frames = []
for i in range(1, 7501): #extract audio sample
try:
time = 0.04 * i
audio = np.array(list(subsampled_audio.subclip(time - 0.04, time).iter_frames()))
audio = audio.mean(1)[:640]
audio_frames.append(audio.astype(np.float32))
except ValueError:
print('Not float')
quit()
try:
arousal = np.loadtxt(str(arousal_label_path), delimiter=',')[:+1][1:]
valence = np.loadtxt(str(valence_label_path), delimiter=',')[:+1][1:]
return audio_frames, np.dstack([arousal, valence])[0].astype(np.float32) #return audio frames
except ValueError:
print('problem')
def get_jpg_string(im):
# Gets the serialized jpg from a menpo `Image`.
fp = BytesIO()
menpo.io.export_image(im, fp, extension='jpg')
fp.seek(0)
return fp.read()
def _int_feauture(value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
def _bytes_feauture(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def serialize_sample(writer, subject_id):
subject_name = 'P{}'.format(subject_id)
print(subject_name)
print(get_samples)
#repeat for each sample
for i, (audio, label) in enumerate(zip(*get_samples(subject_name))):
example = tf.train.Example(features=tf.train.Features(feature={
'sample_id': _int_feauture(i),
'subject_id': _int_feauture(subject_id),
'label': _bytes_feauture(label.tobytes()),
'raw_audio': _bytes_feauture(audio.tobytes()),
})) #extract sample_id,subject_id,label and raw_audio
writer.write(example.SerializeToString())
del audio, label
def main(directory):
print('In Main')
for portion in portion_to_id.keys():
print(portion)
for subj_id in print_progress(portion_to_id[portion]):
temp = (directory / 'tf_records' / portion / '{}.tfrecords'.format(subj_id)
).as_posix() #display sample
print(temp)
writer = tf.python_io.TFRecordWriter(
(directory / 'tf_records' / portion / '{}.tfrecords'.format(subj_id)
).as_posix()) #write to tfrecords
serialize_sample(writer, subj_id)
if __name__ == "__main__":
print("Calling Main")
main(Path('/home/user/Desktop/PROJECT/Multimodal-Emotion-Recognition-master/records')) #save tfrecord
This code raises an error and terminates. I have given all the paths needed to locate the input video.
Error
for i, (audio, label) in enumerate(zip(*get_samples(subject_name))):
TypeError: zip() argument after * must be an iterable, not NoneType
Why do I get this error?
Do you have the following video/audio files in your train, valid, and test folders?
train = P1.mp4
valid = P70.mp4, P71.mp4
test = P80.mp4, P81.mp4
The call zip(*get_samples(subject_name)) is unable to fetch the data because get_samples returned None: the function only returns a value inside its try blocks, so if the ratings CSVs or the video cannot be read it prints 'problem' and falls through, implicitly returning None, and zip(*None) then raises this TypeError.
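One way to make the failure explicit instead of letting zip(*None) raise is to check the return value before unpacking. This is only a sketch; the real fix is making sure the ratings CSVs and the video exist for every subject so that get_samples does not fall through its except branch:
samples = get_samples(subject_name)
if samples is None:
    raise FileNotFoundError(
        'get_samples returned None for {}; check the ratings CSVs '
        'and Video_recordings_MP4/{}.mp4'.format(subject_name, subject_name))
audio_frames, labels = samples
for i, (audio, label) in enumerate(zip(audio_frames, labels)):
    ...  # build and write the tf.train.Example as before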