Related
I wrote a code to remove the background of 8000 images but that whole code is taking approximately 8 hours to give the result.
How to improve its time complexity as I have to work on a large dataset in future?
Or do I have to write a whole new code? If it is, please suggest some sample codes.
from rembg import remove
import cv2
import glob
for img in glob.glob('../images/*.jpg'):
a = img.split('../images/')
a1 = a[1].split('.jpg')
try:
cv_img = cv2.imread(img)
output = remove(cv_img)
except:
continue
cv2.imwrite('../output image/' + str(a1[0]) + '.png', output)
One simple approach would be to divide the work into multiple threads. See ThreadPoolExecutor for more.
You can play around with max_workers= to see what get's the best results. Note that max-workers can be any number between 1 and 32.
This sample code is ready to run. It assumes the image files are in the same directory as your main.py and the output_image directory exits.
import cv2
import rembg
import sys
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor
out_dir = Path("output_image")
in_dir = Path(".")
def is_image(absolute_path: Path):
return absolute_path.is_file and str(absolute_path).endswith('.png')
input_filenames = [p for p in filter(is_image, Path(in_dir).iterdir())]
def process_image(in_dir):
try:
image = cv2.imread(str(in_dir))
if image is None or not image.data:
raise cv2.error("read failed")
output = rembg.remove(image)
in_dir = out_dir / in_dir.with_suffix(".png").name
cv2.imwrite(str(in_dir), output)
except Exception as e:
print(f"{in_dir}: {e}", file=sys.stderr)
executor = ThreadPoolExecutor(max_workers=4)
for result in executor.map(process_image, input_filenames):
print(f"Processing image: {result}")
Check out the U^2Net repository. Like u2net_test.py, Writing your own remove function and using dataloaders can speed up the process. if it is not necessary skip the alpha matting else you can add the alpha matting code from rembg.
def main():
# --------- 1. get image path and name ---------
model_name='u2net'#u2netp
image_dir = os.path.join(os.getcwd(), 'test_data', 'test_images')
prediction_dir = os.path.join(os.getcwd(), 'test_data', model_name + '_results' + os.sep)
model_dir = os.path.join(os.getcwd(), 'saved_models', model_name, model_name + '.pth')
img_name_list = glob.glob(image_dir + os.sep + '*')
print(img_name_list)
#1. dataloader
test_salobj_dataset = SalObjDataset(img_name_list = img_name_list,
lbl_name_list = [],
transform=transforms.Compose([RescaleT(320),
ToTensorLab(flag=0)])
)
test_salobj_dataloader = DataLoader(test_salobj_dataset,
batch_size=1,
shuffle=False,
num_workers=1)
for i_test, data_test in enumerate(test_salobj_dataloader):
print("inferencing:",img_name_list[i_test].split(os.sep)[-1])
inputs_test = data_test['image']
inputs_test = inputs_test.type(torch.FloatTensor)
if torch.cuda.is_available():
inputs_test = Variable(inputs_test.cuda())
else:
inputs_test = Variable(inputs_test)
d1,d2,d3,d4,d5,d6,d7= net(inputs_test)
# normalization
pred = d1[:,0,:,:]
pred = normPRED(pred)
# save results to test_results folder
if not os.path.exists(prediction_dir):
os.makedirs(prediction_dir, exist_ok=True)
save_output(img_name_list[i_test],pred,prediction_dir)
del d1,d2,d3,d4,d5,d6,d7
Try to use parallelization with multiprocessing like Mark Setchell mentioned in his comment. I rewrote your code according to Method 8 from here. Multiprocessing should speed up your execution time. I did not test the code, try if it works.
import glob
from multiprocessing import Pool
import cv2
from rembg import remove
def remove_background(filename):
a = filename.split("../images/")
a1 = a[1].split(".jpg")
try:
cv_img = cv2.imread(filename)
output = remove(cv_img)
except:
continue
cv2.imwrite("../output image/" + str(a1[0]) + ".png", output)
files = glob.glob("../images/*.jpg")
pool = Pool(8)
results = pool.map(remove_background, files)
Ah, you used the example from https://github.com/danielgatis/rembg#usage-as-a-library as template for your code. Maybe try the other example with PIL image instead of OpenCV. The latter is mostly less fast, but who knows. Try it with maybe 10 images and compare execution time.
Here is your code using PIL instead of OpenCV. Not tested.
import glob
from PIL import Image
from rembg import remove
for img in glob.glob("../images/*.jpg"):
a = img.split("../images/")
a1 = a[1].split(".jpg")
try:
cv_img = Image.open(img)
output = remove(cv_img)
except:
continue
output.save("../output image/" + str(a1[0]) + ".png")
I am trying to the do edge detection and apply laplacian, HOG on multiple images stored in a folder at a time for feature extraction but I am unable to do so.
I have actually saved the images of the directory in a list and now when i am trying to process these images, it is throwing an error.
import numpy as np
import cv2
import os
import matplotlib.pyplot as plt
def resize():
data = []
img_size = 244
data_dir = r'C:\technocolab project2\testing'
for img in os.listdir(data_dir):
try:
imgPath = os.path.join(data_dir,img)
global images
images = cv2.imread(imgPath, cv2.IMREAD_GRAYSCALE)
global image_resized
image_resized = cv2.resize(images,(img_size,img_size))
data.append(image_resized)
#except Exception as e:
#print(e)
except:
pass
return data
data = resize()
for i in range(len(data)):
data[i]=data[i].astype('float32')
splitting the dataset
training = data[:int(0.2*len(data))]
validation = data[int(0.2*len(data)):int(0.4*len(data))]
testing = data[int(0.4*len(data)):int(0.5*len(data))]
train_norm =[]
valid_norm = []
test_norm = []
for a in range(len(training)):
train_norm.append(training[a]/255)
print(train_norm[0])
for b in range(len(validation)):
valid_norm.append(validation[b]/255)
print(valid_norm[0])
for c in range(len(testing)):
test_norm.append(testing[c]/255)
print(test_norm[0])
train_edge = []
for e in range(len(train_norm)):
train_edge.append(cv2.Canny(train_norm[e],90,300))
OpenCV(4.6.0) D:\a\opencv-python\opencv-python\opencv\modules\imgproc\src\canny.cpp:829: error: (-215:Assertion failed) _src.depth() == CV_8U in function 'cv::Canny'
I have a python script for pre-processing audio and it has frame length, frame step and fft length as the command line arguments. I am able to run the code if I have single values of these arguments. I wanted to know if there is a way in which I can run the python script with multiple values of the arguments? For example, get the output if values of fft lengths are 128, 256 and 512 instead of just one value.
The code for pre-processing is as follows:
import numpy as np
import pandas as pd
import tensorflow as tf
from scipy.io import wavfile
import os
import time
import pickle
import random
import argparse
import configlib
from configlib import config as C
import mfccwithpaddingandcmd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import MultiLabelBinarizer
from tensorflow import keras
from tensorflow.python.keras import Sequential
from tensorflow.keras.layers import Dense,Conv2D,MaxPooling2D,Flatten,Dropout,BatchNormalization,LSTM,Lambda,Reshape,Bidirectional,GRU
from tensorflow.keras.callbacks import TensorBoard
start = time.time()
classes = ['blinds','fan','light','music','tv']
#dire = r"/mnt/beegfs/home/gehani/test_speech_command/"
parser = configlib.add_parser("Preprocessing config")
parser.add_argument("-dir","--dire", metavar="", help="Directory for the audio files")
def pp():
data_list=[] #To save paths of all the audio files.....all audio files in list format in data_list
#data_list-->folder-->files in folder
for index,label in enumerate(classes):
class_list=[]
if label=='silence': #creating silence folder and storing 1sec noise audio files
silence_path = os.path.join(C["dire"],'silence')
if not os.path.exists(silence_path):
os.mkdir(silence_path)
silence_stride = 2000
#sample_rate = 16000
folder = os.path.join(C["dire"],'_background_noise_') #all silence are kept in the background_noise folder
for file_ in os.listdir(folder):
if '.wav' in file_:
load_path = os.path.join(folder,file_)
sample_rate,y = wavfile.read(load_path)
for i in range(0,len(y)-sample_rate,silence_stride):
file_path = "silence/{}_{}.wav".format(file_[:-4],i)
y_slice = y[i:i+sample_rate]
wavfile.write(os.path.join(C["dire"],file_path),sample_rate,y_slice)
class_list.append(file_path)
else:
folder = os.path.join(C["dire"],label)
for file_ in os.listdir(folder):
file_path = '{}/{}'.format(label,file_) #Ex: up/c9b653a0_nohash_2.wav
class_list.append(file_path)
random.shuffle(class_list) #To shuffle files
data_list.append(class_list) #if not a silence file then just append to the datalist
X = []
Y = []
preemphasis = 0.985
print("Feature Extraction Started")
for i,class_list in enumerate(data_list): #datalist = all files, class list = folder name in datalist, sample = path to the audio file in that particular class list
for j,samples in enumerate(class_list): #samples are of the form classes_name/audio file
if(samples.endswith('.wav')):
sample_rate,audio = wavfile.read(os.path.join(C["dire"],samples))
if(audio.size<sample_rate):
audio = np.pad(audio,(sample_rate-audio.size,0),mode="constant")
#print("****")
#print(sample_rate)
#print(preemphasis)
#print(audio.shape)
coeff = mfccwithpaddingandcmd.mfcc(audio,sample_rate,preemphasis) # 0.985 = preemphasis
#print("****")
#print(coeff)
#print("****")
X.append(coeff)
#print(X)
if(samples.split('/')[0] in classes):
Y.append(samples.split('/')[0])
elif(samples.split('/')[0]=='_background_noise_'):
Y.append('silence')
#print(len(X))
#print(len(Y))
#X= coefficient array and Y = name of the class
A = np.zeros((len(X),X[0].shape[0],X[0][0].shape[0]),dtype='object')
for i in range(0,len(X)):
A[i] = np.array(X[i]) #Converting list X into array A
end1 = time.time()
print("Time taken for feature extraction:{}sec".format(end1-start))
MLB = MultiLabelBinarizer() # one hot encoding for converting labels into binary form
MLB.fit(pd.Series(Y).fillna("missing").str.split(', '))
Y_MLB = MLB.transform(pd.Series(Y).fillna("missing").str.split(', '))
MLB.classes_ #Same like classes array
print(Y_MLB.shape)
pickle_out = open("A_all.pickle","wb") #Writes array A to a file A.pickle
pickle.dump(A, pickle_out) #pickle is the file containing the extracted features
pickle_out.close()
pickle_out = open("Y_all.pickle","wb")
pickle.dump(Y_MLB, pickle_out)
pickle_out.close()
pickle_in = open("Y_all.pickle","rb")
Y = pickle.load(pickle_in)
X = tf.keras.utils.normalize(X)
X_train,X_valtest,Y_train,Y_valtest = train_test_split(X,Y,test_size=0.2,random_state=37)
X_val,X_test,Y_val,Y_test = train_test_split(X_valtest,Y_valtest,test_size=0.5,random_state=37)
print(X_train.shape,X_val.shape,X_test.shape,Y_train.shape,Y_val.shape,Y_test.shape)
if __name__ == "__main__":
configlib.parse(save_fname="last_arguments.txt")
print("Running with configuration:")
configlib.print_config()
pp()
The code for MFCC is as follows:
import tensorflow as tf
import scipy.io.wavfile as wav
import numpy as np
import matplotlib.pyplot as plt
import pickle
import argparse
import configlib
from configlib import config as C
# Configuration arguments
parser = configlib.add_parser("MFCC config")
parser.add_argument("-fl","--frame_length", type=int, default=400, metavar="", help="Frame Length")
parser.add_argument("-fs","--frame_step", type=int, default=160, metavar="", help="Frame Step")
parser.add_argument("-fft","--fft_length", type=int, default=512, metavar="", help="FFT length")
#args = parser.parse_args()
def Preemphasis(signal,pre_emp):
return np.append(signal[0],signal[1:]-pre_emp*signal[:-1])
def Paddinggg(framelength,framestep,samplerate):
frameStart = np.arange(0,samplerate,framestep)
frameEnd = frameStart + framelength
padding = min(frameEnd[(frameEnd > samplerate)]) - samplerate
return padding
def mfcc(audio,sample_rate,pre_emp):
audio = np.pad(audio,(Paddinggg(C["frame_length"],C["frame_step"],sample_rate),0),mode='reflect')
audio = audio.astype('float32')
#Normalization
audio = tf.keras.utils.normalize(audio)
#Preemphasis
audio = Preemphasis(audio,pre_emp)
stfts = tf.signal.stft(audio,C["frame_length"],C["frame_step"],C["fft_length"],window_fn=tf.signal.hann_window)
spectrograms = tf.abs(stfts)
num_spectrogram_bins = stfts.shape[-1]
lower_edge_hertz, upper_edge_hertz, num_mel_bins = 0.0, sample_rate/2.0, 32
linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(num_mel_bins, num_spectrogram_bins, sample_rate, lower_edge_hertz,upper_edge_hertz)
mel_spectrograms = tf.tensordot(spectrograms, linear_to_mel_weight_matrix, 1)
mel_spectrograms.set_shape(spectrograms.shape[:-1].concatenate(linear_to_mel_weight_matrix.shape[-1:]))
# Compute a stabilized log to get log-magnitude mel-scale spectrograms.
log_mel_spectrograms = tf.math.log(mel_spectrograms + 1e-6)
# Compute MFCCs from log_mel_spectrograms and take the first 13.
return log_mel_spectrograms
print("End")
And the code for configlib is as follows:
from typing import Dict, Any
import logging
import pprint
import sys
import argparse
# Logging for config library
logger = logging.getLogger(__name__)
# Our global parser that we will collect arguments into
parser = argparse.ArgumentParser(description=__doc__, fromfile_prefix_chars="#")
# Global configuration dictionary that will contain parsed arguments
# It is also this variable that modules use to access parsed arguments
config:Dict[str, Any] = {}
def add_parser(title: str, description: str = ""):
"""Create a new context for arguments and return a handle."""
return parser.add_argument_group(title, description)
def parse(save_fname: str = "") -> Dict[str, Any]:
"""Parse given arguments."""
config.update(vars(parser.parse_args()))
logging.info("Parsed %i arguments.", len(config))
# Optionally save passed arguments
if save_fname:
with open(save_fname, "w") as fout:
fout.write("\n".join(sys.argv[1:]))
logging.info("Saving arguments to %s.", save_fname)
return config
def print_config():
"""Print the current config to stdout."""
pprint.pprint(config)
I use the following command to run my python file:
python3.7 preprocessingwithpaddingandcmd.py -fl 1103 -fs 88 -fft 512 -dir /mnt/beegfs/home/gehani/appliances_audio_one_channel
Should I be writing a shell script or python has some options for it?
EDIT 1
I tried using
parser.add_argument('-fft', '--fft_length', type=int, default=[], nargs=3)
for getting fft length from the command line and used the command
run preprocessingwithpaddingandcmd -dir filepath -fl 1765 -fs 1102 -fft 512 218 64
to run it. But, it gives me this error: ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
Can anyone please help?
I found you can do it by these. mfcc features extraction
You can create your own mfcc features extraction or you can limit window lengths and ceptrums that is enough for simple works except you need logarithms scales where you can use target matrix ( convolution ) or else.
It is logarithms when you use FFT or alternative derivation but mfcc is only extraction where I will provide the sample output in picture.
[ Sample ]:
from python_speech_features import mfcc
from python_speech_features import logfbank
import scipy.io.wavfile as wav
import tensorflow as tf
import matplotlib.pyplot as plt
(rate,sig) = wav.read("F:\\temp\\Python\\Speech\\temple_of_love-sisters_of_mercy.wav")
mfcc_feat = mfcc(signal=sig, samplerate=rate, winlen=0.025, winstep=0.01, numcep=13, nfilt=26, nfft=512, lowfreq=0, highfreq=None, preemph=0.97, ceplifter=22, appendEnergy=True)
fbank_feat = logfbank(sig,rate)
plt.plot( mfcc_feat[50:42000,0] )
plt.xlabel("sample")
plt.show()
plt.close()
input('...')
I am using this library midi2img to generate midi from images
From this library, this is the file am using:
from PIL import Image
import numpy as np
from music21 import instrument, note, chord, stream
lowerBoundNote = 21
def column2notes(column):
notes = []
for i in range(len(column)):
if column[i] > 255/2:
notes.append(i+lowerBoundNote)
return notes
resolution = 0.25
def updateNotes(newNotes,prevNotes):
res = {}
for note in newNotes:
if note in prevNotes:
res[note] = prevNotes[note] + resolution
else:
res[note] = resolution
return res
def image2midi(image_path):
with Image.open(image_path) as image:
im_arr = np.fromstring(image.tobytes(), dtype=np.uint8)
try:
im_arr = im_arr.reshape((image.size[1], image.size[0]))
except:
im_arr = im_arr.reshape((image.size[1], image.size[0],3))
im_arr = np.dot(im_arr, [0.33, 0.33, 0.33])
""" convert the output from the prediction to notes and create a midi file
from the notes """
offset = 0
output_notes = []
# create note and chord objects based on the values generated by the model
prev_notes = updateNotes(im_arr.T[0,:],{})
for column in im_arr.T[1:,:]:
notes = column2notes(column)
# pattern is a chord
notes_in_chord = notes
old_notes = prev_notes.keys()
for old_note in old_notes:
if not old_note in notes_in_chord:
new_note = note.Note(old_note,quarterLength=prev_notes[old_note])
new_note.storedInstrument = instrument.Piano()
if offset - prev_notes[old_note] >= 0:
new_note.offset = offset - prev_notes[old_note]
output_notes.append(new_note)
elif offset == 0:
new_note.offset = offset
output_notes.append(new_note)
else:
print(offset,prev_notes[old_note],old_note)
prev_notes = updateNotes(notes_in_chord,prev_notes)
# increase offset each iteration so that notes do not stack
offset += resolution
for old_note in prev_notes.keys():
new_note = note.Note(old_note,quarterLength=prev_notes[old_note])
new_note.storedInstrument = instrument.Piano()
new_note.offset = offset - prev_notes[old_note]
output_notes.append(new_note)
prev_notes = updateNotes(notes_in_chord,prev_notes)
midi_stream = stream.Stream(output_notes)
midi_stream.write('midi', fp=image_path.split("/")[-1].replace(".jpeg",".mid"))
import sys
image_path = sys.argv[1]
image2midi(image_path)
and this is the code I execute in terminal to geneate midi from image:
python img2midi.py samples/image.png
I need to make the above code to loop over all input images I put inside samples folder and generate midi for each, not just one file at a time.
Any help would be much appreciated.
You can do this by getting list of images from directory and iterate over them.
import sys
import os
sample_folder_path = sys.argv[1]
images = os.listdir(sample_folder_path) # getting all images stored in sample folder
images_path = [os.path.abspath(f"{sample_folder_path}/{image}") for image in images] # gets absolute path for all images
for image_path in images_path:
image2midi(image_path)
Usage:
python img2midi.py folder_path
Here folder_path is the path of folder that contains images. It can
This code is for creating tfrecords which is tensorflows standard input format for keeping audios and labels taken from video samples.This file is given as input for training in neural network.
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
import menpo
import tensorflow as tf
import numpy as np
import os
from io import BytesIO
from pathlib import Path
from moviepy.editor import VideoFileClip
from menpo.visualize import progress_bar_str, print_progress
from moviepy.audio.AudioClip import AudioArrayClip
root_dir = Path('/home/user/Desktop/PROJECT/Multimodal-Emotion-Recognition-master/RECOLA') #Where RECOLA is located
portion_to_id = dict(
train = [1], # 25
valid = [70, 71],
test = [80, 81] # 54, 53
) #samples taken
def get_samples(subject_id): #location of arousal and valence files and appropriate video sample
arousal_label_path = root_dir / 'ratings_individual/arousal/{}.csv'.format(subject_id)
valence_label_path = root_dir / 'ratings_individual/valence/{}.csv'.format(subject_id)
clip = VideoFileClip(str(root_dir /"Video_recordings_MP4/{}.mp4".format(subject_id)))
subsampled_audio = clip.audio.set_fps(16000)
audio_frames = []
for i in range(1, 7501): #extract audio sample
try:
time = 0.04 * i
audio = np.array(list(subsampled_audio.subclip(time - 0.04, time).iter_frames()))
audio = audio.mean(1)[:640]
audio_frames.append(audio.astype(np.float32))
except ValueError:
print('Not float')
quit()
try:
arousal = np.loadtxt(str(arousal_label_path), delimiter=',')[:+1][1:]
valence = np.loadtxt(str(valence_label_path), delimiter=',')[:+1][1:]
return audio_frames, np.dstack([arousal, valence])[0].astype(np.float32) #return audio frames
except ValueError:
print('problem')
def get_jpg_string(im):
# Gets the serialized jpg from a menpo `Image`.
fp = BytesIO()
menpo.io.export_image(im, fp, extension='jpg')
fp.seek(0)
return fp.read()
def _int_feauture(value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
def _bytes_feauture(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def serialize_sample(writer, subject_id):
subject_name = 'P{}'.format(subject_id)
print(subject_name)
print(get_samples)
#repeat for each sample
for i, (audio, label) in enumerate(zip(*get_samples(subject_name))):
example = tf.train.Example(features=tf.train.Features(feature={
'sample_id': _int_feauture(i),
'subject_id': _int_feauture(subject_id),
'label': _bytes_feauture(label.tobytes()),
'raw_audio': _bytes_feauture(audio.tobytes()),
})) #extract sample_id,subject_id,label and raw_audio
writer.write(example.SerializeToString())
del audio, label
def main(directory):
print('In Main')
for portion in portion_to_id.keys():
print(portion)
for subj_id in print_progress(portion_to_id[portion]):
temp = (directory / 'tf_records' / portion / '{}.tfrecords'.format(subj_id)
).as_posix() #display sample
print(temp)
writer = tf.python_io.TFRecordWriter(
(directory / 'tf_records' / portion / '{}.tfrecords'.format(subj_id)
).as_posix()) #write to tfrecords
serialize_sample(writer, subj_id)
if __name__ == "__main__":
print("Calling Main")
main(Path('/home/user/Desktop/PROJECT/Multimodal-Emotion-Recognition-master/records')) #save tfrecord
This code raises an error and terminates.I have given all paths to locate input video.
Error
for i, (audio, label) in enumerate(zip(*get_samples(subject_name))):
TypeError: zip() argument after * must be an iterable, not NoneType
Why do I get this error?
do you have following video/audio files in your test, train and valid folders:
train = P1.mp4
valid = P70.mp4 , P71.mp4
test = P80.mp4 , P81.mp4 ??
because the code: zip(*get_samples(subject_name)) seems to unable to fetch the data: Nonetype!