I am using streamlit
I want to prepare audio dataset, so for that I have a .csv file which contains n numbers of lines which should be spoken by a user and voice of that user should be saved in a folder
Structure of CSV FILE
Sr#
Word
1
Hello
2
World
this file should be loaded and one-by-one all the words should be shown and below that word user should record sound and then press save button to save that audio and then next word appear. I have written the code but it is not working properly
import re
from turtle import onclick
from zlib import DEFLATED
from requests import session
from streamlit_option_menu import option_menu
import streamlit as st
import pandas as pd
import os
import sounddevice as sd
import wavio
def record(duration=5, fs=48000):
sd.default.samplerate = fs
sd.default.channels = 1
myrecording = sd.rec(int(duration * fs))
sd.wait(duration)
return myrecording
def save_record(path_myrecording, myrecording, fs):
wavio.write(path_myrecording, myrecording, fs, sampwidth=2)
return None
def read_audio(file):
with open(file, "rb") as audio_file:
audio_bytes = audio_file.read()
return audio_bytes
st.header("Data Pipeline")
with st.sidebar:
choose = option_menu("Modules", ["1. Recording","2. Preprocess Data","3. Create
Dataset"],
icons=['headset', 'mic', 'clipboard'],
default_index=0)
my_list=['a','b']
if choose=="1. Recording":
st.header("Voice Recording")
st.text("Follow the following steps")
s_name=''
s_name= st.text_input("1. Enter Your Name")
if os.path.isdir("./Recordings/"+s_name):
pass
else:
os.mkdir("./Recordings/"+s_name)
file =st.file_uploader("2. Upload an csv file", type=
['csv'],accept_multiple_files=False,key='file')
# if file:
# df= pd.read_csv(file)
# for i in df['Urdu']:
# my_list.append(i)
show_next = st.button("next", disabled = False)
if "current_index" not in st.session_state:
st.session_state.current_index = 0
st.session_state.load_state=True
# Whenever someone clicks on the button
if show_next and st.session_state.load_state :
# Show next element in list
st.write(my_list[st.session_state.current_index])
Record=st.button('Record', key='rec')
if st.session_state.get('rec')!=True:
st.session_state['rec']=True
if st.session_state['rec']==True:
print('i am here')
record_state = st.text("Recording...")
duration = 1 # seconds
fs = 48000
myrecording = record(duration, fs)
path_myrecording = f"./temp/AMAD.wav"
save_record(path_myrecording, myrecording, fs)
st.audio(read_audio(path_myrecording))
path="./Recordings/"+s_name+"/1.wav"
os.remove(path_myrecording)
save_record(path, myrecording, fs)
st.success("Recording Saved")
# Update and store the index
st.session_state.current_index += 1
if len(my_list) == st.session_state.current_index :
st.session_state.load_state = False
st.session_state
Related
I am attempting to use the speech recognition toolkit VOSK and the speech diarization package Resemblyzer to transcibe audio and then identify the speakers in the audio.
Tools:
https://github.com/alphacep/vosk-api
https://github.com/resemble-ai/Resemblyzer
I can do both things individually but run into issues when trying to do them when running the one python script.
I used the following guide when setting up the diarization system:
https://medium.com/saarthi-ai/who-spoke-when-build-your-own-speaker-diarization-module-from-scratch-e7d725ee279
Computer specs are as follows:
Intel(R) Core(TM) i3-7100 CPU # 3.90GHz, 3912 Mhz, 2 Core(s), 4 Logical Processor(s)
32GB RAM
The following is my code, I am not to sure if using threading is appropriate or if I even implemented it correctly, how can I best optimize this code as to achieve the results I am looking for and not crash.
from vosk import Model, KaldiRecognizer
from pydub import AudioSegment
import json
import sys
import os
import subprocess
import datetime
from resemblyzer import preprocess_wav, VoiceEncoder
from pathlib import Path
from resemblyzer.hparams import sampling_rate
from spectralcluster import SpectralClusterer
import threading
import queue
import gc
def recognition(queue, audio, FRAME_RATE):
model = Model("Vosk_Models/vosk-model-small-en-us-0.15")
rec = KaldiRecognizer(model, FRAME_RATE)
rec.SetWords(True)
rec.AcceptWaveform(audio.raw_data)
result = rec.Result()
transcript = json.loads(result)#["text"]
#return transcript
queue.put(transcript)
def diarization(queue, audio):
wav = preprocess_wav(audio)
encoder = VoiceEncoder("cpu")
_, cont_embeds, wav_splits = encoder.embed_utterance(wav, return_partials=True, rate=16)
print(cont_embeds.shape)
clusterer = SpectralClusterer(
min_clusters=2,
max_clusters=100,
p_percentile=0.90,
gaussian_blur_sigma=1)
labels = clusterer.predict(cont_embeds)
def create_labelling(labels, wav_splits):
times = [((s.start + s.stop) / 2) / sampling_rate for s in wav_splits]
labelling = []
start_time = 0
for i, time in enumerate(times):
if i > 0 and labels[i] != labels[i - 1]:
temp = [str(labels[i - 1]), start_time, time]
labelling.append(tuple(temp))
start_time = time
if i == len(times) - 1:
temp = [str(labels[i]), start_time, time]
labelling.append(tuple(temp))
return labelling
#return
labelling = create_labelling(labels, wav_splits)
queue.put(labelling)
def identify_speaker(queue1, queue2):
transcript = queue1.get()
labelling = queue2.get()
for speaker in labelling:
speakerID = speaker[0]
speakerStart = speaker[1]
speakerEnd = speaker[2]
result = transcript['result']
words = [r['word'] for r in result if speakerStart < r['start'] < speakerEnd]
#return
print("Speaker",speakerID,":",' '.join(words), "\n")
def main():
queue1 = queue.Queue()
queue2 = queue.Queue()
FRAME_RATE = 16000
CHANNELS = 1
podcast = AudioSegment.from_mp3("Podcast_Audio/Film-Release-Clip.mp3")
podcast = podcast.set_channels(CHANNELS)
podcast = podcast.set_frame_rate(FRAME_RATE)
first_thread = threading.Thread(target=recognition, args=(queue1, podcast, FRAME_RATE))
second_thread = threading.Thread(target=diarization, args=(queue2, podcast))
third_thread = threading.Thread(target=identify_speaker, args=(queue1, queue2))
first_thread.start()
first_thread.join()
gc.collect()
second_thread.start()
second_thread.join()
gc.collect()
third_thread.start()
third_thread.join()
gc.collect()
# transcript = recognition(podcast,FRAME_RATE)
#
# labelling = diarization(podcast)
#
# print(identify_speaker(transcript, labelling))
if __name__ == '__main__':
main()
When I say crash I mean everything freezes, I have to hold down the power button on the desktop and turn it back on again. No blue/blank screen, just frozen in my IDE looking at my code. Any help in resolving this issue would be greatly appreciated.
Pydubs AudioSegment was not returning a suitable type for the Resembylzer function preprocess_wav.
podcast = AudioSegment.from_mp3("Podcast_Audio/Film-Release-Clip.mp3")
preprocess_wav instead requires a Numpy Array / Path.
audio_file_path = 'Podcast_Audio/WAV-Film-Release-Clip.wav'
wav_fpath = Path(audio_file_path)
wav = preprocess_wav(wav_fpath)
Additionally preprocess_wav functionality can be achieved using Librosa if desired.
import librosa
def preprocess_wav(waveform, sr):
waveform = librosa.resample(waveform, orig_sr=sr, target_sr=16000)
waveform = waveform.astype(np.float32) / np.max(np.abs(waveform))
return waveform
waveform, sr = librosa.load('Podcast_Audio/WAV-Film-Release-Clip.wav')
wav = preprocess_wav(waveform, sr)
My aim is to send an image file in whatsapp to various persons who are not on my contact list. Try with pywhatkit. Store the number in an excel sheet and read it by pandas. Loop the number to send the image file. The first number gets the image file successfully, and the remaining numbers WhatsApp will open but the message will not send. Also tell, me how to avoid WhatsApp open in a new tab(chrome)every time.
Here's my code
import pandas as pd
file_name = r'd:\\Recipients data.xlsx'
df = pd.read_excel(file_name)
contact = df['Contact'].tolist()
import pywhatkit
import time
from datetime import datetime
now = datetime.now()
current_time = now.strftime("%H:%M : %S")
h,m,s = current_time.split(':')
print(now)
h1 = int(h)
m1 = int(m)
s1 = int(s)
file_path = r'C:/Users/Asus/Downloads/subadev.jpeg'
for send_number in contact:
new_format = (f'"{str("+91")+str(send_number)}"')
print(new_format)
pywhatkit.sendwhats_image(new_format,file_path)
time.sleep(15)
expecting : send images to every number without open whats app every time
Detect beep sound from Audio file using python
this code I found from somewhere but it is not giving the actual beep result
means in the audio where beep is not there then also this code is showing beep in it
from moviepy.editor import *
import matplotlib.pyplot as plt
import cv2
#from time import sleep
import sounddevice as sd
from scipy.io import wavfile
import numpy as np
filename = 'C:/Users/YahyaSirguroh/Downloads/output.mp4'
video = VideoFileClip(filename)
audio = video.audio
duration = video.duration
audio.write_audiofile("audio.wav")
#sleep(0.3)
samplerate, data = wavfile.read('audio.wav')
step = 30
audio_signal = []
cnt = 0
flag = 0
text = ''
for t in range(int(duration*step)):
t = t/step
if cnt:
flag+=1
if t > audio.duration or t > video.duration: break
audio_frame = audio.get_frame(t) #numpy array representing mono/stereo values
audio_signal.extend(list(audio_frame))
if (audio_frame>0.6).sum()==2:
cnt+=1
if cnt>=2:
print('beep detected at %5.2f' %(t))
text = 'beep detected at %d' %(np.round(t))
if flag>=4:
cnt=0
flag=0
I have a wav conversation of 2 people(customer and tech support)
I have 3 separate functions that extract 1 voice, cut 10 seconds and transform it to embedding.
def get_customer_voice(file):
print('getting customer voice only')
wav = wf.read(file)
ch = wav[1].shape[1]#customer voice always in 1st track
sr = wav[0]
c1 = wav[1][:,1]
#print('c0 %i'%c0.size)
if ch==1:
exit()
vad = VoiceActivityDetection()
vad.process(c1)
voice_samples = vad.get_voice_samples()
#this is trouble - how to pass it without saving anywhere as wav?
wf.write('%s_customer.wav'%file,sr,voice_samples)
function below cuts 10 seconds of wav file from function above.
import sys
from pydub import AudioSegment
def get_customer_voice_10_seconds(file):
voice = AudioSegment.from_wav(file)
new_voice = voice[0:10000]
file = str(file) + '_10seconds.wav'
new_voice.export(file, format='wav')
if __name__ == '__main__':
if len(sys.argv) < 2:
print('give wav file to process!')
else:
print(sys.argv)
get_customer_voice_10_seconds(sys.argv[1])
how to pass it as wav or other format without saving it to some directory? It's to be used in rest api, i don't know where it will save that wav, so preferably it should be passed somehow.
I figured it out - the function below just works without saving, buffer etc.
It receives a wav file and edits it and just sends straight to the get math embedding function:
def get_customer_voice_and_cutting_10_seconds_embedding(file):
print('getting customer voice only')
wav = read(file)
ch = wav[1].shape[1]
sr = wav[0]
c1 = wav[1][:,1]
vad = VoiceActivityDetection()
vad.process(c1)
voice_samples = vad.get_voice_samples()
audio_segment = AudioSegment(voice_samples.tobytes(), frame_rate=sr,sample_width=voice_samples.dtype.itemsize, channels=1)
audio_segment = audio_segment[0:10000]
file = str(file) + '_10seconds.wav'
return get_embedding(file)
the key is tobytes() in Audio segment, it just assembles all them together in 1 track again
I made a script which is suppose to use Tkinter to allow to choose and load files and store their content in different objects and then process each of these documents.
I would like to make the script able to process only a certain amount of documents determined by a question (the value is stored under "File_number")
For exemple: if at the question "how many files do you want to compare?"
the user enter 3
I would like the tkinter openfile window to ask only for 3 files then keep going
I am using the If Else statement like below
but it doesn't seem to work well and the code is really not pythonic.
Is there a better/shorter way to perform the same?
Thanks
My script look like this
import pandas as pd
from pandas import *
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns
import pylab
import pandas.io.data
import os
import Tkinter
from Tkinter import *
import tkFileDialog
import tkSimpleDialog
from tkFileDialog import askopenfilename
import sys
# Set up GUI
root = Tkinter.Tk(); root.withdraw()
# Prompt for user info
File_number = tkSimpleDialog.askinteger("File number", "How many files do you want to compare?")
# Prompt for file explorer
# Also extract the file_name
process_a = 0
if process_a = File_number:
break
else:
process_a = process_a + 1
fileloc1 = tkFileDialog.askopenfilename(parent=root, title='Choose file 1')
fileloc1_name_clean = os.path.splitext(fileloc1)[0]
fileloc1_name = os.path.basename(fileloc1_name_clean)
if process_a = File_number:
break
else:
process_a = process_a + 1
fileloc2 = tkFileDialog.askopenfilename(parent=root, title='Choose file 2')
fileloc2_name_clean = os.path.splitext(fileloc2)[0]
fileloc2_name = os.path.basename(fileloc2_name_clean)
if process_a = File_number:
break
else:
process_a = process_a + 1
fileloc3 = tkFileDialog.askopenfilename(parent=root, title='Choose file 3')
fileloc3_name_clean = os.path.splitext(fileloc3)[0]
fileloc3_name = os.path.basename(fileloc3_name_clean)
EDIT 1
The next part of my script is:
dfa_1 = pd.read_csv(fileloc1, delimiter='\t')
dfa_nodupli = dfa_1.drop_duplicates(cols='N', take_last=False)
dfa_nodu_2pep = dfa_nodupli[(dfa_nodupli['Peptides(95%)'] > 1)]
dfa_nodu_2pep = dfa_nodu_2pep[~dfa_nodu_2pep['Name'].str.contains('Keratin')]
dfa_nodu_2pep.to_csv(fileloc1_name + ".csv")
dfb_1 = pd.read_csv(fileloc2, delimiter='\t')
dfb_nodupli = dfb_1.drop_duplicates(cols='N', take_last=False)
dfb_nodu_2pep = dfb_nodupli[(dfb_nodupli['Peptides(95%)'] > 1)]
dfb_nodu_2pep = dfb_nodu_2pep[~dfb_nodu_2pep['Name'].str.contains('Keratin')]
dfb_nodu_2pep.to_csv(fileloc2_name + ".csv")
I modified your code, so that it works, in a way you want it ( I hope).
import Tkinter
import tkFileDialog
import tkSimpleDialog
from tkFileDialog import askopenfilename
import os
# Set up GUI
def main():
root = Tkinter.Tk();
root.withdraw()
# Prompt for user info
File_number = tkSimpleDialog.askinteger("File number",
"How many files do you want to compare?")
if not File_number:
return
user_fiels = []
max_file_no = int(File_number)
current_file = 1;
while(current_file <= max_file_no):
fileloc = tkFileDialog.askopenfilename(parent=root, title='Choose file {}'.format(current_file))
if not fileloc:
continue
fileloc_name_clean = os.path.splitext(fileloc)[0]
fileloc_name = os.path.basename(fileloc_name_clean)
user_fiels.append([fileloc, fileloc_name_clean, fileloc_name])
current_file += 1
#print(fileloc_name_clean, fileloc_name)
print(user_fiels)
main()
I use while loop to get file paths as many times as you want.