Record and Recognize music from URL [closed] - python

Closed. This question needs to be more focused. It is not currently accepting answers.
Want to improve this question? Update the question so it focuses on one problem only by editing this post.
Closed 6 years ago.
Improve this question
I am using an open source audio fingerprinting platform in python DeJavu that can recognize music from disk and from microphone.
I have tested the recognition from disk and it is amazing. 100% accuracy.
I seek assistance on how to add a class "BroadcastRecognizer"
This will recognize music from a URL online stream example online radio [http://bbcmedia.ic.llnwd.net/stream/bbcmedia_radio1_mf_p]
Because the music in the radio stream is constantly changing, would like to set it to recognize after every 10 seconds.
Here is the recognize.py
import dejavu.fingerprint as fingerprint
import dejavu.decoder as decoder
import numpy as np
import pyaudio
import time
class BaseRecognizer(object):
def __init__(self, dejavu):
self.dejavu = dejavu
self.Fs = fingerprint.DEFAULT_FS
def _recognize(self, *data):
matches = []
for d in data:
matches.extend(self.dejavu.find_matches(d, Fs=self.Fs))
return self.dejavu.align_matches(matches)
def recognize(self):
pass # base class does nothing
class FileRecognizer(BaseRecognizer):
def __init__(self, dejavu):
super(FileRecognizer, self).__init__(dejavu)
def recognize_file(self, filename):
frames, self.Fs, file_hash = decoder.read(filename, self.dejavu.limit)
t = time.time()
match = self._recognize(*frames)
t = time.time() - t
if match:
match['match_time'] = t
return match
def recognize(self, filename):
return self.recognize_file(filename)
class MicrophoneRecognizer(BaseRecognizer):
default_chunksize = 8192
default_format = pyaudio.paInt16
default_channels = 2
default_samplerate = 44100
def __init__(self, dejavu):
super(MicrophoneRecognizer, self).__init__(dejavu)
self.audio = pyaudio.PyAudio()
self.stream = None
self.data = []
self.channels = MicrophoneRecognizer.default_channels
self.chunksize = MicrophoneRecognizer.default_chunksize
self.samplerate = MicrophoneRecognizer.default_samplerate
self.recorded = False
def start_recording(self, channels=default_channels,
samplerate=default_samplerate,
chunksize=default_chunksize):
self.chunksize = chunksize
self.channels = channels
self.recorded = False
self.samplerate = samplerate
if self.stream:
self.stream.stop_stream()
self.stream.close()
self.stream = self.audio.open(
format=self.default_format,
channels=channels,
rate=samplerate,
input=True,
frames_per_buffer=chunksize,
)
self.data = [[] for i in range(channels)]
def process_recording(self):
data = self.stream.read(self.chunksize)
nums = np.fromstring(data, np.int16)
for c in range(self.channels):
self.data[c].extend(nums[c::self.channels])
def stop_recording(self):
self.stream.stop_stream()
self.stream.close()
self.stream = None
self.recorded = True
def recognize_recording(self):
if not self.recorded:
raise NoRecordingError("Recording was not complete/begun")
return self._recognize(*self.data)
def get_recorded_time(self):
return len(self.data[0]) / self.rate
def recognize(self, seconds=10):
self.start_recording()
for i in range(0, int(self.samplerate / self.chunksize
* seconds)):
self.process_recording()
self.stop_recording()
return self.recognize_recording()
class NoRecordingError(Exception):
pass
Here is the dejavu.py
import os``
import sys
import json
import warnings
import argparse
from dejavu import Dejavu
from dejavu.recognize import FileRecognizer
from dejavu.recognize import MicrophoneRecognizer
from argparse import RawTextHelpFormatter
warnings.filterwarnings("ignore")
DEFAULT_CONFIG_FILE = "dejavu.cnf.SAMPLE"
def init(configpath):
"""
Load config from a JSON file
"""
try:
with open(configpath) as f:
config = json.load(f)
except IOError as err:
print("Cannot open configuration: %s. Exiting" % (str(err)))
sys.exit(1)
# create a Dejavu instance
return Dejavu(config)
if __name__ == '__main__':
parser = argparse.ArgumentParser(
description="Dejavu: Audio Fingerprinting library",
formatter_class=RawTextHelpFormatter)
parser.add_argument('-c', '--config', nargs='?',
help='Path to configuration file\n'
'Usages: \n'
'--config /path/to/config-file\n')
parser.add_argument('-f', '--fingerprint', nargs='*',
help='Fingerprint files in a directory\n'
'Usages: \n'
'--fingerprint /path/to/directory extension\n'
'--fingerprint /path/to/directory')
parser.add_argument('-r', '--recognize', nargs=2,
help='Recognize what is '
'playing through the microphone\n'
'Usage: \n'
'--recognize mic number_of_seconds \n'
'--recognize file path/to/file \n')
args = parser.parse_args()
if not args.fingerprint and not args.recognize:
parser.print_help()
sys.exit(0)
config_file = args.config
if config_file is None:
config_file = DEFAULT_CONFIG_FILE
# print "Using default config file: %s" % (config_file)
djv = init(config_file)
if args.fingerprint:
# Fingerprint all files in a directory
if len(args.fingerprint) == 2:
directory = args.fingerprint[0]
extension = args.fingerprint[1]
print("Fingerprinting all .%s files in the %s directory"
% (extension, directory))
djv.fingerprint_directory(directory, ["." + extension], 4)
elif len(args.fingerprint) == 1:
filepath = args.fingerprint[0]
if os.path.isdir(filepath):
print("Please specify an extension if you'd like to fingerprint a directory!")
sys.exit(1)
djv.fingerprint_file(filepath)
elif args.recognize:
# Recognize audio source
song = None
source = args.recognize[0]
opt_arg = args.recognize[1]
if source in ('mic', 'microphone'):
song = djv.recognize(MicrophoneRecognizer, seconds=opt_arg)
elif source == 'file':
song = djv.recognize(FileRecognizer, opt_arg)
print(song)
sys.exit(0)

I still think that you need a discrete "piece" of audio, so you need a beginning and an end.
For what it is worth, start with something like this, which records a 10 second burst of audio, which you can then test against your finger-printed records.
Note: that this is bashed out for python 2, so you would have to edit it for it to run on python 3
import time, sys
import urllib2
url = "http://bbcmedia.ic.llnwd.net/stream/bbcmedia_radio1_mf_p"
print ("Connecting to "+url)
response = urllib2.urlopen(url, timeout=10.0)
fname = "Sample"+str(time.clock())[2:]+".wav"
f = open(fname, 'wb')
block_size = 1024
print ("Recording roughly 10 seconds of audio Now - Please wait")
limit = 10
start = time.time()
while time.time() - start < limit:
try:
audio = response.read(block_size)
if not audio:
break
f.write(audio)
sys.stdout.write('.')
sys.stdout.flush()
except Exception as e:
print ("Error "+str(e))
f.close()
sys.stdout.flush()
print("")
print ("10 seconds from "+url+" have been recorded in "+fname)
#
# here run the finger print test to identify the audio recorded
# using the sample you have downloaded in the file "fname"
#

Related

ModuleNotFoundError: No module named 'jarvis_api'

I am making a Chatbot with Mic input for that I am Using Jarvis below code is for Mic Input But when I am executing the code I am getting the error:-
import jarvis_api.audio_pb2 as ja
ModuleNotFoundError: No module named 'jarvis_api'.
I have installed JarvisAI and jarvis api (using pip install JarvisAI) and (npm i api-jarvis).
import sys
import grpc
import queue
import argparse
import jarvis_api.audio_pb2 as ja
import jarvis_api.jarvis_asr_pb2 as jasr
import jarvis_api.jarvis_asr_pb2_grpc as jasr_srv
import pyaudio
RATE = 44100
CHUNK = int(RATE/10)
def get_args():
parser = argparse.ArgumentParser(description="Streaming transcription via jarvis AI Services")
parser.add_argument("--server",default='localhost:50051',type=str,help="URI to GRPC Services endpoint")
parser.add_argument("--input-devices",type=int,default=None,help="output device to use")
parser.add_argument("--list-devices",action='store_true',help="list output device indices")
return parser.parse_args()
class MicrophoneStream(object):
def __init__(self,rate,chunk,device=None):
self._rate = rate
self._chunk = chunk
self._device = device
self._buff = queue.Queue()
self.closed = True
def __enter__(self):
self._audio_interface = pyaudio.pyAudio()
self._audio_stream = self._audio_interface.open(
format=pyaudio.paInt16,
input_device_index=self._device,
channels=1,
rate=self._rate,
input=True,
frames_per_buffer=self._chunk,
stream_callback=self._fill_buffer,
)
self.closed=False
return self
def __exit__(self,type,value,traceback):
self.audio_stream.stop_stream()
self.audio_stream.close()
self.closed= True
self._buff.put(None)
self._audio_interface.terminate()
def generator(self):
while not self.closed:
chunk=self._buff.get()
if chunk is None:
return
data = [chunk]
while True:
try:
chunk = self._buff.get(block=False)
if chunk is None:
return
data.append(chunk)
except queue.Empty:
break
yield b''.join(data)
def listen_print_loop(responses):
num_chars_printed = 0
for response in responses:
if not response.results:
continue
result = response.results[0]
if not result.alternatives:
continue
transcript = result.alternatives[0].transcript
overwrite_chars = '' * (num_chars_printed - len(transcript))
if not result.is_final:
sys.stdout.write(transcript + overwrite_chars + '\r')
sys.stdout.flush()
num_chars_printed = len(transcript)
else:
print(transcript + overwrite_chars)
num_chars_printed = 0
def main():
args = get_args()
if args.list_devices:
p =pyaudio.pyAudio()
for i in range(p.get_device_count()):
info = p.get_device_info_by_index(i)
if info['maxInputChannels']<1:
continue
print(f"{info['index']}:{info['name']}")
sys.exit(0)
channel = grpc.insecure_channel(args.server)
client = jasr_srv.JarvisASRStub(channel)
config = jasr.RecognitionConfig(
encoding=ja.AudioEncoding.LINEAR_PCM,
sample_rate_hertz=RATE,
language_code="en-US",
max_alternatives=1,
enable_automatic_punctuation=True,
)
streaming_config = jasr.StreamingRecognitionConfig(config=config,interim_results=True)
with MicrophoneStream(RATE,CHUNK,device=args.input_device) as stream:
audio_generator = stream.generator()
requests = (jasr.StreamingRecognizeRequest(audio_content=content) for content in audio_generator)
def build_generator(cfg,gen):
yield jasr.StreamingRecognizeRequest(streaming_config=cfg)
for x in gen:
yield x
responses = client.StreamingRecognize(build_generator(streaming_config,requests))
listen_print_loop(responses)
if __name__ == '__main__':
main()

How to use Wave file as input in VOSK speech recognition?

I have a project that needs to get a recorded file and then process by the code and extract the text from file and match the extracted file with the other text and verify it.
my problem is:
I can't use recorded file in code and it does'nt read the file
init function is the fundamental of code.
verify functtion confirm the matched speech and text.
import argparse
import json
import os
import queue
import random
import sys
from difflib import SequenceMatcher
import numpy as np
import sounddevice as sd
import vosk
q = queue.Queue()
def int_or_str(text):
"""Helper function for argument parsing."""
try:
return int(text)
except ValueError:
return text
def callback(indata, frames, time, status):
"""This is called (from a separate thread) for each audio block."""
if status:
print(status, file=sys.stderr)
q.put(bytes(indata))
def init():
parser = argparse.ArgumentParser(add_help=False)
parser.add_argument(
'-l', '--list-devices', action='store_true',
help='show list of audio devices and exit')
args, remaining = parser.parse_known_args()
if args.list_devices:
print(sd.query_devices())
parser.exit(0)
parser = argparse.ArgumentParser(
description=__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter,
parents=[parser])
parser.add_argument(
'-f', '--filename', type=str, metavar='FILENAME',
help='audio file to store recording to')
parser.add_argument(
'-m', '--model', type=str, metavar='MODEL_PATH',
help='Path to the model')
parser.add_argument(
'-d', '--device', type=int_or_str,
help='input device (numeric ID or substring)')
parser.add_argument(
'-r', '--samplerate', type=int, help='sampling rate')
args = parser.parse_args(remaining)
try:
if args.model is None:
args.model = "model"
if not os.path.exists(args.model):
print("Please download a model for your language from https://alphacephei.com/vosk/models")
print("and unpack as 'model' in the current folder.")
parser.exit(0)
if args.samplerate is None:
device_info = sd.query_devices(args.device, 'input')
# soundfile expects an int, sounddevice provides a float:
args.samplerate = int(device_info['default_samplerate'])
model = vosk.Model(args.model)
if args.filename:
dump_fn = open(args.filename, "wb")
else:
dump_fn = None
except KeyboardInterrupt:
print('\nDone')
parser.exit(0)
except Exception as e:
parser.exit(type(e).__name__ + ': ' + str(e))
return model, args
def verify(random_sentence, model, args):
num, T_num, F_num, num_word = 0, 0, 0, 1
with sd.RawInputStream(samplerate=args.samplerate, blocksize=8000, device=args.device, dtype='int16',
channels=1, callback=callback):
rec = vosk.KaldiRecognizer(model, args.samplerate)
print("{}) ".format(num_word), random_sentence, end='\n')
print('=' * 30, end='\n')
run = True
while run:
data = q.get()
if rec.AcceptWaveform(data):
res = json.loads(rec.FinalResult())
res['text'] = res['text'].replace('ي', 'ی')
if SequenceMatcher(None, random_sentence, res['text']).ratio() > 0.65:
T_num, num, num_word += 1
else:
F_num, num, num_word += 1
run = False
print('=' * 30)
print('True Cases : {}\n False Cases : {}'.format(T_num, F_num))
if __name__ == "__main__":
model, args = init()
verify(random_sentences, model, args)
I have been working on a similar project. I modified the code from VOSK Git repo and wrote the following function that takes file name / path as the input and outputs the captured text. Sometimes, when there is a long pause (~seconds) in the audio file, the returned text would be an empty string. To remedy this problem, I had to write additional code that picks out the longest string that was captured. I could make do with this fix.
def get_text_from_voice(filename):
if not os.path.exists("model"):
print ("Please download the model from https://alphacephei.com/vosk/models and unpack as 'model' in the current folder.")
exit (1)
wf = wave.open(filename, "rb")
if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
print ("Audio file must be WAV format mono PCM.")
exit (1)
model = Model("model")
rec = KaldiRecognizer(model, wf.getframerate())
rec.SetWords(True)
text_lst =[]
p_text_lst = []
p_str = []
len_p_str = []
while True:
data = wf.readframes(4000)
if len(data) == 0:
break
if rec.AcceptWaveform(data):
text_lst.append(rec.Result())
print(rec.Result())
else:
p_text_lst.append(rec.PartialResult())
print(rec.PartialResult())
if len(text_lst) !=0:
jd = json.loads(text_lst[0])
txt_str = jd["text"]
elif len(p_text_lst) !=0:
for i in range(0,len(p_text_lst)):
temp_txt_dict = json.loads(p_text_lst[i])
p_str.append(temp_txt_dict['partial'])
len_p_str = [len(p_str[j]) for j in range(0,len(p_str))]
max_val = max(len_p_str)
indx = len_p_str.index(max_val)
txt_str = p_str[indx]
else:
txt_str =''
return txt_str
Make sure that the correct model is present in the same directory or put in the path to the model. Also, note that VOSK accepts audio files only in wav mono PCM format.

Pyaudio - I can't record more than 10 seconds.What could be the issue?

I'm developing a screen recorder with Tkinter and Pyaudio but i'm encountering some issues:I can not record more than 10 seconds.For example if i try to record 1 minute of video, i can only get 10 seconds.
I'll leave my code down bellow.I didn't saw any errors in the command line.I made comments in my code so that you can understand what is happening:
"""
recordFile.py records audio from the default microphone in a background
thread using pyaudio.
"""
import pyaudio
import wave
import threading
import time
import subprocess
from tkinter import messagebox
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 2
RATE = 44100
RECORD_SECONDS = 5
# WAVE_OUTPUT_FILENAME = "tmp/tmp.wav"
class recorder:
def __init__(self):
self.going = False # is the process running?
self.process = None # stores a reference to the background thread
self.filename = "" # the name of the file to record to
self.p = pyaudio.PyAudio()
self.devices = [None]
self.error = False
def record(self, filename):
# end the process before starting a new one
if self.process and self.process.is_alive():
self.going = False
self.error = False
# start a recording thread
self.process = threading.Thread(target=self._record)
self.process.start()
self.filename = filename
def _record(self):
try:
# initialize pyaudio
streams = []
frames = [] # stores audio data
for i in range(len(self.devices)):
streams.append(self.p.open(format=FORMAT,
channels=CHANNELS,
rate=RATE,
input=True,
frames_per_buffer=CHUNK,
input_device_index=self.devices[i]))
frames.append([])
print("* recording")
self.going = True # let the system know that we are running
while self.going: # stream the audio into "frames"
for i in range(len(self.devices)):
data = streams[i].read(CHUNK)
frames[i].append(data)
print("* done recording")
# stop recording
for i in range(len(self.devices)):
streams[i].stop_stream()
streams[i].close()
# write the audio data to a file (tmp/tmp.wav)
for i in range(len(self.devices)):
wf = wave.open(
self.filename[:self.filename.find(".")] + "_" + str(i) + self.filename[self.filename.find("."):],
'wb')
wf.setnchannels(CHANNELS)
wf.setsampwidth(self.p.get_sample_size(FORMAT))
wf.setframerate(RATE)
wf.writeframes(b''.join(frames[i]))
wf.close()
except Exception as e:
self.error = True
messagebox.showerror("AUDIO ERROR", "ERROR ENCOUNTERED RECORDING AUDIO: " + str(e))
def getDeviceCount(self):
return self.p.get_device_count()
def getDeviceName(self, deviceID):
return self.p.get_device_info_by_index(deviceID)["name"]
def isInputDevice(self, deviceID):
return int(self.p.get_device_info_by_index(deviceID)["maxInputChannels"]) > 0
def getAPIName(self, deviceID):
return self.p.get_host_api_info_by_index(self.p.get_device_info_by_index(deviceID)["hostApi"])["name"]
def setToDefault(self):
self.devices = [None]
def setToDevices(self, devices):
self.devices = devices
def stop_recording(self):
self.going = False
def destroy(self):
self.p.terminate()

Python: How to decode a mp3 chunk into PCM samples?

I'm trying to catch chunks of an mp3 webstream and decoding them into PCM samples for signal processing. I tried to catch the audio via requests and io.BytesIO to save the data as .wav file.
I have to convert the mp3 data to wav data, but I don't know how. (My goal is not to record a .wav file, i am just doing this to test the algorithm.)
I found the pymedia lib, but it is very old (last commit in 2006), using python 2.7 and for me not installable.
Maybe it is possible with ffmpeg-python, but I have just seen examples using files as input and output.
Here's my code:
import requests
import io
import soundfile as sf
import struct
import wave
import numpy as np
def main():
stream_url = r'http://dg-wdr-http-dus-dtag-cdn.cast.addradio.de/wdr/1live/diggi/mp3/128/stream.mp3'
r = requests.get(stream_url, stream=True)
sample_array = []
try:
for block in r.iter_content(1024):
data, samplerate = sf.read(io.BytesIO(block), format="RAW", channels=2, samplerate=44100, subtype='FLOAT',
dtype='float32')
sample_array = np.append(sample_array, data)
except KeyboardInterrupt:
print("...saving")
obj = wave.open('sounds/stream1.wav', 'w')
obj.setnchannels(1) # mono
obj.setsampwidth(2) # bytes
obj.setframerate(44100)
data_max = np.nanmax(abs(sample_array))
# fill WAV with samples from sample_array
for sample in sample_array:
if (np.isnan(sample) or np.isnan(32760 * sample / data_max)) is True:
continue
try:
value = int(32760 * sample / data_max) # normalization INT16
except ValueError:
value = 1
finally:
data = struct.pack('<h', value)
obj.writeframesraw(data)
obj.close()
print("end")
if __name__ == '__main__':
main()
Do you have an idea how to handle this problem?
You are missing the decoding of mp3 stream. You are just saving mp3 file as wav.
You first need to decode mp3 audio. Which will give you PCM samples + audio info.
With the help from Irmen and his "miniaudio" and "synthesizer" library, I could solve the problem.
The problem was, that most radio webstreams uses the ICECAST protocol, which includes interleaved metadata information, so you can't decode it directly.
With the example script https://github.com/irmen/synthesizer/blob/master/examples/internetradio.py as template, I could write a script, which records a webstream until KeyboardInterrupt and saves it as a .wav file.
Here's the main part I edited:
...
def _audio_playback(self, pcm_stream):
sample_array = None
with Output(mixing="sequential", frames_per_chunk=44100 // 4) as output:
print("begin recording")
while self.decode_flag:
try:
audio = pcm_stream.read(44100 * 2 * 2 // 20)
if not audio:
break
except (IOError, ValueError):
break
else:
sample = Sample.from_raw_frames(audio, 2, 44100, 2)
if sample_array is None:
sample_array = sample.get_frames_numpy_float()
else:
sample_array = np.append(sample_array, sample.get_frames_numpy_float(), axis=0)
print("...saving")
wavf.write(self.file_location, 44100, sample_array)
print("saved")
...
Based on Bendzko answer here is my code:
pip install pyaudio miniaudio
import threading
import urllib.request
import time
try:
import miniaudio
except ImportError:
miniaudio = None
import pyaudio
import ctypes
import sys
CHUNK = 4096
p = pyaudio.PyAudio()
stream = p.open(format=pyaudio.paInt16,channels=2,rate=44100,output=True)
class RadioThread(threading.Thread):
def run(self):
self.url = "https://impradio.bytemasters.gr/8002/stream"
#run in threading
client = miniaudio.IceCastClient(self.url)
pcm_stream = MiniaudioDecoderPcmStream(client.audio_format,client)
self.audio_playback(pcm_stream)
def audio_playback(self,pcm_stream):
global stop_peradio_thread
while stop_peradio_thread==False:
try:
audio = pcm_stream.read(CHUNK)
stream.write(audio.tobytes())
except:
pass
class MiniaudioDecoderPcmStream(miniaudio.StreamableSource):
def __init__(self, fmt, stream):
self.pcm_stream = miniaudio.stream_any(stream, fmt, dither=miniaudio.DitherMode.TRIANGLE)
def read(self, size):
try:
return self.pcm_stream.send(size)
except StopIteration:
return b""
def main():
global stop_peradio_thread
stop_peradio_thread = False
t1 = RadioThread()
t1.start()
while True:
try:
time.sleep(1)
except KeyboardInterrupt:
stop_peradio_thread = True
t1.join()
sys.exit()
main()

pyaudio - "Listen" until voice is detected and then record to a .wav file

I'm having some problems and I cannot seem to get my head around the concept.
What I am trying to do is this:
Have the microphone "listen" for voiced (above a particular threshold) and then start recording to a .wav file until the person has stopped speaking / the signal is no longer there. For example:
begin:
listen() -> nothing is being said
listen() -> nothing is being said
listen() -> VOICED - _BEGIN RECORDING_
listen() -> VOICED - _BEGIN RECORDING_
listen() -> UNVOICED - _END RECORDING_
end
I want to do this also using "threading" so a thread would be created that "listens" to the file constantly, and, another thread will begin when there is voiced data.. But, I cannot for the life of me figure out how I should go about it.. Here is my code so far:
import wave
import sys
import threading
from array import array
from sys import byteorder
try:
import pyaudio
CHECK_PYLIB = True
except ImportError:
CHECK_PYLIB = False
class Audio:
_chunk = 0.0
_format = 0.0
_channels = 0.0
_rate = 0.0
record_for = 0.0
stream = None
p = None
sample_width = None
THRESHOLD = 500
# initial constructor to accept params
def __init__(self, chunk, format, channels, rate):
#### set data-types
self._chunk = chunk
self.format = pyaudio.paInt16,
self.channels = channels
self.rate = rate
self.p = pyaudio.PyAudio();
def open(self):
# print "opened"
self.stream = self.p.open(format=pyaudio.paInt16,
channels=2,
rate=44100,
input=True,
frames_per_buffer=1024);
return True
def record(self):
# create a new instance/thread to record the sound
threading.Thread(target=self.listen).start();
def is_silence(snd_data):
return max(snd_data) < THRESHOLD
def listen(self):
r = array('h')
while True:
snd_data = array('h', self.stream.read(self._chunk))
if byteorder == 'big':
snd_data.byteswap()
r.extend(snd_data)
return sample_width, r
I'm guessing that I could record "5" second blocks, and, then if the block is deemed as "voiced" then it the thread should be started until all the voice data has been captured. However, because at current it's at while True: i don't want to capture all of the audio up until there are voiced commands, so e.g. "no voice", "no voice", "voice", "voice", "no voice", "no voice" i just want the "voice" inside the wav file.. Anyone have any suggestions?
Thank you
EDIT:
import wave
import sys
import time
import threading
from array import array
from sys import byteorder
from Queue import Queue, Full
import pyaudio
CHUNK_SIZE = 1024
MIN_VOLUME = 500
BUF_MAX_SIZE = 1024 * 10
process_g = 0
def main():
stopped = threading.Event()
q = Queue(maxsize=int(round(BUF_MAX_SIZE / CHUNK_SIZE)))
listen_t = threading.Thread(target=listen, args=(stopped, q))
listen_t.start()
process_g = threading.Thread(target=process, args=(stopped, q))
process_g.start()
try:
while True:
listen_t.join(0.1)
process_g.join(0.1)
except KeyboardInterrupt:
stopped.set()
listen_t.join()
process_g.join()
def process(stopped, q):
while True:
if stopped.wait(timeout = 0):
break
print "I'm processing.."
time.sleep(300)
def listen(stopped, q):
stream = pyaudio.PyAudio().open(
format = pyaudio.paInt16,
channels = 2,
rate = 44100,
input = True,
frames_per_buffer = 1024
)
while True:
if stopped and stopped.wait(timeout=0):
break
try:
print process_g
for i in range(0, int(44100 / 1024 * 5)):
data_chunk = array('h', stream.read(CHUNK_SIZE))
vol = max(data_chunk)
if(vol >= MIN_VOLUME):
print "WORDS.."
else:
print "Nothing.."
except Full:
pass
if __name__ == '__main__':
main()
Now, after every 5 seconds, I need the "process" function to execute, and then process the data (time.delay(10) whilst it does this and then start the recording back up..
Having spent some time on it, I've come up with the following code that seems to be doing what you need, except writing to file:
import threading
from array import array
from Queue import Queue, Full
import pyaudio
CHUNK_SIZE = 1024
MIN_VOLUME = 500
# if the recording thread can't consume fast enough, the listener will start discarding
BUF_MAX_SIZE = CHUNK_SIZE * 10
def main():
stopped = threading.Event()
q = Queue(maxsize=int(round(BUF_MAX_SIZE / CHUNK_SIZE)))
listen_t = threading.Thread(target=listen, args=(stopped, q))
listen_t.start()
record_t = threading.Thread(target=record, args=(stopped, q))
record_t.start()
try:
while True:
listen_t.join(0.1)
record_t.join(0.1)
except KeyboardInterrupt:
stopped.set()
listen_t.join()
record_t.join()
def record(stopped, q):
while True:
if stopped.wait(timeout=0):
break
chunk = q.get()
vol = max(chunk)
if vol >= MIN_VOLUME:
# TODO: write to file
print "O",
else:
print "-",
def listen(stopped, q):
stream = pyaudio.PyAudio().open(
format=pyaudio.paInt16,
channels=2,
rate=44100,
input=True,
frames_per_buffer=1024,
)
while True:
if stopped.wait(timeout=0):
break
try:
q.put(array('h', stream.read(CHUNK_SIZE)))
except Full:
pass # discard
if __name__ == '__main__':
main()
Look here:
https://github.com/jeysonmc/python-google-speech-scripts/blob/master/stt_google.py
It even converts Wav to flac and sends it to the google Speech api , just delete the stt_google_wav function if you dont need it ;)

Categories

Resources