Python: How to decode a mp3 chunk into PCM samples? - python

I'm trying to catch chunks of an mp3 webstream and decoding them into PCM samples for signal processing. I tried to catch the audio via requests and io.BytesIO to save the data as .wav file.
I have to convert the mp3 data to wav data, but I don't know how. (My goal is not to record a .wav file, i am just doing this to test the algorithm.)
I found the pymedia lib, but it is very old (last commit in 2006), using python 2.7 and for me not installable.
Maybe it is possible with ffmpeg-python, but I have just seen examples using files as input and output.
Here's my code:
import requests
import io
import soundfile as sf
import struct
import wave
import numpy as np
def main():
stream_url = r'http://dg-wdr-http-dus-dtag-cdn.cast.addradio.de/wdr/1live/diggi/mp3/128/stream.mp3'
r = requests.get(stream_url, stream=True)
sample_array = []
try:
for block in r.iter_content(1024):
data, samplerate = sf.read(io.BytesIO(block), format="RAW", channels=2, samplerate=44100, subtype='FLOAT',
dtype='float32')
sample_array = np.append(sample_array, data)
except KeyboardInterrupt:
print("...saving")
obj = wave.open('sounds/stream1.wav', 'w')
obj.setnchannels(1) # mono
obj.setsampwidth(2) # bytes
obj.setframerate(44100)
data_max = np.nanmax(abs(sample_array))
# fill WAV with samples from sample_array
for sample in sample_array:
if (np.isnan(sample) or np.isnan(32760 * sample / data_max)) is True:
continue
try:
value = int(32760 * sample / data_max) # normalization INT16
except ValueError:
value = 1
finally:
data = struct.pack('<h', value)
obj.writeframesraw(data)
obj.close()
print("end")
if __name__ == '__main__':
main()
Do you have an idea how to handle this problem?

You are missing the decoding of mp3 stream. You are just saving mp3 file as wav.
You first need to decode mp3 audio. Which will give you PCM samples + audio info.

With the help from Irmen and his "miniaudio" and "synthesizer" library, I could solve the problem.
The problem was, that most radio webstreams uses the ICECAST protocol, which includes interleaved metadata information, so you can't decode it directly.
With the example script https://github.com/irmen/synthesizer/blob/master/examples/internetradio.py as template, I could write a script, which records a webstream until KeyboardInterrupt and saves it as a .wav file.
Here's the main part I edited:
...
def _audio_playback(self, pcm_stream):
sample_array = None
with Output(mixing="sequential", frames_per_chunk=44100 // 4) as output:
print("begin recording")
while self.decode_flag:
try:
audio = pcm_stream.read(44100 * 2 * 2 // 20)
if not audio:
break
except (IOError, ValueError):
break
else:
sample = Sample.from_raw_frames(audio, 2, 44100, 2)
if sample_array is None:
sample_array = sample.get_frames_numpy_float()
else:
sample_array = np.append(sample_array, sample.get_frames_numpy_float(), axis=0)
print("...saving")
wavf.write(self.file_location, 44100, sample_array)
print("saved")
...

Based on Bendzko answer here is my code:
pip install pyaudio miniaudio
import threading
import urllib.request
import time
try:
import miniaudio
except ImportError:
miniaudio = None
import pyaudio
import ctypes
import sys
CHUNK = 4096
p = pyaudio.PyAudio()
stream = p.open(format=pyaudio.paInt16,channels=2,rate=44100,output=True)
class RadioThread(threading.Thread):
def run(self):
self.url = "https://impradio.bytemasters.gr/8002/stream"
#run in threading
client = miniaudio.IceCastClient(self.url)
pcm_stream = MiniaudioDecoderPcmStream(client.audio_format,client)
self.audio_playback(pcm_stream)
def audio_playback(self,pcm_stream):
global stop_peradio_thread
while stop_peradio_thread==False:
try:
audio = pcm_stream.read(CHUNK)
stream.write(audio.tobytes())
except:
pass
class MiniaudioDecoderPcmStream(miniaudio.StreamableSource):
def __init__(self, fmt, stream):
self.pcm_stream = miniaudio.stream_any(stream, fmt, dither=miniaudio.DitherMode.TRIANGLE)
def read(self, size):
try:
return self.pcm_stream.send(size)
except StopIteration:
return b""
def main():
global stop_peradio_thread
stop_peradio_thread = False
t1 = RadioThread()
t1.start()
while True:
try:
time.sleep(1)
except KeyboardInterrupt:
stop_peradio_thread = True
t1.join()
sys.exit()
main()

Related

i have a code in python which is showing error of input overflow and unsanitized input from http

import sys
import sounddevice as sd
import numpy as np
import speech_recognition as sr
# Define the sample rate and chunk size
sample_rate = 44100
chunk_size = 2048
# Initialize recognizer class (for recognizing the speech)
r = sr.Recognizer()
# Define the callback function for the sounddevice stream
def audio_callback(indata, frames, time, status):
if status:
print(status, file=sys.stderr)
audio_data = np.frombuffer(indata, dtype=np.int16)
try:
text = r.recognize_google(sr.AudioData(audio_data, sample_rate, sample_width=2))
print("Recognized text: ", text)
except Exception as e:
print("Error: ", e)
# Start the stream
stream = sd.InputStream(callback=audio_callback, channels=1, blocksize=chunk_size,
samplerate=sample_rate)
with stream:
while True:
pass
audio_data = stream.read(chunk_size,Exception = False)
Here the error is input overflow. What should I do?
I tried to minimize flow of data but was unsuccessful. I want the code to take input in proper quantity and deliver the output. I am a beginner at Python so any help will be great.

Opening wav from URL in Python

With the Python script shown below I try to play a wav file from the internet but I'm getting the error message OSError: [Errno 22] Invalid argument: 'https://file-examples-com.github.io/uploads/2017/11/file_example_WAV_1MG.wav'.
How can I play a wav file from the internet?
import pyaudio
import wave
chunk = 1024
f = wave.open("https://file-examples-com.github.io/uploads/2017/11/file_example_WAV_1MG.wav","rb")
p = pyaudio.PyAudio()
stream = p.open(format = p.get_format_from_width(f.getsampwidth()),
channels = f.getnchannels(),
rate = f.getframerate(),
output = True)
data = f.readframes(chunk)
while data:
stream.write(data)
data = f.readframes(chunk)
stream.stop_stream()
stream.close()
p.terminate()
You can also get the content of website, store it in a variable, and play it. There is no need to store it on the disk for a short file like this. Here is an example of how to do this:
import logging
import requests
import simpleaudio
sample_rate = 8000
num_channels = 2
bytes_per_sample = 2
total = sample_rate * num_channels * bytes_per_sample
logging.basicConfig(level=logging.INFO)
audio_url = "https://file-examples-com.github.io/uploads/2017/11/file_example_WAV_1MG.wav"
logging.info(f"Downloading audio file from: {audio_url}")
content = requests.get(audio_url).content
# Just to ensure that the file does not have extra bytes
blocks = len(content) // total
content = content[:total * blocks]
wave = simpleaudio.WaveObject(audio_data=content,
sample_rate=sample_rate,
num_channels=num_channels,
bytes_per_sample=bytes_per_sample)
control = wave.play()
control.wait_done()
I'm demonstrating what #larsks suggests.
import requests
with open(audio_file, 'wb') as a:
resp = requests.get("https://file-examples-com.github.io/uploads/2017/11/file_example_WAV_1MG.wav")
if resp.status_code == 200:
a.write(resp.content)
print('downloaded')
else:
print(resp.reason)
exit(1)
f = wave.open(audio_file, "rb")
# the remaining lines are the same
And I also suggest another great python library python-mpv which is based on mpv, this library can handle much more codecs and also online streaming play.

Use pyaudio to play files immediately after the other

I'm trying to modify my first Python program. I'm trying to use this repository to do some rudimentary text-to-speech. It does fine, but I want to improve it.
From the looks of it, there is a 0.145 second delay between samples played. Not all the samples of my voice will be 0.145 seconds, however, and I want to have each sample play one after the other with no delays or skips.
import re
import wave
import pyaudio
import _thread
import time
class TextToSpeech:
CHUNK = 1024
def __init__(self, words_pron_dict:str = 'cmudict-0.7b.txt'):
self._l = {}
self._load_words(words_pron_dict)
def _load_words(self, words_pron_dict:str):
with open(words_pron_dict, 'r') as file:
for line in file:
if not line.startswith(';;;'):
key, val = line.split(' ',2)
self._l[key] = re.findall(r"[A-Z]+",val)
def get_pronunciation(self, str_input):
list_pron = []
for word in re.findall(r"[\w']+",str_input.upper()):
if word in self._l:
list_pron += self._l[word]
print(list_pron)
delay = 0.0
for pron in list_pron:
_thread.start_new_thread( TextToSpeech._play_audio, (pron,delay,))
delay += 0.145
def _play_audio(sound, delay):
try:
time.sleep(delay)
wf = wave.open("sounds/"+sound+".wav", 'rb')
p = pyaudio.PyAudio()
stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
channels=wf.getnchannels(),
rate=wf.getframerate(),
output=True)
data = wf.readframes(TextToSpeech.CHUNK)
while data:
stream.write(data)
data = wf.readframes(TextToSpeech.CHUNK)
stream.stop_stream()
stream.close()
p.terminate()
except:
pass
if __name__ == '__main__':
tts = TextToSpeech()
while True:
tts.get_pronunciation(input('Enter a word or phrase: '))
I've tried getting rid of the threading and delay, but there is some delay still between samples. I'm thinking that I should, instead of incrementing delay by 0.145, increment it by the length of the sample in seconds, but I've looked at the pyaudio documentation, and I have no idea how to do that.
Can someone help?
Here is a modified code that plays wav files continuously.
import re
import wave
import pyaudio
class TextToSpeech:
CHUNK = 1024
def __init__(self, words_pron_dict='cmudict-0.7b.txt'):
self._l = {}
self._load_words(words_pron_dict)
def _load_words(self, words_pron_dict: str):
with open(words_pron_dict, 'r') as file:
for line in file:
if not line.startswith(';;;'):
key, val = line.split(' ', 2)
self._l[key] = re.findall(r"[A-Z]+", val)
def get_pronunciation(self, str_input):
list_pron = []
for word in re.findall(r"[\w']+", str_input.upper()):
if word in self._l:
list_pron += self._l[word]
print(list_pron)
# pyaudio set up.
# This open method assume all wave files have the same format.
p = pyaudio.PyAudio()
stream = p.open(format=p.get_format_from_width(2),
channels=2,
rate=44100,
output=True,
frames_per_buffer=self.CHUNK)
# play each wav file contineuously
for pron in list_pron:
with wave.open("sounds/"+pron+".wav", 'rb') as wf:
data = wf.readframes(TextToSpeech.CHUNK)
while data:
stream.write(data)
data = wf.readframes(TextToSpeech.CHUNK)
stream.stop_stream()
stream.close()
p.terminate()
if __name__ == '__main__':
tts = TextToSpeech()
while True:
tts.get_pronunciation(input('Enter a word or phrase: '))

How to reverse audio with pyaudio

I've been tinkering around with pyaudio for a while now, trying to reverse a simple wave file with no success.
In (my) theory I would only have to iterate from end to beginning through the file with every callback of pyaudio (1024 frames) fetch the audio data from the according index in the file, reverse the resulting string and play it.
Here is my code (only pyaudio callback and file handling, the rest is untouched from the example code):
import pyaudio
import wave
import time
import sys
if len(sys.argv) < 2:
print("Plays a wave file.\n\nUsage: %s filename.wav" % sys.argv[0])
sys.exit(-1)
index = 40*1024
wf = wave.open(sys.argv[1], 'rb')
wf.setpos(index)
p = pyaudio.PyAudio()
def callback(in_data, frame_count, time_info, status):
global index
data = wf.readframes(frame_count)
data = data[::-1]
index-=1024
wf.setpos(index)
return (data, pyaudio.paContinue)
stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
channels=wf.getnchannels(),
rate=wf.getframerate(),
output=True,
stream_callback=callback)
stream.start_stream()
while stream.is_active():
time.sleep(0.1)
stream.stop_stream()
stream.close()
wf.close()
p.terminate()
I know this will crash when it reaches the file beginning, but it should play 40 × 1024 frames of reversed audio...
If the file you want reversed is small enough to fit in memory, your best bet would be loading it entirely and reversing the data, then streaming it:
import pyaudio
import wave
wavefile_name = 'wavefile.wav'
wf = wave.open(wavefile_name, 'rb')
p = pyaudio.PyAudio()
stream = p.open(format =
p.get_format_from_width(wf.getsampwidth()),
channels = wf.getnchannels(),
rate = wf.getframerate(),
output = True)
full_data = []
data = wf.readframes(1024)
while data:
full_data.append(data)
data = wf.readframes(1024)
data = ''.join(full_data)[::-1]
for i in range(0, len(data), 1024):
stream.write(data[i:i+1024])
However, if the file is too big to fit in memory, you'll need some way of reading the file backwards and feed into the sound system. That's an entirely different problem, because it involves some low level I/O programming to handle the backward reading.
Edit: after seeing your full code, I don't see any errors. The code runs properly in my computer, only to fail at the ending of the playback. However, you can do two things. First, you can go to the end of the wavefile to play the entire sound backwards, using this:
wf = wave.open(sys.argv[1], 'rb')
index = wf.getnframes() - 1024
wf.setpos(index)
Second, you have to modify the callback so it doesn't fail when the seek head goes beyond the beginning of the file:
def callback(in_data, frame_count, time_info, status):
global index
data = wf.readframes(frame_count)
data = data[::-1]
index-=1024
if index < 0:
return (data, pyaudio.paAbort)
else:
wf.setpos(max(index,0))
return (data, pyaudio.paContinue)
Other than that, it works pretty ok.

pyaudio - "Listen" until voice is detected and then record to a .wav file

I'm having some problems and I cannot seem to get my head around the concept.
What I am trying to do is this:
Have the microphone "listen" for voiced (above a particular threshold) and then start recording to a .wav file until the person has stopped speaking / the signal is no longer there. For example:
begin:
listen() -> nothing is being said
listen() -> nothing is being said
listen() -> VOICED - _BEGIN RECORDING_
listen() -> VOICED - _BEGIN RECORDING_
listen() -> UNVOICED - _END RECORDING_
end
I want to do this also using "threading" so a thread would be created that "listens" to the file constantly, and, another thread will begin when there is voiced data.. But, I cannot for the life of me figure out how I should go about it.. Here is my code so far:
import wave
import sys
import threading
from array import array
from sys import byteorder
try:
import pyaudio
CHECK_PYLIB = True
except ImportError:
CHECK_PYLIB = False
class Audio:
_chunk = 0.0
_format = 0.0
_channels = 0.0
_rate = 0.0
record_for = 0.0
stream = None
p = None
sample_width = None
THRESHOLD = 500
# initial constructor to accept params
def __init__(self, chunk, format, channels, rate):
#### set data-types
self._chunk = chunk
self.format = pyaudio.paInt16,
self.channels = channels
self.rate = rate
self.p = pyaudio.PyAudio();
def open(self):
# print "opened"
self.stream = self.p.open(format=pyaudio.paInt16,
channels=2,
rate=44100,
input=True,
frames_per_buffer=1024);
return True
def record(self):
# create a new instance/thread to record the sound
threading.Thread(target=self.listen).start();
def is_silence(snd_data):
return max(snd_data) < THRESHOLD
def listen(self):
r = array('h')
while True:
snd_data = array('h', self.stream.read(self._chunk))
if byteorder == 'big':
snd_data.byteswap()
r.extend(snd_data)
return sample_width, r
I'm guessing that I could record "5" second blocks, and, then if the block is deemed as "voiced" then it the thread should be started until all the voice data has been captured. However, because at current it's at while True: i don't want to capture all of the audio up until there are voiced commands, so e.g. "no voice", "no voice", "voice", "voice", "no voice", "no voice" i just want the "voice" inside the wav file.. Anyone have any suggestions?
Thank you
EDIT:
import wave
import sys
import time
import threading
from array import array
from sys import byteorder
from Queue import Queue, Full
import pyaudio
CHUNK_SIZE = 1024
MIN_VOLUME = 500
BUF_MAX_SIZE = 1024 * 10
process_g = 0
def main():
stopped = threading.Event()
q = Queue(maxsize=int(round(BUF_MAX_SIZE / CHUNK_SIZE)))
listen_t = threading.Thread(target=listen, args=(stopped, q))
listen_t.start()
process_g = threading.Thread(target=process, args=(stopped, q))
process_g.start()
try:
while True:
listen_t.join(0.1)
process_g.join(0.1)
except KeyboardInterrupt:
stopped.set()
listen_t.join()
process_g.join()
def process(stopped, q):
while True:
if stopped.wait(timeout = 0):
break
print "I'm processing.."
time.sleep(300)
def listen(stopped, q):
stream = pyaudio.PyAudio().open(
format = pyaudio.paInt16,
channels = 2,
rate = 44100,
input = True,
frames_per_buffer = 1024
)
while True:
if stopped and stopped.wait(timeout=0):
break
try:
print process_g
for i in range(0, int(44100 / 1024 * 5)):
data_chunk = array('h', stream.read(CHUNK_SIZE))
vol = max(data_chunk)
if(vol >= MIN_VOLUME):
print "WORDS.."
else:
print "Nothing.."
except Full:
pass
if __name__ == '__main__':
main()
Now, after every 5 seconds, I need the "process" function to execute, and then process the data (time.delay(10) whilst it does this and then start the recording back up..
Having spent some time on it, I've come up with the following code that seems to be doing what you need, except writing to file:
import threading
from array import array
from Queue import Queue, Full
import pyaudio
CHUNK_SIZE = 1024
MIN_VOLUME = 500
# if the recording thread can't consume fast enough, the listener will start discarding
BUF_MAX_SIZE = CHUNK_SIZE * 10
def main():
stopped = threading.Event()
q = Queue(maxsize=int(round(BUF_MAX_SIZE / CHUNK_SIZE)))
listen_t = threading.Thread(target=listen, args=(stopped, q))
listen_t.start()
record_t = threading.Thread(target=record, args=(stopped, q))
record_t.start()
try:
while True:
listen_t.join(0.1)
record_t.join(0.1)
except KeyboardInterrupt:
stopped.set()
listen_t.join()
record_t.join()
def record(stopped, q):
while True:
if stopped.wait(timeout=0):
break
chunk = q.get()
vol = max(chunk)
if vol >= MIN_VOLUME:
# TODO: write to file
print "O",
else:
print "-",
def listen(stopped, q):
stream = pyaudio.PyAudio().open(
format=pyaudio.paInt16,
channels=2,
rate=44100,
input=True,
frames_per_buffer=1024,
)
while True:
if stopped.wait(timeout=0):
break
try:
q.put(array('h', stream.read(CHUNK_SIZE)))
except Full:
pass # discard
if __name__ == '__main__':
main()
Look here:
https://github.com/jeysonmc/python-google-speech-scripts/blob/master/stt_google.py
It even converts Wav to flac and sends it to the google Speech api , just delete the stt_google_wav function if you dont need it ;)

Categories

Resources