I have a Flask API that receives a WAV file and then sends it to a function (in another file):
audio_file = request.files['audio_file']
audio_file_read = audio_file.read()
bytes_io = io.BytesIO(audio_file_read)
load_audio(bytes_io)
def load_audio(bytes_io: bytes, sr: int = 16000):
try:
print("Trying FFmpeg call...")
out, _ = (
ffmpeg.input('pipe:', format='wav', stdin=bytes_io)
.output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=sr)
.run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
)
print("FFmpeg call successful")
except ffmpeg.Error as e:
raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e
return 'finished'
I get these logs:
<_io.BytesIO object at 0x0000018B00834040>
Trying FFmpeg call..
Why don't I receive the other logs? How can I solve it?
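A likely cause: ffmpeg.input() has no stdin parameter; the keyword is just turned into extra command-line tokens, so the bytes are never written to ffmpeg's stdin, and cmd=["ffmpeg", "-nostdin"] explicitly tells ffmpeg not to read stdin at all. The process then blocks (or fails) before either the success print or the exception handler is reached. ffmpeg-python feeds stdin through run(input=...); here is a minimal sketch of a corrected function, assuming you want the decoded samples back as a NumPy array:

import io

import ffmpeg
import numpy as np

def load_audio(bytes_io: io.BytesIO, sr: int = 16000) -> np.ndarray:
    """Decode in-memory WAV bytes to mono 16 kHz samples via ffmpeg pipes."""
    try:
        out, _ = (
            ffmpeg.input('pipe:', format='wav')
            .output('pipe:', format='s16le', acodec='pcm_s16le', ac=1, ar=sr)
            # run(input=...) writes the buffer to ffmpeg's stdin;
            # don't pass -nostdin, or ffmpeg will ignore the piped data.
            .run(input=bytes_io.getvalue(), capture_stdout=True, capture_stderr=True)
        )
    except ffmpeg.Error as e:
        raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e
    return np.frombuffer(out, np.int16).astype(np.float32) / 32768.0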
As the title says, I would like to know how socket programming can transfer Excel files across the stream. I can send and receive any txt file by opening it with the open method, reading it, converting it into a binary string, and sending it across the socket stream, but I have no idea how to send a more complicated file type like xlsx/xls over the stream.
Here is my sample code on how to send a file via socket. But this is only applicable to files that can be opened using the open method. What if I want to send other files like PDF, xlsx, etc.?
Server:
import multiprocessing
import socket

class Server:
def __init__(self, host_ip_address, host_port):
self._host_ip_address = host_ip_address
self._host_port = host_port
self._socket = socket.socket()
self._connection = None
self._address = None
self._chunk_size = 0
self._process = None
self._queue = None
def start_connection(self):
try:
self._socket.bind((self._host_ip_address, self._host_port))
self._socket.listen(0)
except socket.error:
raise Exception('Unable to initiate the server connection, please check the socket')
def receive_file(self, queue):
try:
connection, address = self._socket.accept()
filename = 'stream_file.txt'
with open(filename, 'wb') as file:
while True:
file_data = connection.recv(self._chunk_size)
if not file_data:
break
                    file.write(file_data)  # write the raw bytes; an f-string would write their str repr
queue.put(filename)
except Exception as err:
raise Exception('There is an issue while receiving the file, please try again')
def start_server(self, chunk_size=4096):
self._queue = multiprocessing.Queue()
self._chunk_size = chunk_size
self.start_connection()
self._process = multiprocessing.Process(target=self.receive_file, args=(self._queue,))
self._process.start()
print(self._queue.get())
if __name__ == '__main__':
server = Server('127.0.0.1', 5001)
server.start_server()
Client:
import socket

class Client:
def __init__(self, host_ip_address, host_port):
self._host_ip_address = host_ip_address
self._host_port = host_port
self._socket = socket.socket()
self._chunk_size = 0
def start_connection(self):
try:
self._socket.connect((self._host_ip_address, self._host_port))
except socket.error:
raise Exception('Unable to connect to the host')
def send_file(self, filename):
try:
            with open(filename, 'rb') as file:
                while True:
                    read_bytes = file.read(self._chunk_size)
                    if not read_bytes:
                        break
                    self._socket.sendall(read_bytes)
        except Exception as err:
            raise Exception('No file has been selected. Please make sure the file exists')
def start_client(self, chunk_size=4096, file=None):
        if file is None:
            raise Exception('No data selected to send across the socket')
        self._chunk_size = chunk_size
        self.start_connection()
        self.send_file(file)
        self._socket.close()  # closing signals end-of-stream, so the server's recv() loop can finish
if __name__ == '__main__':
client = Client('127.0.0.1', 5001)
filename = r'D:\Work\sample.xlsx'
client.start_client(file=filename)
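For what it's worth, open(filename, 'rb') already yields raw bytes for any file type (xlsx, PDF, images), so the send/receive code above is format-agnostic; the only thing to watch is that the server writes the received bytes untouched and saves them under the right extension (stream_file.txt would become stream_file.xlsx). A minimal local sketch, with hypothetical file names:

# Round-trip any binary file through raw bytes; no format-specific handling.
with open(r'D:\Work\sample.xlsx', 'rb') as src:   # path from the question
    payload = src.read()
with open('received_copy.xlsx', 'wb') as dst:     # hypothetical output name
    dst.write(payload)
# received_copy.xlsx opens in Excel exactly like the original.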
I am new to the Google API and web services. I have only tried the Google Translate API once, and that one worked fine. Now I want to use the Google Media Translation API to translate voice input. I followed the tutorial from https://cloud.google.com/translate/media/docs/streaming.
However, I cannot make it work. There is no error at runtime, so I don't know where to look. Could you please help me identify the problem?
# [START media_translation_translate_from_mic]
from __future__ import division
import itertools
from google.cloud import mediatranslation as media
import pyaudio
from six.moves import queue
import os
os.environ["GOOGLE_APPLICATION_CREDENTIALS"]="/Users/Me/GoogleMT/TranslationAPI/MediaKey.json"
# Audio recording parameters
RATE = 16000
CHUNK = int(RATE / 10) # 100ms
SpeechEventType = media.StreamingTranslateSpeechResponse.SpeechEventType
class MicrophoneStream:
"""Opens a recording stream as a generator yielding the audio chunks."""
def __init__(self, rate, chunk):
self._rate = rate
self._chunk = chunk
# Create a thread-safe buffer of audio data
self._buff = queue.Queue()
self.closed = True
def __enter__(self):
self._audio_interface = pyaudio.PyAudio()
self._audio_stream = self._audio_interface.open(
format=pyaudio.paInt16,
channels=1, rate=self._rate,
input=True, frames_per_buffer=self._chunk,
# Run the audio stream asynchronously to fill the buffer object.
# This is necessary so that the input device's buffer doesn't
# overflow while the calling thread makes network requests, etc.
stream_callback=self._fill_buffer,
)
self.closed = False
return self
def __exit__(self, type=None, value=None, traceback=None):
self._audio_stream.stop_stream()
self._audio_stream.close()
self.closed = True
# Signal the generator to terminate so that the client's
# streaming_recognize method will not block the process termination.
self._buff.put(None)
self._audio_interface.terminate()
def _fill_buffer(self, in_data, frame_count, time_info, status_flags):
"""Continuously collect data from the audio stream, into the buffer."""
self._buff.put(in_data)
return None, pyaudio.paContinue
def exit(self):
self.__exit__()
def generator(self):
while not self.closed:
# Use a blocking get() to ensure there's at least one chunk of
# data, and stop iteration if the chunk is None, indicating the
# end of the audio stream.
chunk = self._buff.get()
if chunk is None:
return
data = [chunk]
# Now consume whatever other data's still buffered.
while True:
try:
chunk = self._buff.get(block=False)
if chunk is None:
return
data.append(chunk)
except queue.Empty:
break
yield b''.join(data)
def listen_print_loop(responses):
"""Iterates through server responses and prints them.
The responses passed is a generator that will block until a response
is provided by the server.
"""
translation = ''
source = ''
for response in responses:
# Once the transcription settles, the response contains the
# END_OF_SINGLE_UTTERANCE event.
if (response.speech_event_type ==
SpeechEventType.END_OF_SINGLE_UTTERANCE):
print(u'\nFinal translation: {0}'.format(translation))
print(u'Final recognition result: {0}'.format(source))
return 0
result = response.result
translation = result.text_translation_result.translation
source = result.recognition_result
print(u'\nPartial translation: {0}'.format(translation))
print(u'Partial recognition result: {0}'.format(source))
def do_translation_loop():
print('Begin speaking...')
client = media.SpeechTranslationServiceClient()
speech_config = media.TranslateSpeechConfig(
audio_encoding='linear16',
source_language_code='en-US',
target_language_code='ja')
config = media.StreamingTranslateSpeechConfig(
audio_config=speech_config, single_utterance=True)
# The first request contains the configuration.
# Note that audio_content is explicitly set to None.
first_request = media.StreamingTranslateSpeechRequest(
streaming_config=config, audio_content=None)
with MicrophoneStream(RATE, CHUNK) as stream:
audio_generator = stream.generator()
mic_requests = (media.StreamingTranslateSpeechRequest(
audio_content=content,
streaming_config=config)
for content in audio_generator)
requests = itertools.chain(iter([first_request]), mic_requests)
responses = client.streaming_translate_speech(requests)
# Print the translation responses as they arrive
result = listen_print_loop(responses)
if result == 0:
stream.exit()
def main():
while True:
print()
option = input('Press any key to translate or \'q\' to quit: ')
if option.lower() == 'q':
break
do_translation_loop()
if __name__ == '__main__':
main()
# [END media_translation_translate_from_mic]
The result is like this: no translation and no recognition result.
Result screenshot
I was not sure if the problem was with my mic, so I tried a similar example code from another Google tutorial to translate an audio file. The result was the same: no recognition result and no translation.
Did I miss something?
Thank you very much.
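Not a fix, but one low-risk way to narrow this down: log the raw streaming responses before any parsing, so you can see whether the service returns empty results, an error, or an immediate END_OF_SINGLE_UTTERANCE (which single_utterance=True triggers as soon as it detects silence). A minimal sketch reusing the client and requests generator built in do_translation_loop above:

# Hypothetical debugging aid: dump each raw response from the stream so the
# failure mode (empty result vs. instant end-of-utterance) becomes visible.
responses = client.streaming_translate_speech(requests)
for response in responses:
    print(repr(response))  # inspect speech_event_type and result fields as sent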
import pyaudio
from ibm_watson import SpeechToTextV1
from ibm_watson.websocket import RecognizeCallback, AudioSource
from threading import Thread
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
try:
from Queue import Queue, Full
except ImportError:
from queue import Queue, Full
###############################################
#### Initialize queue to store the recordings ##
###############################################
CHUNK = 1024
# Note: chunks will be discarded if the websocket client can't consume them fast enough,
# so increase the max size as per your choice
BUF_MAX_SIZE = CHUNK * 10
# Buffer to store audio
q = Queue(maxsize=int(round(BUF_MAX_SIZE / CHUNK)))
# Create an instance of AudioSource
audio_source = AudioSource(q, True, True)
###############################################
#### Prepare Speech to Text Service ########
###############################################
# initialize speech to text service
authenticator = IAMAuthenticator('apikey')
speech_to_text = SpeechToTextV1(authenticator=authenticator)
#speech_to_text.set_service_url('https://api.us-south.speech-to-text.watson.cloud.ibm.com/instances/62a2f19f-959f-4c3c-a276-27ab0e458341/v1/recognize')
speech_to_text.set_service_url('https://stream.watsonplatform.net/speech-to-text/api')
# define callback for the speech to text service
class MyRecognizeCallback(RecognizeCallback):
def __init__(self):
RecognizeCallback.__init__(self)
def on_transcription(self, transcript):
print(transcript)
def on_connected(self):
print('Connection was successful')
def on_error(self, error):
print('Error received: {}'.format(error))
def on_inactivity_timeout(self, error):
print('Inactivity timeout: {}'.format(error))
def on_listening(self):
print('Service is listening')
def on_hypothesis(self, hypothesis):
print(hypothesis)
def on_data(self, data):
print(data)
def on_close(self):
print("Connection closed")
# this function will initiate the recognize service and pass in the AudioSource
def recognize_using_websocket(*args):
mycallback = MyRecognizeCallback()
speech_to_text.recognize_using_websocket(audio=audio_source,
content_type='audio/l16; rate=44100',
recognize_callback=mycallback,
interim_results=True)
###############################################
#### Prepare for recording using PyAudio ##
###############################################
# Variables for recording the speech
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
# define callback for pyaudio to store the recording in queue
def pyaudio_callback(in_data, frame_count, time_info, status):
try:
q.put(in_data)
except Full:
pass # discard
return (None, pyaudio.paContinue)
# instantiate pyaudio
audio = pyaudio.PyAudio()
# open stream using callback
stream = audio.open(
format=FORMAT,
channels=CHANNELS,
rate=RATE,
input=True,
frames_per_buffer=CHUNK,
stream_callback=pyaudio_callback,
start=False
)
#########################################################################
#### Start the recording and start service to recognize the stream ######
#########################################################################
print("Enter CTRL+C to end recording...")
stream.start_stream()
try:
    recognize_thread = Thread(target=recognize_using_websocket, args=())
recognize_thread.start()
while True:
pass
except KeyboardInterrupt:
# stop recording
stream.stop_stream()
stream.close()
audio.terminate()
audio_source.completed_recording()
This is the code for IBM's Speech-to-Text service using a mic as input. May I know what the output of the program is? This is the output I'm getting:
Enter CTRL+C to end recording...
Connection was successful
Service is listening
File "C:\Users\---\AppData\Local\Programs\Python\Python38-32\lib\site-packages\websocket\_app.py", line 320, in _callback
callback(self, *args)
File "C:\Users\---\AppData\Local\Programs\Python\Python38-32\lib\site-packages\ibm_watson\websocket\recognize_listener.py", line 199, in on_data
hypothesis = json_object['results'][0]['alternatives'][0][
Connection closed
It suddenly worked when I tested with my wireless headset mic. I'm not sure why, though, as both devices are functioning well. The output is the transcript in the console.
This is happening to me too, and I think the cause is that the audio you sent to the websocket was difficult to recognize, so the websocket's response was none/null; the error occurs when the hypothesis handling tries to read a result that does not exist.
The output of the on_hypothesis callback (def on_hypothesis) will be a string with the transcript of the audio, and the on_data callback (def on_data) will receive JSON like this:
{'result_index': 0, 'results': [{'final': True, 'alternatives': [{'transcript': 'hello ', 'confidence': 0.66}], 'keywords_result': {}}]}
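In other words, a well-formed response can be unpacked as in the following minimal sketch (using the JSON shape shown above; the guard mirrors the failure mode where results comes back empty):

# Unpack the transcript from the on_data JSON shown above, guarding against
# the empty-results case that crashes the SDK's internal hypothesis handling.
response = {'result_index': 0,
            'results': [{'final': True,
                         'alternatives': [{'transcript': 'hello ', 'confidence': 0.66}],
                         'keywords_result': {}}]}
results = response.get('results') or []
if results and results[0].get('alternatives'):
    print(results[0]['alternatives'][0]['transcript'])  # -> 'hello '
else:
    print('empty response: nothing was recognized')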
I'm trying to catch chunks of an mp3 webstream and decode them into PCM samples for signal processing. I tried to catch the audio via requests and io.BytesIO and save the data as a .wav file.
I have to convert the mp3 data to wav data, but I don't know how. (My goal is not to record a .wav file; I am just doing this to test the algorithm.)
I found the pymedia lib, but it is very old (last commit in 2006), targets Python 2.7, and I could not install it.
Maybe it is possible with ffmpeg-python, but I have only seen examples using files as input and output.
Here's my code:
import requests
import io
import soundfile as sf
import struct
import wave
import numpy as np
def main():
stream_url = r'http://dg-wdr-http-dus-dtag-cdn.cast.addradio.de/wdr/1live/diggi/mp3/128/stream.mp3'
r = requests.get(stream_url, stream=True)
sample_array = []
try:
for block in r.iter_content(1024):
data, samplerate = sf.read(io.BytesIO(block), format="RAW", channels=2, samplerate=44100, subtype='FLOAT',
dtype='float32')
sample_array = np.append(sample_array, data)
except KeyboardInterrupt:
print("...saving")
obj = wave.open('sounds/stream1.wav', 'w')
obj.setnchannels(1) # mono
obj.setsampwidth(2) # bytes
obj.setframerate(44100)
data_max = np.nanmax(abs(sample_array))
# fill WAV with samples from sample_array
for sample in sample_array:
if (np.isnan(sample) or np.isnan(32760 * sample / data_max)) is True:
continue
try:
value = int(32760 * sample / data_max) # normalization INT16
except ValueError:
value = 1
finally:
data = struct.pack('<h', value)
obj.writeframesraw(data)
obj.close()
print("end")
if __name__ == '__main__':
main()
Do you have an idea how to handle this problem?
You are missing the decoding of the mp3 stream; you are just saving the mp3 data as a .wav file.
You first need to decode the mp3 audio, which will give you PCM samples plus the audio info (sample rate, channels).
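For example, with a decoder library such as miniaudio (used in the next answer), a complete mp3 buffer decodes in a couple of lines. A hedged sketch: 'dump.mp3' is a hypothetical file of previously captured mp3 bytes; note that arbitrary 1024-byte chunks cut mid-frame (or ICECAST metadata, see below) won't decode standalone:

# Decode a complete mp3 byte buffer into PCM samples plus audio info.
import miniaudio

with open('dump.mp3', 'rb') as f:              # hypothetical captured mp3 data
    decoded = miniaudio.decode(f.read())       # -> DecodedSoundFile
print(decoded.sample_rate, decoded.nchannels)  # the "audio info"
pcm_samples = decoded.samples                  # 16-bit signed PCM sample array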
With help from Irmen and his "miniaudio" and "synthesizer" libraries, I could solve the problem.
The problem was that most radio webstreams use the ICECAST protocol, which interleaves metadata into the stream, so you can't decode it directly.
Using the example script https://github.com/irmen/synthesizer/blob/master/examples/internetradio.py as a template, I wrote a script that records a webstream until KeyboardInterrupt and saves it as a .wav file.
Here's the main part I edited:
...
def _audio_playback(self, pcm_stream):
sample_array = None
with Output(mixing="sequential", frames_per_chunk=44100 // 4) as output:
print("begin recording")
while self.decode_flag:
try:
audio = pcm_stream.read(44100 * 2 * 2 // 20)
if not audio:
break
except (IOError, ValueError):
break
else:
sample = Sample.from_raw_frames(audio, 2, 44100, 2)
if sample_array is None:
sample_array = sample.get_frames_numpy_float()
else:
sample_array = np.append(sample_array, sample.get_frames_numpy_float(), axis=0)
print("...saving")
wavf.write(self.file_location, 44100, sample_array)
print("saved")
...
Based on Bendzko's answer, here is my code:
pip install pyaudio miniaudio
import threading
import time
import sys

try:
    import miniaudio
except ImportError:
    miniaudio = None

import pyaudio
CHUNK = 4096
p = pyaudio.PyAudio()
stream = p.open(format=pyaudio.paInt16,channels=2,rate=44100,output=True)
class RadioThread(threading.Thread):
def run(self):
self.url = "https://impradio.bytemasters.gr/8002/stream"
#run in threading
client = miniaudio.IceCastClient(self.url)
pcm_stream = MiniaudioDecoderPcmStream(client.audio_format,client)
self.audio_playback(pcm_stream)
    def audio_playback(self, pcm_stream):
        global stop_peradio_thread
        while not stop_peradio_thread:
            try:
                audio = pcm_stream.read(CHUNK)
                stream.write(audio.tobytes())
            except Exception:
                pass  # ignore transient read/playback errors and keep streaming
class MiniaudioDecoderPcmStream(miniaudio.StreamableSource):
def __init__(self, fmt, stream):
self.pcm_stream = miniaudio.stream_any(stream, fmt, dither=miniaudio.DitherMode.TRIANGLE)
def read(self, size):
try:
return self.pcm_stream.send(size)
except StopIteration:
return b""
def main():
global stop_peradio_thread
stop_peradio_thread = False
t1 = RadioThread()
t1.start()
while True:
try:
time.sleep(1)
except KeyboardInterrupt:
stop_peradio_thread = True
t1.join()
sys.exit()
main()
I have the following code to create thumbnails and save images. However, after about 1000 items it raises an error saying too many open files. Where is this coming from? And how would I fix the code?
def download_file(url, extension='jpg'):
""" Download a large file. Return path to saved file.
"""
req = requests.get(url)
if not req.ok:
return None
guid = str(uuid.uuid4())
tmp_filename = '/tmp/%s.%s' % (guid, extension)
with open(tmp_filename, 'w') as f:
for chunk in req.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
f.flush()
return tmp_filename
def update_artwork_item(item):
# Download the file
tmp_filename = util.download_file(item.artwork_url)
# Create thumbs
THUMB_SIZES = [(1000, 120), (1000, 30)]
guid = str(uuid.uuid4())
S3_BASE_URL = 'https://s3-us-west-1.amazonaws.com/xxx/'
try:
for size in THUMB_SIZES:
outfile = '%s_%s.jpg' % (guid, size[1])
img = Image.open(tmp_filename).convert('RGB')
img.thumbnail(size, Image.ANTIALIAS)
img.save(outfile, "JPEG")
s3_cmd = '%s %s premiere-avails --norr --public' % (S3_CMD, outfile) ## doesn't work half the time
x = subprocess.check_call(shlex.split(s3_cmd))
if x: raise
subprocess.call(['rm', outfile], stdout=FNULL, stderr=subprocess.STDOUT)
except Exception, e:
print '&&&&&&&&&&', Exception, e
else:
# Save the artwork icons
item.artwork_120 = S3_BASE_URL + guid + '_120.jpg'
item.artwork_30 = S3_BASE_URL + guid + '_30.jpg'
# hack to fix parallel saving
while True:
try:
item.save()
except Exception, e:
print '******************', Exception, e
time.sleep(random.random()*1e-1)
continue
else:
subprocess.call(['rm', tmp_filename], stdout=FNULL, stderr=subprocess.STDOUT)
break
The error is almost certainly from file handles you open and never close. Note that subprocess.call is synchronous: it waits for the command to finish and returns its exit code, not a pipe object, so the rm calls themselves shouldn't leak descriptors. The likelier culprit is Image.open, which you call once per thumbnail size on every item without ever closing the image, so the underlying file handles accumulate until the OS limit is hit; close each image (or open it in a with block) when you're done with it.
Separately, by far the easiest cleanup is to remove files from Python by calling os.remove instead of piping to the Unix rm command. Your use of check_call is okay, because check_call is synchronous and won't return a file object you have to close.
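A hedged sketch of the same loop with explicit cleanup (Pillow is assumed; the S3 upload is elided; the function name and parameters are hypothetical; Image.LANCZOS stands in for the deprecated ANTIALIAS):

import os
from PIL import Image

def update_thumbs(tmp_filename, guid, sizes=((1000, 120), (1000, 30))):
    try:
        for size in sizes:
            outfile = '%s_%s.jpg' % (guid, size[1])
            # The with-block guarantees the source file handle is closed,
            # so descriptors no longer accumulate across items.
            with Image.open(tmp_filename) as src:
                img = src.convert('RGB')
            img.thumbnail(size, Image.LANCZOS)
            img.save(outfile, 'JPEG')
            # ... upload outfile to S3 here ...
            os.remove(outfile)       # replaces the subprocess rm call
    finally:
        os.remove(tmp_filename)      # always clean up the downloaded temp file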