Trying to detect speech using a VAD (Voice Activity Detector) - Python

I am able to read the audio, but I get an error when passing it to the VAD (Voice Activity Detector). I think the error is caused by the type of the frames: when feeding a frame to vad.is_speech(frame, sample_rate), should that frame be a bytes object?
Here is the code:
frame_duration_ms = 10
duration_in_ms = (frame_duration_ms / 1000)     #duration in 10ms
frame_size = int(sample_rate * duration_in_ms)  #frame size of 160
frame_bytes = frame_size * 2

def frame_generator(buffer, frame_bytes):
    # repeatedly store a 320-byte slice in frame_stored while frame_bytes is less than the size of the buffer
    while offset + frame_bytes < len(buffer):
        frame_stored = buffer[offset : offset + frame_bytes]
        offset = offset + frame_bytes
    return frame_stored

num_padding_frames = int(padding_duration_ms / frame_duration_ms)
# use a deque for the sliding window
ring_buffer = deque(maxlen=num_padding_frames)
# we have two states: TRIGGERED and NOTTRIGGERED
triggered = True  #NOTTRIGGERED state
frames = frame_generator(buffer, frame_bytes)
speech_frame = []
for frame in frames:
    is_speech = vad.is_speech(frame, sample_rate)
Here is the error message:
TypeError                                 Traceback (most recent call last)
<ipython-input-...> in <module>
     16 speech_frame = []
     17 for frame in frames:
---> 18     is_speech = vad.is_speech(frame, sample_rate)
     19     #print(frames)

C:\Program Files\Python38\lib\site-packages\webrtcvad.py in is_speech(self, buf, sample_rate, length)
     20
     21     def is_speech(self, buf, sample_rate, length=None):
---> 22         length = length or int(len(buf) / 2)
     23         if length * 2 > len(buf):
     24             raise IndexError(

TypeError: object of type 'int' has no len()

I have solved it. vad.is_speech(buf, sample_rate) takes buf and computes its length, but an integer does not support len() in Python: iterating over a bytes object yields ints, so each frame I was passing in was an int rather than a bytes slice.
For example, this throws an error:
num = 1
print(len(num))
whereas this works:
data = [1, 2, 3, 4]
print(len(data))
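The underlying reason the frames were integers in the first place (a side note of mine): iterating over a bytes object yields ints, while slicing it yields bytes, which is why the corrected generator below collects slices into a list:
buf = b'\x00\x01\x02\x03'
for b in buf:
    print(type(b))       # <class 'int'>  -> this is what is_speech was receiving
print(type(buf[0:2]))    # <class 'bytes'> -> a slice keeps the bytes type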
So here is the corrected code:
frame_duration_ms = 10
duration_in_ms = (frame_duration_ms / 1000)     #duration in 10ms
frame_size = int(sample_rate * duration_in_ms)  #frame size of 160
frame_bytes = frame_size * 2
values = []

def frame_generator(buffer, frame_bytes):
    # repeatedly append 320-byte slices to the values list while frame_bytes is less than the size of the buffer
    offset = 0  # start at the beginning of the buffer
    while offset + frame_bytes < len(buffer):
        frame_stored = buffer[offset : offset + frame_bytes]
        offset = offset + frame_bytes
        values.append(frame_stored)
    return values

num_padding_frames = int(padding_duration_ms / frame_duration_ms)
# use a deque for the sliding window
ring_buffer = deque(maxlen=num_padding_frames)
# we have two states: TRIGGERED and NOTTRIGGERED
triggered = True  #NOTTRIGGERED state
frames = frame_generator(buffer, frame_bytes)
for frame in frames:
    is_speech = vad.is_speech(frame, sample_rate)

import wave
import webrtcvad

# Initialize a vad object
audioFile = wave.open('ENG_M.wav')
framesAudio = audioFile.readframes(800)
#print(fraud.frames)
vad = webrtcvad.Vad()
# Run the VAD on 10 ms frames at a 16000 Hz sampling rate
sample_rate = 16000
frame_duration = 10  # in ms
for f in framesAudio:
    # Detecting speech
    final_frame = f.to_bytes(2, "big") * int(sample_rate * frame_duration / 1000)
    print(f'Contains speech: {vad.is_speech(final_frame, sample_rate)}')
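For reference, here is a minimal end-to-end sketch of the same idea (my own illustration, not the asker's code; it assumes a 16 kHz, 16-bit, mono WAV file with the hypothetical name audio.wav): slice the raw byte buffer into 10 ms chunks and pass each bytes chunk, never an int, to is_speech.
import wave
import webrtcvad

vad = webrtcvad.Vad(2)                      # aggressiveness 0-3
with wave.open('audio.wav', 'rb') as wf:    # assumed: 16 kHz, 16-bit, mono PCM
    sample_rate = wf.getframerate()
    audio = wf.readframes(wf.getnframes())  # raw PCM bytes

frame_duration_ms = 10
frame_bytes = int(sample_rate * frame_duration_ms / 1000) * 2  # 2 bytes per 16-bit sample

def frame_generator(buffer, frame_bytes):
    """Yield successive frame_bytes-sized bytes chunks from the buffer."""
    offset = 0
    while offset + frame_bytes <= len(buffer):
        yield buffer[offset:offset + frame_bytes]
        offset += frame_bytes

for frame in frame_generator(audio, frame_bytes):
    print(vad.is_speech(frame, sample_rate))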


Audio frame not converting to ndarray

I am trying to run a Colab notebook that trains OpenAI's Jukebox, but when I run the function that loads the audio, I get an error:
File "/content/jukebox/jukebox/data/files_dataset.py", line 82, in get_song_chunk
data, sr = load_audio(filename, sr=self.sr, offset=offset, duration=self.sample_length)
File "/content/jukebox/jukebox/utils/io.py", line 48, in load_audio
frame = frame.to_ndarray(format='fltp') # Convert to floats and not int16
AttributeError: 'list' object has no attribute 'to_ndarray'
It seems to be interpreting the frame input as a list, which when printed looks like this:
[<av.AudioFrame 0, pts=None, 778 samples at 22050Hz, stereo, fltp at
0x7fd03dd64150>]
When I try to change to frame = resampler.resample(frame) I get this error:
TypeError: 'av.audio.frame.AudioFrame' object cannot be interpreted as
an integer
I don't really know much about audio files, so I'm not sure how to debug this and would appreciate help.
The full code to load the audio is below.
def load_audio(file, sr, offset, duration, resample=True, approx=False, time_base='samples', check_duration=True):
    if time_base == 'sec':
        offset = offset * sr
        duration = duration * sr
    # Loads at target sr, stereo channels, seeks from offset, and stops after duration
    container = av.open(file)
    audio = container.streams.get(audio=0)[0]  # Only first audio stream
    audio_duration = audio.duration * float(audio.time_base)
    if approx:
        if offset + duration > audio_duration * sr:
            # Move back one window. Cap at audio_duration
            offset = np.min(audio_duration * sr - duration, offset - duration)
    else:
        if check_duration:
            assert offset + duration <= audio_duration * sr, f'End {offset + duration} beyond duration {audio_duration*sr}'
    if resample:
        resampler = av.AudioResampler(format='fltp', layout='stereo', rate=sr)
    else:
        assert sr == audio.sample_rate
    offset = int(offset / sr / float(audio.time_base))  # int(offset / float(audio.time_base)) # Use units of time_base for seeking
    duration = int(duration)  # duration = int(duration * sr) # Use units of time_out ie 1/sr for returning
    sig = np.zeros((2, duration), dtype=np.float32)
    container.seek(offset, stream=audio)
    total_read = 0
    for frame in container.decode(audio=0):  # Only first audio stream
        if resample:
            frame.pts = None
            frame = resampler.resample(frame)
        frame = frame.to_ndarray(format='fltp')  # Convert to floats and not int16
        read = frame.shape[-1]
        if total_read + read > duration:
            read = duration - total_read
        sig[:, total_read:total_read + read] = frame[:, :read]
        total_read += read
        if total_read == duration:
            break
    assert total_read <= duration, f'Expected {duration} frames, got {total_read}'
    return sig, sr
If your variable frame is interpreted as a list, you could replace frame = resampler.resample(frame) with frame = resampler.resample(frame)[0]. Your code ran without errors once I made this edit.
Try replacing frame = frame.to_ndarray(format='fltp') with a direct conversion of the variable frame:
import numpy as np
#frame = frame.to_ndarray(format='fltp') # Original line
frame = np.array(frame)
If you want it to be a specific data type, you can set the dtype argument of np.array:
frame = np.array(frame, dtype=np.float32)
Try: frame = frame[0].to_ndarray(format='fltp')
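For context, newer PyAV releases changed AudioResampler.resample() to return a list of frames rather than a single frame, which is consistent with both errors above. Below is a minimal decode loop that handles either behaviour (my own sketch, not the Jukebox code; the file path and target rate are placeholders):
import av
import numpy as np

def decode_resampled(path, sr=22050):
    """Decode an audio file to a (channels, samples) float array, resampled to sr."""
    container = av.open(path)
    resampler = av.AudioResampler(format='fltp', layout='stereo', rate=sr)
    chunks = []
    for frame in container.decode(audio=0):
        resampled = resampler.resample(frame)
        # Older PyAV returns a single AudioFrame, newer versions return a list of them
        if not isinstance(resampled, list):
            resampled = [resampled]
        for rf in resampled:
            chunks.append(rf.to_ndarray())  # planar float32, shape (2, n)
    return np.concatenate(chunks, axis=-1)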

Sending and receiving a signal at the same time

I'm working in Python on a Raspberry Pi. I'm trying to send out a signal with a motor controller and then receive a signal with a sensing HAT after it passes through my plant (an RC filter in this case).
The important thing is that I want to generate the output and read the input as close to simultaneously as possible. I was hoping to use multiprocessing to have one thread send the signal while the other reads the incoming signal, but I keep getting confused about how threads work in Python.
In short, is it possible to do two different tasks with multiprocessing and then repeat those tasks (sending and reading a signal) until a condition is met, like in a while loop?
(Edited with Code)
from __future__ import print_function
from PyQt5.QtWidgets import QAction
from pyqtgraph.Qt import QtGui, QtCore
from adafruit_motorkit import MotorKit
import pyqtgraph as pg
import sys
from sys import stdout
import numpy as np
from daqhats import mcc118, OptionFlags, HatIDs, HatError
from daqhats_utils import select_hat_device, enum_mask_to_string, \
chan_list_to_mask
from decimal import *
import math
import time
getcontext().prec = 3
total_samples_read = 0
READ_ALL_AVAILABLE = -1
channelData = np.zeros(4, dtype=float)
CURSOR_BACK_2 = '\x1b[2D'
ERASE_TO_END_OF_LINE = '\x1b[0K'
# for plotting data
########################################
scan_rate = 1000  # scan rate in Hz
maxtime = 30      # seconds to run for
Datatime = np.zeros(maxtime * scan_rate, dtype=float)  # list of times when samples are taken
Data1 = np.zeros(maxtime * scan_rate, dtype=float)     # samples taken
data_index = 0 # Maximum index of data points taken
dt = Decimal(1 / scan_rate) # difference in time between indexes of Datatime
display_index = 0 # maximum index of Data being displayed on plot
#################################
# variables for Data logger
##########################
is_scanning = False
channels = [0]
channel_mask = chan_list_to_mask(channels)
num_channels = len(channels)
samples_per_channel = 0
options = OptionFlags.CONTINUOUS
######################################
startedTime = 0 # time at program start
myTime = 0 # time since program started
try:
    address = select_hat_device(HatIDs.MCC_118)
    hat = mcc118(address)
except (HatError, ValueError) as err:
    print('\n', err)
class MainWindow(pg.GraphicsWindow):
    def __init__(self, *args, **kwargs):
        super(pg.GraphicsWindow, self).__init__(*args, **kwargs)
        self.delay = 30  # ms
        self.quit = QAction("Quit", self)
        self.quit.triggered.connect(self.clean_close)
        self.timer = QtCore.QTimer()
        self.timer.setInterval(self.delay)
        self.timer.timeout.connect(self.update_plot)

    # plots data and runs calibrate between trials
    def update_plot(self):
        global display_index, Datatime, Data1
        kit.motor1.throttle = .4 + .2 * math.cos((time.time() - startedTime) * 2 * np.pi * 1)  # 1 Hz sinusoid out of motor
        if data_index < len(Data1):
            Collect_Data()
        plot.setXRange(0, 20, padding=0)
        plot.setXRange(0, 20, padding=0)
        curve.setData(Datatime[:display_index], Data1[:display_index])
        display_index += 1
        app.processEvents()

    def clean_close(self):
        self.close()
# starts data collection
def Collect_Data():
    global is_scanning
    """
    This function is executed automatically when the module is run directly.
    """
    # Store the channels in a list and convert the list to a channel mask that
    # can be passed as a parameter to the MCC 118 functions.
    try:
        # Select an MCC 118 HAT device to use.
        # actual_scan_rate = hat.a_in_scan_actual_rate(num_channels, scan_rate)
        # Configure and start the scan.
        # Since the continuous option is being used, the samples_per_channel
        # parameter is ignored if the value is less than the default internal
        # buffer size (10000 * num_channels in this case). If a larger internal
        # buffer size is desired, set the value of this parameter accordingly.
        if not is_scanning:
            hat.a_in_scan_start(channel_mask, samples_per_channel, scan_rate,
                                options)
            is_scanning = True
        try:
            read_and_display_data(hat, num_channels)
        except KeyboardInterrupt:
            # Clear the '^C' from the display.
            print(CURSOR_BACK_2, ERASE_TO_END_OF_LINE, '\n')
            print('Stopping')
            hat.a_in_scan_stop()
            hat.a_in_scan_cleanup()
    except (HatError, ValueError) as err:
        print('\n', err)
# reads Data off of Hat and adds to Data1
def read_and_display_data(hat, num_channels):
    global channelData, data_index, Datatime, Data1
    total_samples_read = 0
    read_request_size = READ_ALL_AVAILABLE
    # When doing a continuous scan, the timeout value will be ignored in the
    # call to a_in_scan_read because we will be requesting that all available
    # samples (up to the default buffer size) be returned.
    timeout = 5.0
    # Read all of the available samples (up to the size of the read_buffer which
    # is specified by the user_buffer_size). Since the read_request_size is set
    # to -1 (READ_ALL_AVAILABLE), this function returns immediately with
    # whatever samples are available (up to user_buffer_size) and the timeout
    # parameter is ignored.
    trigger = True
    while trigger == True:
        read_result = hat.a_in_scan_read(read_request_size, timeout)
        # Check for an overrun error
        if read_result.hardware_overrun:
            print('\n\nHardware overrun\n')
            break
        elif read_result.buffer_overrun:
            print('\n\nBuffer overrun\n')
            break
        samples_read_per_channel = int(len(read_result.data) / num_channels)
        total_samples_read += samples_read_per_channel
        # adds all data in buffer to data to be plotted.
        count = 0
        if samples_read_per_channel > 0:
            index = samples_read_per_channel * num_channels - num_channels
            while count < samples_read_per_channel:
                for i in range(num_channels):
                    channelData[i] = read_result.data[index + i]
                if data_index < len(Data1):
                    Data1[data_index] = channelData[0]
                    Datatime[data_index] = float(dt * Decimal(data_index))
                    data_index += 1
                count += 1
        trigger = False
    stdout.flush()
if __name__ == '__main__':
    app = QtGui.QApplication([])
    win = MainWindow()  # display window
    plot = win.addPlot(1, 0)
    curve = plot.plot()
    win.show()
    kit = MotorKit()  # implements motor driver
    kit.motor1.throttle = .4  # values: 1 is 5v and 0 is 0 volts
    startedTime = time.time()
    # u = .2*math.cos(t * 2*np.pi*1)
    win.timer.start()
    sys.exit(app.exec_())
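Since the core question is whether two tasks can run in parallel and repeat until a condition is met, here is a minimal sketch of that pattern on its own (my own illustration, independent of the DAQ/motor code above; send_signal and read_signal are hypothetical placeholders), using multiprocessing with an Event as the stop condition:
import time
from multiprocessing import Process, Event

def send_signal(stop):
    """Placeholder for driving the motor controller output."""
    while not stop.is_set():
        # e.g. set the motor throttle here
        time.sleep(0.001)

def read_signal(stop):
    """Placeholder for reading the sensing HAT input."""
    while not stop.is_set():
        # e.g. read a sample from the HAT here
        time.sleep(0.001)

if __name__ == '__main__':
    stop = Event()
    workers = [Process(target=send_signal, args=(stop,)),
               Process(target=read_signal, args=(stop,))]
    for w in workers:
        w.start()
    time.sleep(5)   # run both loops for 5 seconds (the stop "condition")
    stop.set()      # tell both loops to finish
    for w in workers:
        w.join()
The same structure works with threading.Thread and threading.Event if the two loops mostly wait on I/O.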

Pimoroni Enviro+ TypeError: argument should be integer or bytes-like object, not 'str'

I have been trying to understand the error reported when I run an example supplied with some hardware I purchased.
I have tried googling around, but every answer I get is a bit beyond my comprehension. I think what is going wrong is that the script, or one of the imported scripts, was written for Python 2 and I am trying to run it in Python 3.
When I try to run it in Python 2 I get a whole host of other problems, so I have been trying to make it work with 3.
The hardware I purchased is the Enviro+ sensor suite for the Raspberry Pi sold by Pimoroni.
Hardware Link
Github Library
Pimoroni Tutorial
#!/usr/bin/env python
import time
import colorsys
import os
import sys
import ST7735
import ltr559
from bme280 import BME280
from pms5003 import PMS5003
from enviroplus import gas
from subprocess import PIPE, Popen
from PIL import Image
from PIL import ImageDraw
from PIL import ImageFont
print("""all-in-one.py - Displays readings from all of Enviro plus' sensors
Press Ctrl+C to exit!
""")
# BME280 temperature/pressure/humidity sensor
bme280 = BME280()
# PMS5003 particulate sensor
pms5003 = PMS5003()
# Create ST7735 LCD display class
st7735 = ST7735.ST7735(
    port=0,
    cs=1,
    dc=9,
    backlight=12,
    rotation=270,
    spi_speed_hz=10000000
)
# Initialize display
st7735.begin()
WIDTH = st7735.width
HEIGHT = st7735.height
# Set up canvas and font
img = Image.new('RGB', (WIDTH, HEIGHT), color=(0, 0, 0))
draw = ImageDraw.Draw(img)
path = os.path.dirname(os.path.realpath(__file__))
font = ImageFont.truetype(path + "/fonts/Asap/Asap-Bold.ttf", 20)
message = ""
# The position of the top bar
top_pos = 25
# Displays data and text on the 0.96" LCD
def display_text(variable, data, unit):
    # Maintain length of list
    values[variable] = values[variable][1:] + [data]
    # Scale the values for the variable between 0 and 1
    colours = [(v - min(values[variable]) + 1) / (max(values[variable])
               - min(values[variable]) + 1) for v in values[variable]]
    # Format the variable name and value
    message = "{}: {:.1f} {}".format(variable[:4], data, unit)
    print(message)
    draw.rectangle((0, 0, WIDTH, HEIGHT), (255, 255, 255))
    for i in range(len(colours)):
        # Convert the values to colours from red to blue
        colour = (1.0 - colours[i]) * 0.6
        r, g, b = [int(x * 255.0) for x in colorsys.hsv_to_rgb(colour, 1.0, 1.0)]
        # Draw a 1-pixel wide rectangle of colour
        draw.rectangle((i, top_pos, i + 1, HEIGHT), (r, g, b))
        # Draw a line graph in black
        line_y = HEIGHT - (top_pos + (colours[i] * (HEIGHT - top_pos))) + top_pos
        draw.rectangle((i, line_y, i + 1, line_y + 1), (0, 0, 0))
    # Write the text at the top in black
    draw.text((0, 0), message, font=font, fill=(0, 0, 0))
    st7735.display(img)
# Get the temperature of the CPU for compensation
def get_cpu_temperature():
    process = Popen(['vcgencmd', 'measure_temp'], stdout=PIPE)
    output, _error = process.communicate()
    return float(output[output.index('=') + 1:output.rindex("'")])
# Tuning factor for compensation. Decrease this number to adjust the
# temperature down, and increase to adjust up
factor = 0.8
cpu_temps = [0] * 5
delay = 0.5 # Debounce the proximity tap
mode = 0 # The starting mode
last_page = 0
light = 1
# Create a values dict to store the data
variables = ["temperature",
"pressure",
"humidity",
"light",
"oxidised",
"reduced",
"nh3",
"pm1",
"pm25",
"pm10"]
values = {}
for v in variables:
values[v] = [1] * WIDTH
# The main loop
try:
    while True:
        proximity = ltr559.get_proximity()
        # If the proximity crosses the threshold, toggle the mode
        if proximity > 1500 and time.time() - last_page > delay:
            mode += 1
            mode %= len(variables)
            last_page = time.time()
        # One mode for each variable
        if mode == 0:
            variable = "temperature"
            unit = "C"
            cpu_temp = get_cpu_temperature()
            # Smooth out with some averaging to decrease jitter
            cpu_temps = cpu_temps[1:] + [cpu_temp]
            avg_cpu_temp = sum(cpu_temps) / float(len(cpu_temps))
            raw_temp = bme280.get_temperature()
            data = raw_temp - ((avg_cpu_temp - raw_temp) / factor)
            display_text(variable, data, unit)
        if mode == 1:
            variable = "pressure"
            unit = "hPa"
            data = bme280.get_pressure()
            display_text(variable, data, unit)
        if mode == 2:
            variable = "humidity"
            unit = "%"
            data = bme280.get_humidity()
            display_text(variable, data, unit)
        if mode == 3:
            variable = "light"
            unit = "Lux"
            if proximity < 10:
                data = ltr559.get_lux()
            else:
                data = 1
            display_text(variable, data, unit)
        if mode == 4:
            variable = "oxidised"
            unit = "kO"
            data = gas.read_all()
            data = data.oxidising / 1000
            display_text(variable, data, unit)
        if mode == 5:
            variable = "reduced"
            unit = "kO"
            data = gas.read_all()
            data = data.reducing / 1000
            display_text(variable, data, unit)
        if mode == 6:
            variable = "nh3"
            unit = "kO"
            data = gas.read_all()
            data = data.nh3 / 1000
            display_text(variable, data, unit)
        if mode == 7:
            variable = "pm1"
            unit = "ug/m3"
            data = pms5003.read()
            data = data.pm_ug_per_m3(1.0)
            display_text(variable, data, unit)
        if mode == 8:
            variable = "pm25"
            unit = "ug/m3"
            data = pms5003.read()
            data = data.pm_ug_per_m3(2.5)
            display_text(variable, data, unit)
        if mode == 9:
            variable = "pm10"
            unit = "ug/m3"
            data = pms5003.read()
            data = data.pm_ug_per_m3(10)
            display_text(variable, data, unit)
# Exit cleanly
except KeyboardInterrupt:
    sys.exit(0)
When I try to run the code, I get the following result:
Traceback (most recent call last):
File "all-in-one.py", line 135, in <module>
cpu_temp = get_cpu_temperature()
File "all-in-one.py", line 89, in get_cpu_temperature
return float(output[output.index('=') + 1:output.rindex("'")])
TypeError: argument should be integer or bytes-like object, not 'str'
Please forgive me if I have not filled this help request out correctly; I am very new to forums (I hardly ever post in them, although I read them a lot for help), and I am also very new to Python and Linux.
Any help and support from the community would be massively appreciated. Thank you in advance.
SW
According to the Python 3 documentation for subprocess.communicate(), the type of output and _error can be either str (what you want) or bytes. If you were getting strings back, you wouldn't have this problem, but the TypeError message you're getting is exactly what you get when you try to call index() on a bytes object with a str argument.
Demonstrably:
>>> output = "temperature = '88 C'".encode('utf-8') #this is of type bytes
>>> output
b"temperature = '88 C'"
>>> output.index('=')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: argument should be integer or bytes-like object, not 'str'
>>> output.index(ord('='))
12
So you should replace output.index('=') with output.index(ord('=')) and output.rindex("'") with output.rindex(ord("'")).
EDIT
I realized this much later, but you can avoid ord() entirely by prefixing your search string with b, i.e. passing a bytes literal:
output.index(b'=')
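Applied to the example script, here is a minimal sketch of the corrected helper (my own illustration; it assumes vcgencmd prints something like temp=47.2'C): decode the bytes once, and the original slicing works unchanged.
from subprocess import PIPE, Popen

def get_cpu_temperature():
    # communicate() returns bytes here, e.g. b"temp=47.2'C\n"
    process = Popen(['vcgencmd', 'measure_temp'], stdout=PIPE)
    output, _error = process.communicate()
    output = output.decode('utf-8')  # now a str, so '=' and "'" lookups work
    return float(output[output.index('=') + 1:output.rindex("'")])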

Sounddevice ValueError: could not broadcast input array from shape (2048) into shape (2048,1)

I know it may seem to be another one of those ValueError posts, but please hear me out and understand that I have tried Googling and browsing the StackOverflow community for a workable solution to my problem.
I am currently trying to pass my demodulated samples to the sounddevice module and play them back in real time using its callback function.
Error:
File "rtl-fm-cont.py", line 120, in audio_callback
outdata[:] = data ValueError: could not broadcast input array from shape (2048) into shape (2048,1)
I have attached an full output of this code below:
#!/usr/bin/env python
# library imports ...
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument('-d', '--device', type=int_or_str,
                    help='output device (numeric ID or substring)')
parser.add_argument('-b', '--blocksize', type=int, default=2048,
                    help='block size (default: %(default)s)')
parser.add_argument(
    '-q', '--buffersize', type=int, default=20,
    help='number of blocks used for buffering (default: %(default)s)')
args = parser.parse_args()
if args.blocksize == 0:
    parser.error('blocksize must not be zero')
if args.buffersize < 1:
    parser.error('buffersize must be at least 1')
q = queue.Queue(maxsize=args.buffersize)
event = threading.Event()
device_index = RtlSdr.get_device_index_by_serial('00000001')
class fmDemodulator(object):
    # Called for each update
    def __init__(self, sdr=None):
        self.sdr = sdr if sdr else RtlSdr(device_index)

    def Demod(self, *args):
        Fs = self.sdr.sample_rate
        # Fc = self.sdr.center_freq
        # Read IQ samples
        samples = self.sdr.read_samples(4*12*2048)
        print('Fetching {} IQ samples from SDR #{}'.format(len(samples), device_index))
        # Convert sampled data into numpy array
        x1 = np.array(samples).astype("complex64")
        # Downmixed Baseband Signal (Adjust offset to be centered)
        offsetFreq = 0  # already centered
        fc1 = np.exp(-1.0j*2.0*np.pi * offsetFreq/Fs*np.arange(len(x1)))
        x2 = x1 * fc1
        # Filter and downsample the FM Radio Signal
        bwFM = 200000  # approx. 170 kHz for a single channel
        decRate = int(Fs/bwFM)
        x3 = signal.decimate(x2, decRate)
        newFs = Fs/decRate
        ### Demodulate 200kHz FM Signal
        # Polar discriminator
        y4 = x3[1:] * np.conj(x3[:-1])
        x4 = np.angle(y4)
        # The de-emphasis filter
        # Given a signal 'x4' (in a numpy array) with sampling rate newFs
        d = newFs * 75e-6  # Calculate the # of samples to hit the -3dB point
        x = np.exp(-1/d)   # Calculate the decay between each sample
        b = [1-x]          # Create the filter coefficients
        a = [1, -x]
        x5 = signal.lfilter(b, a, x4)
        # Find a decimation rate to achieve audio sampling rate between 44-48 kHz
        audioFreq = 44100
        dec_audio = int(newFs/audioFreq)
        audioFs = newFs/dec_audio
        x6 = signal.decimate(x5, dec_audio)
        # Scale audio to adjust volume
        x6 *= 10000 / np.max(np.abs(x6))
        # debug
        print('decRate: {}, newFs : {}, dec_audio: {}'.format(decRate, newFs, dec_audio))
        print('Output audio: {} samples, audioFreq: {}, audioFs: {}'.format(len(x6), audioFreq, audioFs))
        return x6
# https://python-sounddevice.readthedocs.io/en/0.3.6/examples.html
def audio_callback(outdata, frames, time, status):
    """This is called (from a separate thread) for each audio block."""
    assert frames == args.blocksize
    if status.output_underflow:
        print('Output underflow: increase blocksize?', file=sys.stderr)
        raise sd.CallbackAbort
    assert not status
    try:
        data = q.get_nowait()
        print(data)
        print(data.dtype)
    except queue.Empty:
        print('Buffer is empty: increase buffersize?', file=sys.stderr)
        raise sd.CallbackAbort
    if len(data) < len(outdata):
        outdata[:len(data)] = data
        outdata[len(data):] = b'\x00' * (len(outdata) - len(data))
        raise sd.CallbackStop
    else:
        outdata[:] = data
def main():
    sdr = RtlSdr(device_index)
    fm = fmDemodulator(sdr)
    # SDR Configurations
    sdr.sample_rate = int(2.4e6)  # Hz
    sdr.center_freq = int(102e6)  # Hz
    sdr.freq_correction = 77      # PPM +- 20
    sdr.gain = 'auto'
    samplerate = 50000
    channels = 1
    try:
        for _ in range(args.buffersize):
            data = fm.Demod()
            if not np.any(data):
                break
            q.put_nowait(data)  # pre-fill queue
        stream = sd.OutputStream(
            samplerate=samplerate, blocksize=args.blocksize,
            device=args.device, channels=channels, dtype='int16',
            callback=audio_callback, finished_callback=event.set)
        with stream:
            timeout = args.blocksize * args.buffersize / samplerate
            while np.any(data):  # while data
                data = fm.Demod()
                q.put(data, timeout=timeout)
            event.wait()  # Wait until playback is finished
    except KeyboardInterrupt:
        parser.exit('\nInterrupted by user')
    except queue.Full:
        # A timeout occurred, i.e. there was an error in the callback
        parser.exit(1)
    except Exception as e:
        parser.exit(type(e).__name__ + ': ' + str(e))
Output:
Output audio: 2048 samples, audioFreq: 44100, audioFs: 50000.0
Fetching 98304 IQ samples from SDR #0
...
Fetching 98304 IQ samples from SDR #0
decRate: 12, newFs : 200000.0, dec_audio: 4
Output audio: 2048 samples, audioFreq: 44100, audioFs: 50000.0
Fetching 98304 IQ samples from SDR #0
[ 627.05045796 1835.36815837 3381.16496121 ... 401.43836645
-1156.07050642 -1291.0900775 ]
float64
From cffi callback <function _StreamBase.__init__.<locals>.callback_ptr at 0x10eabbea0>:
Traceback (most recent call last):
File "/Users/user/.local/lib/python3.6/site-packages/sounddevice.py", line 741, in callback_ptr
return _wrap_callback(callback, data, frames, time, status)
File "/Users/user/.local/lib/python3.6/site-packages/sounddevice.py", line 2517, in _wrap_callback
decRate: 12, newFs : 200000.0, dec_audio: 4
Output audio: 2048 samples, audioFreq: 44100, audioFs: 50000.0
callback(*args)
File "rtl-fm-cont.py", line 120, in audio_callback
outdata[:] = data
ValueError: could not broadcast input array from shape (2048) into shape (2048,1)
Fetching 98304 IQ samples from SDR #0
decRate: 12, newFs : 200000.0, dec_audio: 4
Output audio: 2048 samples, audioFreq: 44100, audioFs: 50000.0
Your help will be highly appreciated.
Thank you in advance.
Seems like a simple
outdata[:] = data.reshape(2048, 1)
(see numpy.reshape) fixed the problem.
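The underlying point is that sounddevice hands the callback an outdata array of shape (frames, channels), so a 1-D block has to be reshaped; reshape(-1, 1) avoids hard-coding 2048. Here is a self-contained sketch of that pattern (my own illustration, playing a sine tone instead of the SDR samples):
import numpy as np
import sounddevice as sd

samplerate = 48000
blocksize = 2048
phase = 0.0

def callback(outdata, frames, time, status):
    """outdata has shape (frames, channels); a 1-D block must be reshaped to fit."""
    global phase
    t = (np.arange(frames) + phase) / samplerate
    block = (0.2 * np.sin(2 * np.pi * 440 * t)).astype(np.float32)  # shape (frames,)
    outdata[:] = block.reshape(-1, 1)  # -> shape (frames, 1) for a mono stream
    phase += frames

with sd.OutputStream(samplerate=samplerate, blocksize=blocksize,
                     channels=1, dtype='float32', callback=callback):
    sd.sleep(2000)  # play a 440 Hz tone for 2 seconds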

Divide audio signal into small samples

I'm trying to process an audio signal, divide it into N discrete segments, and then play those segments independently.
How can I do this using Python?
import wave
import pygame
import time

def slice(infile, outfilename, start_ms, end_ms):
    width = infile.getsampwidth()  # Returns sample width in bytes
    rate = infile.getframerate()   # Returns sampling frequency
    fpms = rate // 1000            # frames per ms (integer, so setpos/readframes get ints on Python 3)
    length = (end_ms - start_ms) * fpms
    start_index = start_ms * fpms
    out = wave.open(outfilename, "w")
    out.setparams((infile.getnchannels(), width, rate, length,
                   infile.getcomptype(), infile.getcompname()))
    infile.rewind()                      # Rewind the file pointer to the beginning of the audio stream
    anchor = infile.tell()               # Return current file pointer position
    infile.setpos(anchor + start_index)  # Set the file pointer to the specified position
    out.writeframes(infile.readframes(length))  # Write audio frames and make sure nframes is correct

if __name__ == "__main__":
    slice(wave.open("song1.wav", "r"), "out.wav", 500, 5000)
    pygame.mixer.init()
    pygame.mixer.music.load("out.wav")
    pygame.mixer.music.play()
    while pygame.mixer.music.get_busy() == True:
        continue
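To get N independent segments rather than a single slice, here is a short sketch built on the slice() function above (my own illustration; it assumes song1.wav exists and writes part_0.wav, part_1.wav, ... then plays them one after another):
import wave
import pygame

N = 4  # number of equal segments

src = wave.open("song1.wav", "r")
total_ms = int(src.getnframes() / src.getframerate() * 1000)
seg_ms = total_ms // N

pygame.mixer.init()
for i in range(N):
    name = "part_{}.wav".format(i)
    slice(src, name, i * seg_ms, (i + 1) * seg_ms)  # slice() as defined above
    pygame.mixer.music.load(name)
    pygame.mixer.music.play()
    while pygame.mixer.music.get_busy():
        pass
If temporary files are undesirable, the pydub library's AudioSegment objects can be sliced by milliseconds entirely in memory.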
