I am trying to use aubio and Python for a school project. Here's the goal: detect when someone emits two sounds, each 2 s long, with an interval of at most 3 s between them. The second one needs to be higher-pitched than the first one. When these conditions are met, the program sends a Wake-on-LAN packet (not implemented in the current code).
import alsaaudio
import numpy as np
import aubio
import time
import threading
class Audio_watcher:
    """Listen on an ALSA capture device for a rising two-tone signal.

    ``get_audio`` keeps a rolling history of the last ``nb_samples`` pitch
    and energy estimates. ``detect_tone`` periodically inspects the pitch
    history and prints ``'wol'`` when a steady tone is followed, before the
    ``tone_max_interval`` timer resets the state, by a second steady tone
    whose mean frequency exceeds ``tone_diff_ratio`` times the first one.
    Both methods reschedule themselves with ``threading.Timer``.
    """

    # constants
    samplerate = 44100                         # capture rate handed to ALSA and aubio
    win_s = 2048                               # aubio pitch buffer size
    hop_s = win_s // 2                         # aubio pitch hop size
    framesize = hop_s                          # ALSA period size (frames per read)
    nb_samples = 20                            # length of the rolling histories
    tone_duration = 2.0                        # seconds between two detect_tone runs
    per_sampling = tone_duration / nb_samples  # seconds between two get_audio runs
    tone_max_interval = 3.0                    # seconds before a first tone is forgotten
    tone_diff_ratio = 2                        # second tone must exceed ratio * first

    def __init__(self):
        """Allocate the histories and open the recorder, pitch tracker and filter."""
        self.last_frequencies = np.zeros(Audio_watcher.nb_samples)
        self.last_energies = np.zeros(Audio_watcher.nb_samples)
        # 0 means "no first tone currently remembered".
        self.detected_tone = 0

        # set up audio input
        recorder = alsaaudio.PCM(type=alsaaudio.PCM_CAPTURE)
        recorder.setperiodsize(Audio_watcher.framesize)
        recorder.setrate(Audio_watcher.samplerate)
        recorder.setformat(alsaaudio.PCM_FORMAT_FLOAT_LE)
        recorder.setchannels(1)
        self.recorder = recorder

        pitcher = aubio.pitch("default", Audio_watcher.win_s,
                              Audio_watcher.hop_s, Audio_watcher.samplerate)
        pitcher.set_unit("Hz")
        pitcher.set_silence(-40)
        self.pitcher = pitcher

        # A-weighting filter applied to every frame before pitch detection
        f = aubio.digital_filter(7)
        f.set_a_weighting(Audio_watcher.samplerate)
        self.f = f

    def get_audio(self):
        """Read one frame, push its pitch and energy into the histories, reschedule.

        NOTE(review): one period of ``framesize`` frames is read per call while
        calls are ``per_sampling`` seconds apart; if a period is shorter than
        that interval the capture buffer can fall behind real time - confirm
        against the observed detection delay.
        """
        # read and convert data from audio input
        length, data = self.recorder.read()
        if length > 0:
            # frombuffer replaces the deprecated binary mode of np.fromstring;
            # the copy keeps the array writable, as fromstring's result was.
            samples = np.frombuffer(data, dtype=aubio.float_type).copy()
            # The pitch tracker was created for hop_s samples per call, so a
            # short read is skipped instead of being handed to it.
            if len(samples) == Audio_watcher.hop_s:
                filtered_samples = self.f(samples)
                print(filtered_samples)
                # pitch and energy of current frame
                freq = self.pitcher(filtered_samples)[0]
                print(freq)
                self.last_frequencies = np.roll(self.last_frequencies, 1)
                self.last_frequencies[0] = freq
                self.last_energies = np.roll(self.last_energies, 1)
                self.last_energies[0] = np.sum(filtered_samples**2)/len(filtered_samples)
        threading.Timer(Audio_watcher.per_sampling, self.get_audio).start()

    def reset_detected_tone(self):
        """Forget the remembered first tone (Timer callback)."""
        self.detected_tone = 0

    def detect_tone(self):
        """Check the pitch history for a steady tone, then reschedule itself."""
        std_last = np.std(self.last_frequencies)
        # A small but non-zero spread is treated as one held tone; a spread of
        # exactly 0 (e.g. the all-zero initial history) is ignored.
        if std_last <= 200 and std_last > 0:
            mean_freq = np.mean(self.last_frequencies)
            if self.detected_tone == 0:
                # First tone: remember it and start the expiry timer.
                self.detected_tone = mean_freq
                threading.Timer(Audio_watcher.tone_max_interval, self.reset_detected_tone).start()
            elif mean_freq > Audio_watcher.tone_diff_ratio * self.detected_tone:
                print('wol')
        threading.Timer(Audio_watcher.tone_duration, self.detect_tone).start()
# Only open the audio device and start the timer chains when run as a script,
# not when this module is imported.
if __name__ == "__main__":
    aw = Audio_watcher()
    aw.get_audio()
    aw.detect_tone()
However, with this code I get a long delay between the sounds and their detection. I think it has to do with the recorder being read only once every 0.1 s, but I can't figure out how to give the correct parameters to aubio.
Does anyone know how to configure the constants so that it works?
Thanks a lot !
I found out what was causing this problem: I needed to move the code that sets up the audio input into the get_audio function so that it is renewed every time.
Related
I am having some problems with multi-threading when using Xlib.thread or ImageGrab.
When I put 3 or more screen reads (getPixel and pyautogui's locateOnScreen) in multiple threads, it initially works fast and without errors, but 2 or 3 hours later the commands become slow and an error appears in the console warning that "it can't happen!"
Here's an example of the code:
from PIL import ImageGrab, Image
from pymouse import PyMouse
from pykeyboard import PyKeyboard
import time
# Module-level mouse and keyboard controller instances built from the
# pymouse / pykeyboard imports; none of the functions shown in this excerpt
# reference them.
m = PyMouse()
k = PyKeyboard()
def start(self):
    """Switch mana training on when a hotkey is configured and start polling.

    Sets ``isManaTrainingOn`` if ``hotkeyEscolhidaManaTraining`` is not None
    and, when the feature is on, stores the handle returned by
    ``Interval.set_interval(self.startManaTraining, 1)``.
    """
    # Identity test: None is a singleton, so `is not None` is the reliable check.
    if self.hotkeyEscolhidaManaTraining is not None:
        self.isManaTrainingOn = True
    if self.isManaTrainingOn:
        # Interval is defined outside this excerpt; presumably it calls
        # startManaTraining every 1 second - confirm against its definition.
        self.timerstartManaTraining = Interval.set_interval(self.startManaTraining, 1)
def startManaTraining(self):
    """Locate the mana bar once and start the periodic pixel check.

    On a later call (flag already set) the start-up timer is cancelled and
    nothing else happens. Otherwise the bar image is searched on screen, the
    pixel to watch is derived from the configured percentage, and
    ``validateManaTraining`` is scheduled through ``Interval.set_interval``.
    """
    if self.runningTimerManaTraining:
        # Setup already ran: stop the timer that keeps calling this method.
        self.timerstartManaTraining.cancel()
        return
    self.runningTimerManaTraining = True
    ManaTrainingBox = pyautogui.locateOnScreen('Pics/heal/manaLow.png', 0.8, True)
    if ManaTrainingBox is None:
        # Image not found: clear the flag so the next tick searches again
        # instead of failing on the subscript below with the flag stuck on.
        self.runningTimerManaTraining = False
        return
    startBoxLeft = ManaTrainingBox[0]+2
    self.startBoxTopManaTraining = ManaTrainingBox[1]+5
    # Width in pixels used to convert the percentage into an x offset.
    pixeisTotais = 92
    if self.isManaTrainingOn:
        pixelManaTraining = (self.porcentagemEscolhidaManaTraining * pixeisTotais) / 100
        self.endBoxLeftManaTraining = startBoxLeft + pixelManaTraining
        self.timerManaTraining = Interval.set_interval(self.validateManaTraining, 0.5)
def validateManaTraining(self):
    """Press the configured hotkey when the watched mana pixel fails validation.

    Does nothing while mana training is switched off.
    """
    if not self.isManaTrainingOn:
        return
    watched_point = (self.endBoxLeftManaTraining, self.startBoxTopManaTraining)
    pixelRGB = ImageGrab.getPixel(watched_point)
    # validatePixelsMana is defined outside this excerpt; a falsy result
    # triggers the hotkey press.
    if self.validatePixelsMana(pixelRGB):
        return
    pyautogui.press(self.hotkeyEscolhidaManaTraining)
    return
and there are 10 other pieces of code like this running in multiple threads. Is there a way to get all the pixels and PNG matches from just one 'locateOnScreen'-style screen capture, without saving the image?
I'm working in Python on a Raspberry Pi. I'm trying to send out a signal through a motor controller, and then read that signal back with a sensing HAT after it passes through my plant (an RC filter in this case).
The important thing is that I want to generate the output and read the input as close to simultaneously as possible. I was hoping to use multiprocessing so that one worker sends the signal while another reads the incoming signal, but I keep getting confused about how threads and processes work in Python.
In short, is it possible to do 2 different tasks with multiprocessing and then repeat those tasks (sending and reading a signal) until a condition is met (as in a while loop)?
(Edited with Code)
from __future__ import print_function
from PyQt5.QtWidgets import QAction
from pyqtgraph.Qt import QtGui, QtCore
from adafruit_motorkit import MotorKit
import pyqtgraph as pg
import sys
from sys import stdout
import numpy as np
from daqhats import mcc118, OptionFlags, HatIDs, HatError
from daqhats_utils import select_hat_device, enum_mask_to_string, \
chan_list_to_mask
from decimal import *
import math
import time
# Every Decimal operation below is rounded to 3 significant digits.
getcontext().prec = 3

total_samples_read = 0
READ_ALL_AVAILABLE = -1  # request size meaning "return whatever is buffered"
channelData = np.zeros(4, dtype=float)  # scratch: most recent value per channel
CURSOR_BACK_2 = '\x1b[2D'
ERASE_TO_END_OF_LINE = '\x1b[0K'

# for plotting data
########################################
scan_rate = 1000  # scan rate in Hz
maxtime = 30  # seconds to run for
Datatime = np.zeros(maxtime * scan_rate, dtype=float)  # times at which samples were taken
Data1 = np.zeros(maxtime * scan_rate, dtype=float)  # samples taken
data_index = 0  # number of data points stored so far
# Time step between consecutive entries of Datatime. Built from exact
# Decimals so it is not affected by binary-float rounding or by integer
# division of 1 / scan_rate.
dt = Decimal(1) / Decimal(scan_rate)
display_index = 0  # number of data points currently shown on the plot
#################################

# variables for data logger
##########################
is_scanning = False
channels = [0]
channel_mask = chan_list_to_mask(channels)
num_channels = len(channels)
samples_per_channel = 0
options = OptionFlags.CONTINUOUS
######################################

startedTime = 0  # time at program start
myTime = 0  # time since program started

# NOTE(review): when no board is found only the error is printed and `hat`
# stays undefined, so the first later use of `hat` fails - confirm that this
# is acceptable.
try:
    address = select_hat_device(HatIDs.MCC_118)
    hat = mcc118(address)
except (HatError, ValueError) as err:
    print('\n', err)
class MainWindow(pg.GraphicsWindow):
    """Plot window that drives the motor and refreshes the curve on a QTimer."""

    def __init__(self, *args, **kwargs):
        """Create the window, its Quit action and the (not yet started) timer."""
        # super() must name this class; naming pg.GraphicsWindow started the
        # lookup after it and skipped GraphicsWindow.__init__.
        super(MainWindow, self).__init__(*args, **kwargs)
        self.delay = 30  # timer period in ms
        self.quit = QAction("Quit", self)
        self.quit.triggered.connect(self.clean_close)
        self.timer = QtCore.QTimer()
        self.timer.setInterval(self.delay)
        self.timer.timeout.connect(self.update_plot)

    # plots data and runs calibrate between trials
    def update_plot(self):
        """Timer slot: update the motor throttle, collect samples, redraw."""
        global display_index, Datatime, Data1
        # 1 Hz sinusoid out of the motor
        kit.motor1.throttle = .4 + .2 * math.cos((time.time()-startedTime) * 2 * np.pi * 1)
        if data_index < len(Data1):
            Collect_Data()
            plot.setXRange(0, 20, padding=0)
            curve.setData(Datatime[:display_index], Data1[:display_index])
            # NOTE(review): this advances by one point per timer tick while
            # Collect_Data may store several samples per tick - confirm that
            # the plot is meant to trail the acquired data.
            display_index += 1
        app.processEvents()

    def clean_close(self):
        """Close the window (slot for the Quit action)."""
        self.close()
# starts data collection
def Collect_Data():
    """Start the continuous scan on the first call, then read available samples.

    HatError and ValueError are reported with print() rather than raised. A
    KeyboardInterrupt during the read stops and cleans up the scan.
    """
    global is_scanning
    try:
        # Configure and start the scan.
        # Since the continuous option is being used, the samples_per_channel
        # parameter is ignored if the value is less than the default internal
        # buffer size (10000 * num_channels in this case). If a larger internal
        # buffer size is desired, set the value of this parameter accordingly.
        if not is_scanning:
            hat.a_in_scan_start(channel_mask, samples_per_channel, scan_rate,
                                options)
            is_scanning = True
        try:
            read_and_display_data(hat, num_channels)
        except KeyboardInterrupt:
            # Clear the '^C' from the display.
            print(CURSOR_BACK_2, ERASE_TO_END_OF_LINE, '\n')
            print('Stopping')
            # NOTE(review): is_scanning stays True after the scan is stopped,
            # so a later call will not restart it - confirm this is intended.
            hat.a_in_scan_stop()
            hat.a_in_scan_cleanup()
    except (HatError, ValueError) as err:
        print('\n', err)
# reads data off of the HAT and adds it to Data1
def read_and_display_data(hat, num_channels):
    """Append every buffered sample of channel 0 to ``Data1`` / ``Datatime``.

    Args:
        hat: Board object providing ``a_in_scan_read(size, timeout)``.
        num_channels: Number of interleaved channels in the returned data.

    One read of all available samples is performed. On a hardware or buffer
    overrun a message is printed and nothing is stored. Samples that no longer
    fit into ``Data1`` are dropped.
    """
    global channelData, data_index, Datatime, Data1

    # With READ_ALL_AVAILABLE the call returns immediately with whatever is
    # buffered, and the timeout value is ignored.
    timeout = 5.0
    read_result = hat.a_in_scan_read(READ_ALL_AVAILABLE, timeout)

    # Check for an overrun error
    if read_result.hardware_overrun:
        print('\n\nHardware overrun\n')
    elif read_result.buffer_overrun:
        print('\n\nBuffer overrun\n')
    else:
        samples = read_result.data
        samples_read_per_channel = len(samples) // num_channels
        step = float(dt)  # seconds between two consecutive samples

        # Walk through every sample in arrival order. The offset is recomputed
        # per sample so each stored value is the sample itself, not a repeat
        # of the last one in the buffer.
        for sample_no in range(samples_read_per_channel):
            index = sample_no * num_channels
            for i in range(num_channels):
                channelData[i] = samples[index + i]
            if data_index < len(Data1):
                Data1[data_index] = channelData[0]
                # Plain float arithmetic: the Decimal context rounds products
                # to 3 significant digits, which made timestamps coarse.
                Datatime[data_index] = data_index * step
                data_index += 1

    stdout.flush()
if __name__ == '__main__':
    # The names assigned here (app, win, plot, curve, kit, startedTime) are
    # module globals that MainWindow.update_plot reads, so they must all
    # exist before the timer is started.
    app = QtGui.QApplication([])
    win = MainWindow()  # display window
    plot = win.addPlot(1, 0)
    curve = plot.plot()
    win.show()
    kit = MotorKit()  # implements motor driver
    kit.motor1.throttle = .4  # values 1 is 5v and 0 is 0 volts
    # Reference time for the sinusoidal throttle computed in update_plot.
    startedTime = time.time()
    # u = .2*math.cos(t * 2*np.pi*1)
    win.timer.start()
    # Run the Qt event loop and exit with its return code.
    sys.exit(app.exec_())
So I want to get the similarity index between 2D arrays representing 2 singers' voices.
I read my mp3 files with pydub.
AudioFunctions.py
import numpy as np
from numpy.fft import fft
from pydub import AudioSegment
class SongData():
    """Mono samples of an audio file together with its FFT.

    Attributes set by ``__init__``:
        audio: pydub segment loaded from ``path`` and reduced to one channel.
        rate, data: frame rate and sample array of ``audio``.
        length, duration: number of samples and duration in seconds.
        time: time axis with one entry per sample.
        freq: frequency axis from 0 to ``rate / 2`` with ``length // 2`` entries.
        fftArray: FFT of ``data``.
        fftArrayPositive, fftArrayNegative: first half of ``fftArray`` and the
            reversed second half.
        fftArrayAbs, fftPlotting: magnitude of ``fftArray`` and its first half.
    """

    def __init__(self, path):
        """Load ``path`` and compute the time/frequency axes and the FFT."""
        self.audio = AudioSegment.from_file(path).set_channels(1)
        self.rate = self.audio.frame_rate
        self.data = np.array(self.audio.get_array_of_samples())
        self.length = len(self.data)
        self.duration = self.audio.duration_seconds
        # One half-length value shared by every half-spectrum attribute.
        half = self.length // 2

        self.time = np.linspace(0, self.duration, self.length)
        self.freq = np.linspace(0, self.rate / 2, half)

        self.fftArray = fft(self.data)
        self.fftArrayPositive = self.fftArray[:half]
        self.fftArrayNegative = np.flip(self.fftArray[half:])
        self.fftArrayAbs = np.abs(self.fftArray)
        self.fftPlotting = self.fftArrayAbs[:half]
def song2data(path):
    """Return a ``SongData`` built from the audio file at ``path``."""
    return SongData(path)
def getFirstData(songArr, time, rate=44100):
    """Return the first ``time`` seconds of ``songArr``.

    Args:
        songArr: Sliceable sequence of samples.
        time: Duration to keep, in seconds; negative values keep nothing.
        rate: Samples per second of ``songArr``. Defaults to 44100, the value
            that used to be hard-coded.
    """
    sample_count = max(0, int(time * rate))
    return songArr[:sample_count]
And this is my code to get the data of 2 songs and get their spectrograms...
main.py
from AudioFunctions import *
from scipy import signal
import matplotlib.pyplot as plt
import librosa
import sklearn
from scipy import spatial
from sklearn.metrics.pairwise import cosine_similarity
# Load both recordings.
songClass1 = song2data("sia1.mp3")
songClass2 = song2data("sia2.mp3")

# Keep at most the first 120 seconds of each song.
songArray = getFirstData(songClass1.data, 120)
songArray2 = getFirstData(songClass2.data, 120)

# The cosine distance needs vectors of equal length, so both excerpts are
# trimmed to the shorter one before the spectrograms are computed.
common_length = min(len(songArray), len(songArray2))
songArray = songArray[:common_length]
songArray2 = songArray2[:common_length]

# Use each file's real sample rate instead of assuming 44100 Hz.
frequencies, times, spectrogram = signal.spectrogram(songArray, songClass1.rate)
frequencies2, times2, spectrogram2 = signal.spectrogram(songArray2, songClass2.rate)

# Cosine similarity of the flattened spectrograms.
# NOTE(review): this compares the two recordings bin by bin in time and
# frequency, so two different songs by one singer are expected to score low.
spec = spectrogram.flatten()
spec2 = spectrogram2.flatten()
result = 1 - spatial.distance.cosine(spec, spec2)
print(result)
The result represents a similarity index between the 2 voices. However, it gives me a low number (0.133) when comparing 2 songs by the same singer (Sia).
Song 1: https://drive.google.com/file/d/1svV0Ry_lNaEA9Z8c61t3S25XCSgIv6sW/view
Song 2:https://drive.google.com/file/d/1ToKQo2MERBbxZezqDcEtEE2dhgmH-wus/view
Is there any problem in my logic ? Or this result could be logic for some cases?
Thanks in advance
A bit of a necro answer, but yes: a spectrogram is a timeline of the frequencies played and their intensities, so 2 different songs will produce totally different spectrograms. And yes, your logic is flawed.
I need to do some real time audio signal processing with Python, i.e. analyze the signal in the frequency domain by framing, windowing and computing the FFT, and then apply some filters depending on the analysis results. I've been using PyAudio for audio acquisition and PyQtGraph for waveform and FFT visualization, as suggested in this and this code.
For now my code only detects the N power spectrum bins with the highest value and highlights them by drawing vertical lines on the FFT plot. Here is what it looks like:
import pyaudio
import numpy as np
from scipy.signal import argrelextrema
import pyqtgraph as pg
from pyqtgraph.Qt import QtCore
##Some settings
FORMAT = pyaudio.paFloat32
CHANNELS = 1
FS = 44100              # sample rate in Hz
CHUNK = 256             # frames read from the stream per update
NFFT = 2048             # FFT length in samples
OVERLAP = 0.5           # fraction of NFFT shared by consecutive FFT frames
PLOTSIZE = 32*CHUNK     # number of samples shown in the waveform plot
N = 4                   # number of spectral peaks to highlight
# x axis used for the spectrum curve (one value per rfft bin).
# NOTE(review): it starts at 10 Hz while the rfft bins and the peak markers
# (peak * df) start at 0 Hz - confirm the offset is intended.
freq_range = np.linspace(10, FS/2, NFFT//2 + 1)
df = FS/NFFT            # width of one FFT bin in Hz
# New samples between two FFT frames; kept as an int because it is a sample
# count that is compared with an integer counter.
HOP = int(NFFT*(1-OVERLAP))
##Some preliminary functions
def db_spectrum(data):
    """Return the Hann-windowed one-sided amplitude spectrum of ``data`` in dB.

    Args:
        data: 1-D real signal; its own length is used as the FFT length.

    Returns:
        Array of ``len(data) // 2 + 1`` values ``20 * log10(amplitude)``.
        Bins whose amplitude is exactly 0 are clamped to the smallest positive
        float, so the result is always finite.
    """
    data = np.asarray(data, dtype=float)
    n = len(data)
    spectrum = np.abs(np.fft.rfft(data * np.hanning(n))) / n
    # Double all bins except the first and the last to fold the
    # negative-frequency half into the one-sided spectrum.
    spectrum[1:-1] *= 2
    # Avoid log10(0) = -inf (and its warning) on silent input.
    floor = np.finfo(float).tiny
    return 20 * np.log10(np.maximum(spectrum, floor))
def highest_peaks(spectrum, n_peaks=None):
    """Return the indices of the highest local maxima of ``spectrum``.

    Args:
        spectrum: 1-D array.
        n_peaks: Number of peaks wanted; defaults to the module setting ``N``.

    Returns:
        Array of bin indices in no particular order. When the spectrum has at
        most ``n_peaks`` local maxima, all of them are returned; a non-positive
        ``n_peaks`` gives an empty array.
    """
    if n_peaks is None:
        n_peaks = N
    peak_indices = argrelextrema(spectrum, np.greater)[0]
    if n_peaks <= 0:
        return peak_indices[:0]
    # argpartition raises when asked for more elements than exist, so the
    # "few peaks" case is answered directly.
    if len(peak_indices) <= n_peaks:
        return peak_indices
    peak_values = spectrum[peak_indices]
    highest_peak_indices = np.argpartition(peak_values, -n_peaks)[-n_peaks:]
    return peak_indices[highest_peak_indices]
def detection_plot(peaks):
    """Build the x/y lists that draw one vertical line per peak.

    Each peak contributes two points at frequency ``peak * df``: one at
    y = -200 and one at y = 0.
    """
    x = []
    y = []
    for peak in peaks:
        x.extend((peak*df, peak*df))
        y.extend((-200, 0))
    return x, y
##Main class containing loop and UI
class SpectrumAnalyzer(pg.GraphicsWindow):
    """Window showing the live microphone waveform and its spectrum.

    A zero-interval QTimer calls ``update``, which reads one chunk from the
    audio stream, plays it back, and refreshes the plots.
    """

    def __init__(self):
        """Build the plots and buffers, open the audio stream, start the timer."""
        super().__init__()
        self.initUI()
        self.initData()
        self.pa = pyaudio.PyAudio()
        self.stream = self.pa.open(format=FORMAT,
                                   channels=CHANNELS,
                                   rate=FS,
                                   input=True,
                                   output=True,
                                   frames_per_buffer=CHUNK)
        # Started last so that update() can never run before the stream and
        # the buffers it uses exist.
        self.initTimer()

    def initUI(self):
        """Create the waveform plot and, on the next row, the FFT plot."""
        self.setWindowTitle("Microphone Audio Data")
        audio_plot = self.addPlot(title="Waveform")
        audio_plot.showGrid(True, True)
        audio_plot.addLegend()
        audio_plot.setYRange(-1, 1)
        self.time_curve = audio_plot.plot()
        self.nextRow()
        fft_plot = self.addPlot(title="FFT")
        fft_plot.showGrid(True, True)
        fft_plot.addLegend()
        fft_plot.setLogMode(True, False)
        fft_plot.setYRange(-140, 0)  # may be adjusted depending on your input
        self.fft_curve = fft_plot.plot(pen='y')
        self.detection = fft_plot.plot(pen='r')

    def initTimer(self):
        """Start a zero-interval timer connected to ``update``."""
        self.timer = QtCore.QTimer()
        self.timer.timeout.connect(self.update)
        self.timer.start(0)

    def initData(self):
        """Allocate the rolling waveform / FFT buffers and the hop counter."""
        self.waveform_data = np.zeros(PLOTSIZE)
        self.fft_data = np.zeros(NFFT)
        self.fft_counter = 0

    def closeEvent(self, event):
        """Stop the timer and release the audio stream when the window closes."""
        self.timer.stop()
        self.stream.stop_stream()
        self.stream.close()
        self.pa.terminate()

    # NOTE(review): this method name shadows the widget's own update();
    # confirm nothing else relies on the inherited behaviour.
    def update(self):
        """Read one chunk, echo it, and refresh the waveform and the spectrum."""
        raw_data = self.stream.read(CHUNK)
        self.stream.write(raw_data, CHUNK)
        self.fft_counter += CHUNK
        # frombuffer replaces the deprecated binary mode of np.fromstring; the
        # read-only view is fine because concatenate copies it.
        sample_data = np.frombuffer(raw_data, dtype=np.float32)

        # Rolling buffers: append the new chunk, drop the oldest CHUNK samples.
        self.waveform_data = np.concatenate([self.waveform_data, sample_data])[CHUNK:]
        self.time_curve.setData(self.waveform_data)
        self.fft_data = np.concatenate([self.fft_data, sample_data])[CHUNK:]

        # ">=" still fires when HOP is not an exact multiple of CHUNK.
        if self.fft_counter >= HOP:
            spectrum = db_spectrum(self.fft_data)
            peaks = highest_peaks(spectrum)
            x, y = detection_plot(peaks)
            self.detection.setData(x, y, connect='pairs')
            self.fft_curve.setData(freq_range, spectrum)
            self.fft_counter = 0
if __name__ == '__main__':
    # Get (or create) the Qt application, build the window, then run the event
    # loop so the timer fires and the window stays open when this file is run
    # as a plain script.
    app = pg.mkQApp()
    spec = SpectrumAnalyzer()
    app.exec_()
The code works fine but I still have some questions :
I understand that by calling timer.start() with 0 as an argument, the update method is being called again as soon as possible. How does my script know that the update method needs be called only when the next audio chunk is received and not before ?
In the codes I linked above, the closeEvent method is not modified in order to stop the timer and the stream when closing the PyQtGraph window. What used to happen for me is that even after closing the window, the update method was being called and my audio recorded. Was that normal behavior ?
I've read that when using PyQt GUIs, I should always start by calling a QtGui.QApplication instance and call the exec method. Why is that and why is my code working even though I'm not doing it ?
In the future I will need to implement analysis that is much more demanding than just detecting the N highest peaks. Given the actual structure of my code, if I add such analysis in the update method, I understand that the CPU will have to compute everything before the next audio chunk is received, while it could wait for the next FFT input data to be ready. The hop size being larger than the chunk size, this will give the CPU more time to compute everything. How can I achieve this ? Multi-threading ?
This is my first question on StackOverflow, so here goes:
Edit: I have edited this a few times, just fixing typing mistakes and updating the code. Even after adding various changes to the code, the issue still remains the exact same.
Also, pygame.mixer.music.fadeout() is not what I'm looking for. This code will also be for when I want to lower music volume to perhaps 50% on, say, pausing the game or entering a talk scene.
With Pygame, I am trying to perform music volume manipulation based on how much time has passed. I already have some decent code created, but it's not performing how I thought it intuitively should. Also, I should note that I am using the component-based EBS system I ripped from PySDL2. Here is the link to the EBS module: https://bitbucket.org/marcusva/py-sdl2/src/02a4bc4f79d9440fe98e372e0ffaadacaefaa5c6/sdl2/ext/ebs.py?at=default
This is my initial block of code:
import os
import sys

import pygame
from pygame.locals import *

# Setup import paths for module.
pkg_dir = os.path.split(os.path.abspath(__file__))[0]
parent_dir, pkg_name = os.path.split(pkg_dir)
sys.path.insert(0, parent_dir)
sys.path.insert(0, os.path.join(parent_dir, "Game"))

# These imports rely on the sys.path entries added above.
import Game
from Porting.sdl2.ext import ebs

pygame.display.quit()

print("Counting down...")
for n in range(5):
    print(str(n + 1))
    pygame.time.delay(1000)

appworld = ebs.World()

audio_system = Game.audio.AudioSystem(44100, -16, 2, 4096)
appworld.add_system(audio_system)

# NOTE(review): `database` is not defined in this excerpt - confirm where it
# comes from.
test1 = Game.sprites.AudioSprite(appworld)
test2 = Game.sprites.AudioSprite(appworld)
test1.audio = Game.audio.Audio(database["BGMusic0"], True)
test2.audio = Game.audio.Audio(database["BGMusic1"], True)

game_clock = pygame.time.Clock()

loop = True
time_passed = 0       # milliseconds since the loop started
fade_requested = False
while loop:
    # A single frame-capped clock: tick() both limits the loop to 60 fps and
    # returns the milliseconds since the previous call. A second clock with
    # its own tick(60) would sleep again and halve the frame rate.
    time_passed += game_clock.tick(60)
    appworld.process()
    # After 10 seconds, ask once for a fade to silence.
    if time_passed > (10 * 1000) and not fade_requested:
        print(time_passed)
        if not audio_system.music_volume_changed:
            audio_system.set_music_volume(0, True)
        fade_requested = True
My next block of code:
import pygame
from Porting.sdl2.ext import ebs
class AudioSystem(ebs.System):
    """EBS system that plays ``Audio`` components and manages the music volume.

    Music components are streamed through ``pygame.mixer.music``; other
    components are played once on a free mixer channel. A volume change
    requested with ``set_music_volume`` is applied by ``process``, either
    immediately or as a fade whose speed depends only on elapsed time.
    """

    def __init__(self, frequency, bit_size, channels, buffer):
        """Initialise the mixer with the given settings and reset the fade state."""
        super(AudioSystem, self).__init__()
        self.componenttypes = Audio,
        pygame.mixer.init(frequency, bit_size, channels, buffer)
        pygame.mixer.set_num_channels(200)
        self.frequency = frequency
        self.bit_size = bit_size
        self.channels = channels
        self.buffer = buffer
        self.music_volume_change_clock = None
        self.music_volume_changed = False   # True while a change is pending
        self.music_volume_current = 0       # tracked volume in percent
        self.music_volume_new = 0           # target volume in percent
        self.music_fade = False
        self.music_change_speed = 0         # milliseconds per 1 % step
        self.time_passed_total = 0          # ms accumulated towards the next step
        self.time_passed_remainder = 0      # kept for compatibility; unused

    def process(self, world, componentsets):
        """Start pending playback and advance a pending music volume change."""
        music = pygame.mixer.music
        for audio in componentsets:
            if audio.is_music:
                if not music.get_busy():
                    music.load(audio.file)
                    music.play()
            elif not audio.channel:
                channel = pygame.mixer.find_channel()
                # find_channel() gives None when every channel is busy; the
                # sound is then retried on the next call.
                if channel is not None:
                    audio.channel = channel
                    # Channel.play needs the Sound to play (stored in .file).
                    channel.play(audio.file)

        # Handled once per call, not once per music component, so the number
        # of music components does not change the fade speed.
        if self.music_volume_changed:
            self._advance_music_volume(music)

    def _advance_music_volume(self, music):
        """Move the tracked volume towards the target and push it to the mixer."""
        target = self.music_volume_new
        current = self.music_volume_current
        speed = self.music_change_speed

        if self.music_fade and speed > 0 and current != target:
            # tick() without a frame rate never sleeps; it only reports the
            # milliseconds since the previous call.
            self.time_passed_total += self.music_volume_change_clock.tick()
            # Whole steps are consumed; the remainder stays accumulated.
            steps, self.time_passed_total = divmod(self.time_passed_total, speed)
            if steps == 0:
                return
            # Clamp so the volume never overshoots the target.
            if current > target:
                current = max(target, current - steps)
            else:
                current = min(target, current + steps)
        else:
            # No fade requested (or nothing left to fade): jump to the target.
            current = target

        # The tracked percentage is the source of truth; it is not read back
        # from the mixer after being set.
        self.music_volume_current = current
        music.set_volume(current / 100.0)
        if current == target:
            self.music_volume_changed = False
            self.music_fade = False

    def set_music_volume(self, percent, fade = False, change_speed = 50):
        """Request a new music volume.

        Args:
            percent: Target volume in percent; clamped to the range 0..100.
            fade: When true, approach the target gradually instead of jumping.
            change_speed: Milliseconds spent on each 1 % step of a fade.
        """
        self.music_volume_changed = True
        self.music_volume_new = max(0, min(100, percent))
        self.music_fade = fade
        self.music_change_speed = change_speed
        # Start from the volume the mixer reports right now.
        self.music_volume_current = pygame.mixer.music.get_volume() * 100
        self.time_passed_total = 0
        self.time_passed_remainder = 0
        self.music_volume_change_clock = pygame.time.Clock()
class Audio(object):
    """Audio component: either a music file reference or a loaded sound.

    For music, ``file`` keeps the value passed in. Otherwise ``file`` is a
    ``pygame.mixer.Sound`` created from it and ``channel`` starts as None.
    """

    def __init__(self, file, is_music = False):
        """Store the music reference, or load the sound for non-music audio."""
        self.is_music = is_music
        if not self.is_music:
            self.channel = None
            self.file = pygame.mixer.Sound(file)
            return
        self.file = file
My testing has shown that manipulating the parameter of Clock.tick() in my Game.audio module in various ways influences how quickly the audio playing falls from 100 to 0. Leaving it blank causes it to stop almost instantaneously. At 60, it falls to 0 in around 2 seconds, which baffles me. At 30, in 1 second. At 5, it falls slowly, with the volume never seeming to reach 0. I want to completely desynchronize my audio volume manipulation completely from my game's frame-rate, but I am unsure of how I would accomplish that. I want to avoid threading and multiprocessing if possible.
Thanks in advance! :)
Clock.tick()'s parameter is used to call the SDL sleep function to limit how many times the loop runs per second.
Calling it with Clock.tick(5) limits it to five loops per second.
I've also never used two clocks in the same code, especially with the multiple ticks (all of which will calculate their sleep time individually). Instead of that, consider using the return value of tick (the time in ms since the last call), and use that to track time through the whole application.
Example:
timer = 0
Do things
timer += main_clock.tick(FPS)