I am trying to calculate the bispectrum score (BSS) for an array of audio data frames; the definition of this feature can be found here:
The formulas can be found here:
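Since the links did not make it into the post, here is a summary of the standard definitions I assume the code below implements: for a field $u$ with Fourier transform $\hat{u}(\mathbf{k})$, the bin-averaged bispectrum and bicoherence are

$$B(k_1, k_2) = \left\langle \hat{u}(\mathbf{k}_1)\,\hat{u}(\mathbf{k}_2)\,\hat{u}^{*}(\mathbf{k}_1 + \mathbf{k}_2) \right\rangle,$$

$$b(k_1, k_2) = \frac{\lvert B(k_1, k_2) \rvert}{\left\langle \lvert \hat{u}(\mathbf{k}_1)\,\hat{u}(\mathbf{k}_2)\,\hat{u}(\mathbf{k}_1 + \mathbf{k}_2) \rvert \right\rangle},$$

where the averages run over all wavevector pairs with $|\mathbf{k}_1| = k_1$ and $|\mathbf{k}_2| = k_2$. This matches sample = s1*s2*s3, norm = np.abs(sample), and b = np.abs(B) / norm in the code.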
The implementation I am using is:
import numpy as np
import numba as nb
from time import time

def bispectrum(*u, ntheta=None, kmin=None, kmax=None,
               diagnostics=True, error=False,
               nsamples=None, sample_thresh=None,
               compute_fft=True, exclude_upper=False, use_pyfftw=False,
               bench=False, progress=False, **kwargs):
    shape, ndim = nb.typed.List(u[0].shape), u[0].ndim
    ncomp = len(u)

    if ncomp not in [1, 3]:
        raise ValueError("Pass either 1 scalar field or 3 vector components.")
    if ndim not in [2, 3]:
        raise ValueError("Data must be 2D or 3D.")

    # Geometry of output image
    kmax = int(max(shape)/2) if kmax is None else int(kmax)
    kmin = 1 if kmin is None else int(kmin)
    kn = np.arange(kmin, kmax+1, 1, dtype=int)
    dim = kn.size
    theta = np.arange(0, np.pi, np.pi/ntheta) if ntheta is not None else None
    # ...make costheta monotonically increase
    costheta = np.flip(np.cos(theta)) if theta is not None else np.array([1.])
    # theta = 0 should be included
    if theta is not None:
        costheta[-1] += 1e-5

    if bench:
        t0 = time()

    # Get binned radial coordinates of FFT
    kv = np.meshgrid(*([np.fft.fftfreq(Ni).astype(np.float32)*Ni
                        for Ni in shape]), indexing="ij")
    kr = np.zeros_like(kv[0])
    for i in range(ndim):
        kr[...] += kv[i]**2
    kr[...] = np.sqrt(kr)

    kcoords = nb.typed.List()
    for i in range(ndim):
        temp = kv[i].astype(np.int16).ravel()
        kcoords.append(temp)

    del kv, temp

    kbins = np.arange(int(np.ceil(kr.max())))
    kbinned = (np.digitize(kr, kbins)-1).astype(np.int16)

    del kr

    # Enumerate indices in each bin
    k1bins, k2bins = nb.typed.List(), nb.typed.List()
    for ki in kn:
        mask = kbinned == ki
        temp1 = np.where(mask)
        temp2 = np.where(mask[..., :shape[-1]//2+1])
        k1bins.append(np.ravel_multi_index(temp1, shape))
        k2bins.append(np.ravel_multi_index(temp2, shape))

    del kbinned

    # FFT
    ffts = []
    for i in range(ncomp):
        if compute_fft:
            temp = u[i]
            if use_pyfftw:
                fft = _fftn(temp, **kwargs)
            else:
                fft = np.fft.rfftn(temp, **kwargs)
            del temp
        else:
            fft = u[i][..., :shape[-1]//2+1]
        ffts.append(fft)
        del fft

    # Sampling settings
    if sample_thresh is None:
        sample_thresh = np.iinfo(np.int64).max
    if nsamples is None:
        nsamples = np.iinfo(np.int64).max
        sample_thresh = np.iinfo(np.int64).max

    # Sampling mask
    if np.issubdtype(type(nsamples), np.integer):
        nsamples = np.full((dim, dim), nsamples, dtype=np.int_)
    elif np.issubdtype(type(nsamples), np.floating):
        nsamples = np.full((dim, dim), nsamples)
    elif type(nsamples) is np.ndarray:
        if np.issubdtype(nsamples.dtype, np.integer):
            nsamples = nsamples.astype(np.int_)

    # Run main loop
    compute_point = eval(f"_compute_point{ndim}D")
    args = (k1bins, k2bins, kn, costheta, kcoords,
            nsamples, sample_thresh, ndim, dim, shape,
            progress, exclude_upper, error, compute_point, *ffts)
    B, norm, omega, counts, stderr = _compute_bispectrum(*args)

    # Set zero values to nan values for division
    mask = counts == 0.
    norm[mask] = np.nan
    counts[mask] = np.nan

    # Get bicoherence and average bispectrum
    b = np.abs(B) / norm
    B.real /= counts
    B.imag /= counts

    # Prepare diagnostics
    if error:
        stderr[counts <= 1.] = np.nan

    # Switch back to theta monotonically increasing
    if ntheta is not None:
        B[...] = np.flip(B, axis=0)
        b[...] = np.flip(b, axis=0)
        if diagnostics:
            counts[...] = np.flip(counts, axis=0)
            if error:
                stderr[...] = np.flip(stderr, axis=0)
    else:
        B, b = B[0], b[0]
        if diagnostics:
            counts = counts[0]
            if error:
                stderr = stderr[0]

    if bench:
        print(f"Time: {time() - t0:.04f} s")

    result = [B, b, kn]
    if ntheta is not None:
        result.append(theta)
    if diagnostics:
        result.extend([counts, omega])
        if error:
            result.append(stderr)

    return tuple(result)

def _fftn(image, overwrite_input=False, threads=-1, **kwargs):
    """
    Calculate N-dimensional fft of image with pyfftw.
    See pyfftw.builders.fftn for kwargs documentation.

    Parameters
    ----------
    image : np.ndarray
        Real or complex-valued 2D or 3D image
    overwrite_input : bool, optional
        Specify whether input data can be destroyed.
        This is useful for reducing memory usage.
        See pyfftw.builders.fftn for more.
    threads : int, optional
        Number of threads for pyfftw to use. Default
        is number of cores.

    Returns
    -------
    fft : np.ndarray
        The fft. Will be the shape of the input image
        or the user specified shape.
    """
    import pyfftw

    if image.dtype in [np.complex64, np.complex128]:
        dtype = 'complex128'
        fftn = pyfftw.builders.fftn
    elif image.dtype in [np.float32, np.float64]:
        dtype = 'float64'
        fftn = pyfftw.builders.rfftn
    else:
        raise ValueError(f"{image.dtype} is unrecognized data type.")

    a = pyfftw.empty_aligned(image.shape, dtype=dtype)
    f = fftn(a, threads=threads, overwrite_input=overwrite_input, **kwargs)
    a[...] = image
    fft = f()

    del a, fftn

    return fft

@nb.njit(parallel=True)
def _compute_bispectrum(k1bins, k2bins, kn, costheta, kcoords, nsamples,
                        sample_thresh, ndim, dim, shape, progress,
                        exclude, error, compute_point, *ffts):
    knyq = max(shape) // 2
    ntheta = costheta.size
    nffts = len(ffts)
    bispec = np.full((ntheta, dim, dim), np.nan+1.j*np.nan, dtype=np.complex128)
    binorm = np.full((ntheta, dim, dim), np.nan, dtype=np.float64)
    counts = np.full((ntheta, dim, dim), np.nan, dtype=np.float64)
    omega = np.zeros((dim, dim), dtype=np.int64)
    if error:
        stderr = np.full((ntheta, dim, dim), np.nan, dtype=np.float64)
    else:
        stderr = np.zeros((1, 1, 1), dtype=np.float64)
    for i in range(dim):
        k1 = kn[i]
        k1ind = k1bins[i]
        nk1 = k1ind.size
        dim2 = dim if nffts > 1 else i+1
        for j in range(dim2):
            k2 = kn[j]
            if ntheta == 1 and (exclude and k1 + k2 > knyq):
                continue
            k2ind = k2bins[j]
            nk2 = k2ind.size
            nsamp = nsamples[i, j]
            nsamp = int(nsamp) if type(nsamp) is np.int64 \
                else max(int(nsamp*nk1*nk2), 1)
            if nsamp < nk1*nk2 or nsamp > sample_thresh:
                samp = np.random.randint(0, nk1*nk2, size=nsamp)
                count = nsamp
            else:
                samp = np.arange(nk1*nk2)
                count = nk1*nk2
            bispecbuf = np.zeros(count, dtype=np.complex128)
            binormbuf = np.zeros(count, dtype=np.float64)
            cthetabuf = np.zeros(count, dtype=np.float64) if ntheta > 1 \
                else np.array([0.], dtype=np.float64)
            countbuf = np.zeros(count, dtype=np.float64)
            compute_point(k1ind, k2ind, kcoords, ntheta,
                          nk1, nk2, shape, samp, count,
                          bispecbuf, binormbuf, cthetabuf, countbuf,
                          *ffts)
            if ntheta == 1:
                _fill_sum(i, j, bispec, binorm, counts, stderr,
                          bispecbuf, binormbuf, countbuf, nffts, error)
            else:
                binned = np.searchsorted(costheta, cthetabuf)
                _fill_binned_sum(i, j, ntheta, binned, bispec, binorm,
                                 counts, stderr, bispecbuf, binormbuf,
                                 countbuf, nffts, error)
            omega[i, j] = nk1*nk2
            if nffts == 1:
                omega[j, i] = nk1*nk2
        if progress:
            with nb.objmode():
                _printProgressBar(i, dim-1)
    return bispec, binorm, omega, counts, stderr

@nb.njit(parallel=True, cache=True)
def _fill_sum(i, j, bispec, binorm, counts, stderr,
              bispecbuf, binormbuf, countbuf, nffts, error):
    N = countbuf.sum()
    norm = binormbuf.sum()
    value = bispecbuf.sum()
    bispec[0, i, j] = value
    binorm[0, i, j] = norm
    counts[0, i, j] = N
    if nffts == 1:
        bispec[0, j, i] = value
        binorm[0, j, i] = norm
        counts[0, j, i] = N
    if error and N > 1:
        variance = np.abs(bispecbuf - (value / N))**2
        err = np.sqrt(variance.sum() / (N*(N - 1)))
        stderr[0, i, j] = err
        if nffts == 1:
            stderr[0, j, i] = err

@nb.njit(parallel=True, cache=True)
def _fill_binned_sum(i, j, ntheta, binned, bispec, binorm, counts,
                     stderr, bispecbuf, binormbuf, countbuf, nffts, error):
    N = np.bincount(binned, weights=countbuf, minlength=ntheta)
    norm = np.bincount(binned, weights=binormbuf, minlength=ntheta)
    value = np.bincount(binned, weights=bispecbuf.real, minlength=ntheta) +\
        1.j*np.bincount(binned, weights=bispecbuf.imag, minlength=ntheta)
    bispec[:, i, j] = value
    binorm[:, i, j] = norm
    counts[:, i, j] = N
    if nffts == 1:
        bispec[:, j, i] = value
        binorm[:, j, i] = norm
        counts[:, j, i] = N
    if error:
        variance = np.zeros_like(countbuf)
        for n in range(ntheta):
            if N[n] > 1:
                idxs = np.where(binned == n)
                mean = value[n] / N[n]
                variance[idxs] = np.abs(bispecbuf[idxs] - mean)**2 / (N[n]*(N[n]-1))
        err = np.sqrt(np.bincount(binned, weights=variance, minlength=ntheta))
        stderr[:, i, j] = err
        if nffts == 1:
            stderr[:, j, i] = err

@nb.njit(parallel=True, cache=True)
def _compute_point3D(k1ind, k2ind, kcoords, ntheta, nk1, nk2, shape,
                     samp, count, bispecbuf, binormbuf,
                     cthetabuf, countbuf, *ffts):
    kx, ky, kz = kcoords[0], kcoords[1], kcoords[2]
    Nx, Ny, Nz = shape[0], shape[1], shape[2]
    nffts = len(ffts)
    fft1, fft2, fft3 = [ffts[0], ffts[0], ffts[0]] if nffts == 1 else ffts
    for idx in nb.prange(count):
        n, m = k1ind[samp[idx] % nk1], k2ind[samp[idx] // nk1]
        k1x, k1y, k1z = kx[n], ky[n], kz[n]
        k2x, k2y, k2z = kx[m], ky[m], kz[m]
        k3x, k3y, k3z = k1x+k2x, k1y+k2y, k1z+k2z
        if np.abs(k3x) > Nx//2 or np.abs(k3y) > Ny//2 or np.abs(k3z) > Nz//2:
            continue
        s1 = fft1[k1x, k1y, k1z] if k1z >= 0 \
            else np.conj(fft1[-k1x, -k1y, -k1z])
        s2 = fft2[k2x, k2y, k2z] if k2z >= 0 \
            else np.conj(fft2[-k2x, -k2y, -k2z])
        s3 = np.conj(fft3[k3x, k3y, k3z]) if k3z >= 0 \
            else fft3[-k3x, -k3y, -k3z]
        sample = s1*s2*s3
        norm = np.abs(sample)
        bispecbuf[idx] = sample
        binormbuf[idx] = norm
        countbuf[idx] = 1
        if ntheta > 1:
            k1dotk2 = k1x*k2x+k1y*k2y+k1z*k2z
            k1norm, k2norm = np.sqrt(k1x**2+k1y**2+k1z**2), np.sqrt(k2x**2+k2y**2+k2z**2)
            costheta = k1dotk2 / (k1norm*k2norm)
            cthetabuf[idx] = costheta

@nb.njit(parallel=True, cache=True)
def _compute_point2D(k1ind, k2ind, kcoords, ntheta, nk1, nk2, shape,
                     samp, count, bispecbuf, binormbuf,
                     cthetabuf, countbuf, *ffts):
    kx, ky = kcoords[0], kcoords[1]
    Nx, Ny = shape[0], shape[1]
    nffts = len(ffts)
    fft1, fft2, fft3 = [ffts[0], ffts[0], ffts[0]] if nffts == 1 else ffts
    for idx in nb.prange(count):
        n, m = k1ind[samp[idx] % nk1], k2ind[samp[idx] // nk1]
        k1x, k1y = kx[n], ky[n]
        k2x, k2y = kx[m], ky[m]
        k3x, k3y = k1x+k2x, k1y+k2y
        if np.abs(k3x) > Nx//2 or np.abs(k3y) > Ny//2:
            continue
        s1 = fft1[k1x, k1y] if k1y >= 0 else np.conj(fft1[-k1x, -k1y])
        s2 = fft2[k2x, k2y] if k2y >= 0 else np.conj(fft2[-k2x, -k2y])
        s3 = np.conj(fft3[k3x, k3y]) if k3y >= 0 else fft3[-k3x, -k3y]
        sample = s1*s2*s3
        norm = np.abs(sample)
        bispecbuf[idx] = sample
        binormbuf[idx] = norm
        countbuf[idx] = 1
        if ntheta > 1:
            k1dotk2 = k1x*k2x+k1y*k2y
            k1norm, k2norm = np.sqrt(k1x**2+k1y**2), np.sqrt(k2x**2+k2y**2)
            costheta = k1dotk2 / (k1norm*k2norm)
            cthetabuf[idx] = costheta

@nb.jit(forceobj=True, cache=True)
def _printProgressBar(iteration, total, prefix='', suffix='', decimals=1,
                      length=50, fill='█', printEnd="\r"):
    """
    Call in a loop to create terminal progress bar

    Adapted from
    https://stackoverflow.com/questions/3173320/text-progress-bar-in-the-console
    """
    prefix = '(%d/%d)' % (iteration, total) if prefix == '' else prefix
    percent = str("%."+str(decimals)+"f") % (100 * (iteration / float(total)))
    filledLength = int(length * iteration // total)
    bar = fill * filledLength + '-' * (length - filledLength)
    prog = '\r%s |%s| %s%s %s' % (prefix, bar, percent, '%', suffix)
    print(prog, end=printEnd, flush=True)
    if iteration == total:
        print()
Is this a correct implementation, or is there a better and more optimized method to calculate the bispectrum score (BSS) for an array of audio data frames?
Thank you!
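For reference, the function above only accepts 2D or 3D input (it raises "Data must be 2D or 3D."), so a single 1-D audio frame would need a different entry point. Below is a minimal sketch, under my own assumptions (one 1-D frame x, no windowing, no averaging across frames), of a direct single-frame bispectrum estimate; frame_bispectrum and audio_frames are illustrative names, not part of the code above.

import numpy as np

def frame_bispectrum(x, nfft=None):
    """Direct single-frame estimate B[f1, f2] = X[f1] * X[f2] * conj(X[f1 + f2]).

    x is one 1-D audio frame. Averaging B over many frames and normalizing
    by the mean of |X[f1] * X[f2] * X[f1 + f2]| gives the bicoherence,
    analogous to b = |B| / norm in the function above.
    """
    x = np.asarray(x, dtype=np.float64)
    nfft = x.size if nfft is None else int(nfft)
    X = np.fft.fft(x, nfft)
    half = nfft // 2 + 1
    f1, f2 = np.meshgrid(np.arange(half), np.arange(half), indexing="ij")
    # (f1 + f2) can reach nfft for even nfft; the modulo wraps it back to bin 0
    return X[f1] * X[f2] * np.conj(X[(f1 + f2) % nfft])

# Hypothetical usage on one frame of, say, 1024 samples:
# B = frame_bispectrum(audio_frames[0])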
I want to use color moment feature extraction, which requires the mean, standard deviation, and skewness of each channel. However, the skewness value cannot be written to the Excel file: the third moment can be negative, raising it to the power 1/3 produces NaN, and I get the warning below. How can I still use it?
RuntimeWarning: invalid value encountered in double_scalars
  hasil_akhir_skewS = (skewnessS/totS)**(1/3)
import cv2
import numpy as np
import math
import xlsxwriter as xlsx

book = xlsx.Workbook('data_ayam.xlsx')
sheet = book.add_worksheet()
sheet.write(0, 0, 'file')
column = 1
fitur = ['meanH', 'meanS', 'meanV', 'stdevH', 'stdevS', 'stdevV',
         'skewnessH', 'skewnessS', 'skewnessV']
for i in fitur:
    sheet.write(0, column, i)
    column += 1

ayam_type = ['non_segar', 'segar']
sum_each_type = 100
row = 1
for i in ayam_type:
    for j in range(1, sum_each_type+1):
        column = 0
        file_name = 'ayam/' + i + str(j) + '.jpg'
        print(file_name)
        sheet.write(row, column, file_name)
        column += 1

        # preprocessing
        img = cv2.imread(file_name, 1)
        resized_image = cv2.resize(img, (256, 256))
        fitur = cv2.cvtColor(resized_image, cv2.COLOR_BGR2HSV)
        H = fitur[:, :, 0]
        S = fitur[:, :, 1]
        V = fitur[:, :, 2]
        totH = H.size
        totS = S.size
        totV = V.size
        totalH = H.sum()
        totalS = S.sum()
        totalV = V.sum()
        # Mean
        meanH = totalH/totH  # equivalent to H.mean()
        meanS = totalS/totS  # equivalent to S.mean()
        meanV = totalV/totV  # equivalent to V.mean()

        # Stdev
        vrH = ((H - meanH)**2).sum()
        vrS = ((S - meanS)**2).sum()
        vrV = ((V - meanV)**2).sum()
        hasil_akhir_stdH = math.sqrt(vrH/totH)
        hasil_akhir_stdS = math.sqrt(vrS/totS)
        hasil_akhir_stdV = math.sqrt(vrV/totV)

        # Skewness
        meanijH = (H - meanH)
        meanijS = (S - meanS)
        meanijV = (V - meanV)
        skewnessH = (meanijH**3).sum()
        skewnessS = (meanijS**3).sum()
        skewnessV = (meanijV**3).sum()
        hasil_akhir_skewH = (skewnessH/totH)**(1/3)
        hasil_akhir_skewS = (skewnessS/totS)**(1/3)
        hasil_akhir_skewV = (skewnessV/totV)**(1/3)
        hasil = [meanH,
                 meanS,
                 meanV,
                 hasil_akhir_stdH,
                 hasil_akhir_stdS,
                 hasil_akhir_stdV,
                 hasil_akhir_skewH,
                 hasil_akhir_skewS,
                 hasil_akhir_skewV]
        # use a list (not a set) so the features keep their order
        # and line up with the column headers written above
        feature_props = [hasil[0],
                         hasil[1],
                         hasil[2],
                         hasil[3],
                         hasil[4],
                         hasil[5],
                         hasil[6],
                         hasil[7],
                         hasil[8]]
        for item in feature_props:
            sheet.write(row, column, item)
            column += 1
        row += 1
book.close()
The error I get is the RuntimeWarning quoted above.
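One possible direction (a minimal sketch, not part of the script above): np.cbrt is defined for negative inputs, whereas ** (1/3) on a negative NumPy float returns NaN, which is exactly what the RuntimeWarning reports. cube_root_skew is a helper name made up for illustration, and scipy.stats.skew is only relevant if the conventional normalized skewness is acceptable instead.

import numpy as np
from scipy.stats import skew  # optional, for the conventional normalized skewness

def cube_root_skew(channel):
    """Cube root of the third central moment, as in the script above,
    but NaN-safe: np.cbrt handles negative inputs, unlike x ** (1/3)
    on a negative NumPy float."""
    channel = channel.astype(np.float64)
    third_moment = ((channel - channel.mean()) ** 3).mean()
    return np.cbrt(third_moment)

# Hypothetical usage with the S channel from the script:
# hasil_akhir_skewS = cube_root_skew(S)
# Or, if the standard definition (third moment / std**3) is acceptable:
# hasil_akhir_skewS = skew(S.ravel())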
I could use a second set of eyes on my neural network. This is an MNIST digit-recognition project, and I'm not sure where the issue is. I previously implemented it successfully with TensorFlow, but I'm not looking to use an API as the solution here. I would appreciate any help anyone can give. Here's the project on GitHub; it's only an init file and then the neural_network module:
https://github.com/nealchawn/ai_trial_2
import random

import numpy as np


class NeuralNetwork(object):
    def __init__(self, sizes):
        self.activations = []
        self.outputs = []
        self.weights = []
        self.biases = []
        self.sizes = sizes
        self.set_random_weights()
        self.set_random_biases()

    def set_random_weights(self):
        for layer_index, layer_size in enumerate(self.sizes[1:], start=1):
            layer_weights = []
            for size in range(layer_size):
                for size in range(self.sizes[layer_index-1]):
                    layer_weights.append(random.uniform(-5.0, 5.0))
            self.weights.append(layer_weights)

    def set_random_biases(self):
        total_biases = 0
        # add extra zero bias to help future indexing
        # self.biases.append(0)
        for index, size in enumerate(self.sizes[0:-1], start=1):
            total_biases += 1
        for x in range(total_biases):
            self.biases.append(random.uniform(-5.0, 5.0))

    def train_network(self, training_data, training_labels):
        if len(training_data) != len(training_labels):
            print("Error data and labels must be the same length")
        data = list(zip(training_data, training_labels))
        self.sgd(data)

    def sgd(self, data, mini_batch_size=1000):
        # first we'll create batches of training data
        n = len(data)
        data_batches = [
            data[k:k + mini_batch_size]
            for k in range(0, n, mini_batch_size)
        ]
        print(len(data_batches))
        i = 0
        for mini_batch in data_batches:
            print("Batch: " + str(i))
            i += 1
            self.update_mini_batch(mini_batch)
            self.network_outputs()
        print("Finished All training data!")

    def update_mini_batch(self, mini_data_batch):
        weight_gradients = []
        bias_gradients = []
        i = 0
        for training_input in mini_data_batch:
            training_object, training_label = training_input
            self.feedforward(training_object)
            weights_gradient, bias_gradient = self.backpropogation(training_label)
            weight_gradients.append(weights_gradient)
            bias_gradients.append(bias_gradient)
        # average gradients
        weights_gradient = np.average(weight_gradients, axis=0)
        biases_gradient = np.average(bias_gradients, axis=0)
        # may need to convert to list
        weights_gradient_list = []
        for weight_gradient in weights_gradient:
            weights_gradient_list.append(weight_gradient.tolist())
        # weights_gradient = weights_gradient.tolist()
        biases_gradient = biases_gradient.tolist()
        for x in range(len(self.biases)):
            self.biases[x] -= 0.1*biases_gradient[x]
        weight_gradient_index = 0
        for layer_index, layer_weights in enumerate(self.weights, start=0):
            for weight_index, weight in enumerate(layer_weights):
                self.weights[layer_index][weight_index] = weight - 0.1*weights_gradient_list[layer_index][weight_index]
                weight_gradient_index += 1

    def feedforward(self, training_object):
        # set inputs
        self.outputs = []
        self.activations = []
        temp_activations = []
        for index in range(self.sizes[0]):
            temp_activations.append(training_object[index])
        self.activations.append(temp_activations)
        for layer_index, layer_size in enumerate(self.sizes[1:], start=0):
            layer_weights = self.weights[layer_index]
            layer_inputs = self.activations[layer_index]
            weight_index = 0
            layer_outputs = []
            layer_activations = []
            for node_index in range(layer_size):
                node_weights = []
                # get node weights
                #print(f"layer size: {layer_size}, previous_layer_size: {self.sizes[layer_index]}, layer weights: {len(layer_weights)}")
                for x in range(self.sizes[layer_index]):
                    node_weights.append(layer_weights[weight_index])
                    weight_index += 1
                output = 0
                for indx in range(len(node_weights)):
                    output += layer_inputs[indx]*node_weights[indx]
                output = output + self.biases[layer_index]
                layer_outputs.append(output)
                layer_activations.append(self.sigmoid(output))
            self.outputs.append(layer_outputs)
            self.activations.append(layer_activations)

    def backpropogation(self, training_label):
        costs = []
        output_layer_activations = self.activations[-1]
        output_layer_outputs = self.outputs[-1]
        correct_labels = self.translate_label_to_array(training_label)
        costs.append(self.compute_cost_derivative(correct_labels, output_layer_activations))
        for cost_index, cost in enumerate(costs[0]):
            costs[0][cost_index] = cost*self.sigmoid_prime(output_layer_outputs[cost_index])
        # calculate costs for layers
        for layer_index, layer_size in enumerate(self.sizes[::-1][1:-1], start=1):
            layer_costs = []
            layer_weights = self.weights[-layer_index]
            layer_outputs = self.outputs[-(layer_index+1)]
            previous_layer_costs = costs[layer_index-1]
            next_layer_size = self.sizes[::-1][1:][layer_index]
            layer_weights_formatted = []
            for x in range(layer_size):
                layer_weights_formatted.append([])
            for weight_index, weight in enumerate(layer_weights, start=0):
                #print(f"weight index:{weight_index % next_layer_size} layer_index: {weight_index}")
                layer_weights_formatted[weight_index % layer_size].append(layer_weights[weight_index])
            #print(f"next_layer_size:{layer_size} costs: {len(previous_layer_costs)}, layer_weights_formatted: {layer_weights_formatted}")
            for x in range(layer_size):
                node_cost = 0
                for y, cost in enumerate(previous_layer_costs, start=0):
                    node_cost += layer_weights_formatted[x][y]*cost
                layer_costs.append(node_cost)
            # layer_costs same order as next layer's activations
            for cost_index, cost in enumerate(layer_costs):
                layer_costs[cost_index] = cost * self.sigmoid_prime(layer_outputs[cost_index])
            costs.append(layer_costs)
        # calculate weight errors
        weight_errors = []
        bias_errors = []
        for layer_index, layer_costs in enumerate(costs[::-1]):
            layer_activations = self.activations[layer_index]
            layer_weight_errors = []
            for cost_index, cost in enumerate(layer_costs, start=0):
                for activation in layer_activations:
                    layer_weight_errors.append(activation * cost)
            weight_errors.append(np.array(layer_weight_errors))
            bias_errors.append(sum(layer_costs))
        return weight_errors, bias_errors

    # conversion tool
    def translate_label_to_array(self, y):
        translated_label = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        translated_label[y] = 1
        return np.array(translated_label)

    # output tools
    def network_outputs(self):
        print("Output layer: ")
        for x in range(self.sizes[-1]):
            print("node " + str(x) + ": " + str(self.activations[-1][x]))

    def total_activations(self):
        print(len(self.activations))

    def compute_cost_derivative(self, y, output_activations):
        """Return the vector of partial derivatives \partial C_x /
        \partial a for the output activations."""
        return (output_activations - y)

    def sigmoid(self, z):
        """The sigmoid function."""
        return (1.0 / (1.0 + np.exp(-z)))

    def sigmoid_prime(self, z):
        return (self.sigmoid(z) * (1 - self.sigmoid(z)))