I would like to create a ZigZag indicator for stocks. I'm working with Python, and my English is not great, so my apologies for that.
I took part of my code from: Pandas: Zigzag segmentation of data based on local minima-maxima
The problem is that the zigzag I would like looks like this (the MetaStock ZigZag indicator):
And my zigzag code looks like this (note that you can change the percentage with a filter):
from pandas_datareader import data
import pandas as pd
from datetime import date
from pandas_datareader.nasdaq_trader import get_nasdaq_symbols
from scipy import signal
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
np.random.seed(0)
def filter(values, percentage):
previous = values[0]
mask = [True]
for value in values[1:]:
relative_difference = np.abs(value - previous)/previous
if relative_difference > percentage:
previous = value
mask.append(True)
else:
mask.append(False)
return mask
def main(stock=None, start_date=None, end_date=None):
df = data.DataReader(
stock,
start=start_date, end=end_date,
data_source='yahoo'
)
return df
if __name__ == '__main__':
today = '{}'.format(date.today())
stocks = ['BLL']
cont = 0
for stock in stocks:
cont += 1
try:
serie = main(stock=stock, start_date='2018-1-1', end_date=today)
serie.insert(loc=0, column='Date', value=serie.index)
serie = serie.reset_index(drop=True)
# Create zigzag trendline.
########################################
# Find peaks(max).
data_x = serie.index.values
data_y = serie['Close'].values
peak_indexes = signal.argrelextrema(data_y, np.greater)
peak_indexes = peak_indexes[0]
# Find valleys(min).
valley_indexes = signal.argrelextrema(data_y, np.less)
valley_indexes = valley_indexes[0]
# Merge peaks and valleys data points using pandas.
df_peaks = pd.DataFrame({'date': data_x[peak_indexes], 'zigzag_y': data_y[peak_indexes]})
df_valleys = pd.DataFrame({'date': data_x[valley_indexes], 'zigzag_y': data_y[valley_indexes]})
df_peaks_valleys = pd.concat([df_peaks, df_valleys], axis=0, ignore_index=True, sort=True)
# Sort peak and valley datapoints by date.
df_peaks_valleys = df_peaks_valleys.sort_values(by=['date'])
p = 0.1 # 10%
filter_mask = filter(df_peaks_valleys.zigzag_y, p)
filtered = df_peaks_valleys[filter_mask]
# Instantiate axes.
(fig, ax) = plt.subplots(figsize=(10,10))
# Plot all local extrema.
ax.plot(df_peaks_valleys['date'].values, df_peaks_valleys['zigzag_y'].values,
color='red', label="Extrema")
# Plot zigzag trendline.
ax.plot(filtered['date'].values, filtered['zigzag_y'].values,
color='blue', label="ZigZag")
# Plot original line.
ax.plot(data_x, data_y, linestyle='dashed', color='black', label="Org. line", linewidth=1)
plt.show()
print('{} - {}| success'.format(cont, stock))
except Exception:
print('{} - {}| ERROR'.format(cont, stock))
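One issue with that mask is that it can keep several highs (or lows) in a row, whereas the MetaStock ZigZag alternates swing highs and swing lows and only confirms a pivot after a reversal of at least the chosen percentage. A minimal reversal-based sketch of that idea (a hypothetical helper, not the MetaStock code) could look like this:
def zigzag_points(closes, pct=0.1):
    # Keep alternating swing highs/lows; a new pivot is confirmed only once
    # price has reversed by at least `pct` from the last extreme.
    pivots = [0]              # index of the currently extending pivot
    trend = 0                 # 0 = undetermined, 1 = up swing, -1 = down swing
    last_extreme = closes[0]
    for i in range(1, len(closes)):
        change = (closes[i] - last_extreme) / last_extreme
        if trend <= 0 and change >= pct:       # reversal up: previous pivot was a low
            pivots.append(i)
            trend = 1
            last_extreme = closes[i]
        elif trend >= 0 and change <= -pct:    # reversal down: previous pivot was a high
            pivots.append(i)
            trend = -1
            last_extreme = closes[i]
        elif (trend == 1 and closes[i] > last_extreme) or \
             (trend == -1 and closes[i] < last_extreme):
            pivots[-1] = i                     # extend the current swing to the new extreme
            last_extreme = closes[i]
    return pivots
Something like zz = zigzag_points(serie['Close'].values, 0.1), and then plotting data_y[zz] against data_x[zz], should give alternating legs instead of consecutive same-direction extrema.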
Here is an example on GitHub: zigzag
cimport cython
import numpy as np
from numpy cimport ndarray, int_t
DEF PEAK = 1
DEF VALLEY = -1
@cython.boundscheck(False)
@cython.wraparound(False)
cpdef int_t identify_initial_pivot(double [:] X,
double up_thresh,
double down_thresh):
cdef:
double x_0 = X[0]
double x_t = x_0
double max_x = x_0
double min_x = x_0
int_t max_t = 0
int_t min_t = 0
up_thresh += 1
down_thresh += 1
for t in range(1, len(X)):
x_t = X[t]
if x_t / min_x >= up_thresh:
return VALLEY if min_t == 0 else PEAK
if x_t / max_x <= down_thresh:
return PEAK if max_t == 0 else VALLEY
if x_t > max_x:
max_x = x_t
max_t = t
if x_t < min_x:
min_x = x_t
min_t = t
t_n = len(X)-1
return VALLEY if x_0 < X[t_n] else PEAK
@cython.boundscheck(False)
@cython.wraparound(False)
cpdef peak_valley_pivots(double [:] X,
double up_thresh,
double down_thresh):
"""
Find the peaks and valleys of a series.
:param X: the series to analyze
:param up_thresh: minimum relative change necessary to define a peak
:param down_thresh: minimum relative change necessary to define a valley
:return: an array with 0 indicating no pivot and -1 and 1 indicating
valley and peak
The First and Last Elements
---------------------------
The first and last elements are guaranteed to be annotated as peak or
valley even if the segments formed do not have the necessary relative
changes. This is a tradeoff between technical correctness and the
propensity to make mistakes in data analysis. The possible mistake is
ignoring data outside the fully realized segments, which may bias
analysis.
"""
if down_thresh > 0:
raise ValueError('The down_thresh must be negative.')
cdef:
int_t initial_pivot = identify_initial_pivot(X,
up_thresh,
down_thresh)
int_t t_n = len(X)
ndarray[int_t, ndim=1] pivots = np.zeros(t_n, dtype=np.int_)
int_t trend = -initial_pivot
int_t last_pivot_t = 0
double last_pivot_x = X[0]
double x, r
pivots[0] = initial_pivot
# Adding one to the relative change thresholds saves operations. Instead
# of computing relative change at each point as x_j / x_i - 1, it is
# computed as x_j / x_i. Then, this value is compared to the threshold + 1.
# This saves (t_n - 1) subtractions.
up_thresh += 1
down_thresh += 1
for t in range(1, t_n):
x = X[t]
r = x / last_pivot_x
if trend == -1:
if r >= up_thresh:
pivots[last_pivot_t] = trend
trend = PEAK
last_pivot_x = x
last_pivot_t = t
elif x < last_pivot_x:
last_pivot_x = x
last_pivot_t = t
else:
if r <= down_thresh:
pivots[last_pivot_t] = trend
trend = VALLEY
last_pivot_x = x
last_pivot_t = t
elif x > last_pivot_x:
last_pivot_x = x
last_pivot_t = t
if last_pivot_t == t_n-1:
pivots[last_pivot_t] = trend
elif pivots[t_n-1] == 0:
pivots[t_n-1] = -trend
return pivots
@cython.boundscheck(False)
@cython.wraparound(False)
cpdef double max_drawdown(ndarray[double, ndim=1] X):
"""
Compute the maximum drawdown of some sequence.
:return: 0 if the sequence is strictly increasing.
otherwise the abs value of the maximum drawdown
of sequence X
"""
cdef:
double mdd = 0
double peak = X[0]
double x, dd
for x in X:
if x > peak:
peak = x
dd = (peak - x) / peak
if dd > mdd:
mdd = dd
return mdd if mdd != 0.0 else 0.0
@cython.boundscheck(False)
@cython.wraparound(False)
def pivots_to_modes(int_t [:] pivots):
"""
Translate pivots into trend modes.
:param pivots: the result of calling ``peak_valley_pivots``
:return: numpy array of trend modes. That is, between (VALLEY, PEAK] it
is 1 and between (PEAK, VALLEY] it is -1.
"""
cdef:
int_t x, t
ndarray[int_t, ndim=1] modes = np.zeros(len(pivots),
dtype=np.int_)
int_t mode = -pivots[0]
modes[0] = pivots[0]
for t in range(1, len(pivots)):
x = pivots[t]
if x != 0:
modes[t] = mode
mode = -x
else:
modes[t] = mode
return modes
def compute_segment_returns(X, pivots):
"""
:return: numpy array of the pivot-to-pivot returns for each segment."""
pivot_points = X[pivots != 0]
return pivot_points[1:] / pivot_points[:-1] - 1.0
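If that Cython module is compiled (or the corresponding package installed) and importable as zigzag, the pivots can be computed directly from the close prices. A minimal sketch, reusing serie, np and ax from the first snippet and a 10% threshold:
from zigzag import peak_valley_pivots

closes = serie['Close'].values                  # must be a float array
pivots = peak_valley_pivots(closes, 0.1, -0.1)  # 1 = peak, -1 = valley, 0 = no pivot
zz_idx = np.where(pivots != 0)[0]
ax.plot(zz_idx, closes[zz_idx], color='green', label='ZigZag (zigzag lib)')
Because peak_valley_pivots always alternates peaks and valleys, this is much closer to the MetaStock-style output than the simple percentage mask above.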
TradingView has an excellent ZigZag. You can see the code in Pine Script (the TradingView language) and translate it into Python.
//@version=5
indicator("Zig Zag", overlay=true, max_lines_count=500, max_labels_count=500)
dev_threshold = input.float(title="Deviation (%)", defval=5.0, minval=0.00001, maxval=100.0)
depth = input.int(title="Depth", defval=10, minval=1)
line_color = input(title="Line Color", defval=#2962FF)
extend_to_last_bar = input(title="Extend to Last Bar", defval=true)
display_reversal_price = input(title="Display Reversal Price", defval=true)
display_cumulative_volume = input(title="Display Cumulative Volume", defval=true)
display_reversal_price_change = input(title="Display Reversal Price Change", defval=true, inline="price rev")
difference_price = input.string("Absolute", "", options=["Absolute", "Percent"], inline="price rev")
pivots(src, length, isHigh) =>
p = nz(src[length])
if length == 0
[time, p]
else
isFound = true
for i = 0 to math.abs(length - 1)
if isHigh and src[i] > p
isFound := false
if not isHigh and src[i] < p
isFound := false
for i = length + 1 to 2 * length
if isHigh and src[i] >= p
isFound := false
if not isHigh and src[i] <= p
isFound := false
if isFound and length * 2 <= bar_index
[time[length], p]
else
[int(na), float(na)]
[iH, pH] = pivots(high, math.floor(depth / 2), true)
[iL, pL] = pivots(low, math.floor(depth / 2), false)
calc_dev(base_price, price) =>
100 * (price - base_price) / base_price
price_rotation_aggregate(price_rotation, pLast, cum_volume) =>
str = ""
if display_reversal_price
str += str.tostring(pLast, format.mintick) + " "
if display_reversal_price_change
str += price_rotation + " "
if display_cumulative_volume
str += "\n" + cum_volume
str
caption(isHigh, iLast, pLast, price_rotation, cum_volume) =>
price_rotation_str = price_rotation_aggregate(price_rotation, pLast, cum_volume)
if display_reversal_price or display_reversal_price_change or display_cumulative_volume
if not isHigh
label.new(iLast, pLast, text=price_rotation_str, style=label.style_none, xloc=xloc.bar_time, yloc=yloc.belowbar, textcolor=color.red)
else
label.new(iLast, pLast, text=price_rotation_str, style=label.style_none, xloc=xloc.bar_time, yloc=yloc.abovebar, textcolor=color.green)
price_rotation_diff(pLast, price) =>
if display_reversal_price_change
tmp_calc = price - pLast
str = difference_price == "Absolute"? (math.sign(tmp_calc) > 0? "+" : "") + str.tostring(tmp_calc, format.mintick) : (math.sign(tmp_calc) > 0? "+" : "-") + str.tostring((math.abs(tmp_calc) * 100)/pLast, format.percent)
str := "(" + str + ")"
str
else
""
var line lineLast = na
var label labelLast = na
var int iLast = 0
var float pLast = 0
var bool isHighLast = true // otherwise the last pivot is a low pivot
var int linesCount = 0
var float sumVol = 0
var float sumVolLast = 0
pivotFound(dev, isHigh, index, price) =>
if isHighLast == isHigh and not na(lineLast)
// same direction
if isHighLast ? price > pLast : price < pLast
if linesCount <= 1
line.set_xy1(lineLast, index, price)
line.set_xy2(lineLast, index, price)
label.set_xy(labelLast, index, price)
label.set_text(labelLast, price_rotation_aggregate(price_rotation_diff(line.get_y1(lineLast), price), price, str.tostring(sumVol + sumVolLast, format.volume)))
[lineLast, labelLast, isHighLast, false, sumVol + sumVolLast]
else
[line(na), label(na), bool(na), false, float(na)]
else // reverse the direction (or create the very first line)
if na(lineLast)
id = line.new(index, price, index, price, xloc=xloc.bar_time, color=line_color, width=2)
lb = caption(isHigh, index, price, price_rotation_diff(pLast, price), str.tostring(sumVol, format.volume))
[id, lb, isHigh, true, sumVol]
else
// price move is significant
if math.abs(dev) >= dev_threshold
id = line.new(iLast, pLast, index, price, xloc=xloc.bar_time, color=line_color, width=2)
lb = caption(isHigh, index, price, price_rotation_diff(pLast, price), str.tostring(sumVol, format.volume))
[id, lb, isHigh, true, sumVol]
else
[line(na), label(na), bool(na), false, float(na)]
sumVol += nz(volume[math.floor(depth / 2)])
if not na(iH) and not na(iL) and iH == iL
dev1 = calc_dev(pLast, pH)
[id2, lb2, isHigh2, isNew2, sum2] = pivotFound(dev1, true, iH, pH)
if isNew2
linesCount := linesCount + 1
if not na(id2)
lineLast := id2
labelLast := lb2
isHighLast := isHigh2
iLast := iH
pLast := pH
sumVolLast := sum2
sumVol := 0
dev2 = calc_dev(pLast, pL)
[id1, lb1, isHigh1, isNew1, sum1] = pivotFound(dev2, false, iL, pL)
if isNew1
linesCount := linesCount + 1
if not na(id1)
lineLast := id1
labelLast := lb1
isHighLast := isHigh1
iLast := iL
pLast := pL
sumVolLast := sum1
sumVol := 0
else
if not na(iH)
dev1 = calc_dev(pLast, pH)
[id, lb, isHigh, isNew, sum] = pivotFound(dev1, true, iH, pH)
if isNew
linesCount := linesCount + 1
if not na(id)
lineLast := id
labelLast := lb
isHighLast := isHigh
iLast := iH
pLast := pH
sumVolLast := sum
sumVol := 0
else
if not na(iL)
dev2 = calc_dev(pLast, pL)
[id, lb, isHigh, isNew, sum] = pivotFound(dev2, false, iL, pL)
if isNew
linesCount := linesCount + 1
if not na(id)
lineLast := id
labelLast := lb
isHighLast := isHigh
iLast := iL
pLast := pL
sumVolLast := sum
sumVol := 0
var line extend_line = na
var label extend_label = na
if extend_to_last_bar == true and barstate.islast == true
isHighLastPoint = not isHighLast
curSeries = isHighLastPoint ? high : low
if na(extend_line) and na(extend_label)
extend_line := line.new(line.get_x2(lineLast), line.get_y2(lineLast), time, curSeries, xloc=xloc.bar_time, color=line_color, width=2)
extend_label := caption(not isHighLast, time, curSeries, price_rotation_diff(line.get_y2(lineLast), curSeries), str.tostring(sumVol, format.volume))
line.set_xy1(extend_line, line.get_x2(lineLast), line.get_y2(lineLast))
line.set_xy2(extend_line, time, curSeries)
price_rotation = price_rotation_diff(line.get_y1(extend_line), curSeries)
remaingRealTimeVol = 0.
for i = math.abs(math.floor(depth / 2) - 1) to 0
remaingRealTimeVol += volume[i]
label.set_xy(extend_label, time, curSeries)
label.set_text(extend_label, price_rotation_aggregate(price_rotation, curSeries, str.tostring(sumVol+remaingRealTimeVol, format.volume)))
label.set_textcolor(extend_label, isHighLastPoint? color.green : color.red)
label.set_yloc(extend_label, yloc= isHighLastPoint? yloc.abovebar : yloc.belowbar)
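The heart of that script is the pivots() helper: a bar is a pivot high when it is strictly higher than the depth/2 bars before it and at least as high as the depth/2 bars after it (mirrored for pivot lows), and a new ZigZag leg is only drawn when the deviation from the last pivot exceeds dev_threshold. A rough Python sketch of the pivot-detection part (my own translation attempt, assuming high and low are NumPy arrays, not TradingView's code):
import numpy as np

def pivot_points(high, low, depth=10):
    length = depth // 2
    pivot_highs, pivot_lows = [], []
    for t in range(length, len(high) - length):
        before_h, after_h = high[t - length:t], high[t + 1:t + length + 1]
        before_l, after_l = low[t - length:t], low[t + 1:t + length + 1]
        if high[t] > before_h.max() and high[t] >= after_h.max():
            pivot_highs.append(t)
        if low[t] < before_l.min() and low[t] <= after_l.min():
            pivot_lows.append(t)
    return np.array(pivot_highs), np.array(pivot_lows)
To reproduce the full indicator you would still have to walk through the merged pivots in time order and apply the dev_threshold / same-direction logic from pivotFound() above.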
I am trying to replicate the R code below to estimate parameters with the maximum likelihood method in Python.
I want to obtain the same results from both versions, but my estimated values are different, and I am not sure whether the two codes are optimising the same objective.
R-Code:
ll <- function (prop, numerator, denominator) {
return(
lgamma(denominator + 1) -
lgamma( numerator + 1) -
lgamma(denominator - numerator + 1) +
numerator * log(prop) + (denominator - numerator) * log(1 - prop)
)
}
compLogLike <- function(pvec){
return(sum(ll(pvec, dat$C, dat$N)))
}
fct_p_ll <- function(a, c0, c1){
xa_ <- exp(c0 + c1*c(20, a))
return(1 - exp((xa_[1]-xa_[2])/c1))
}
fct_ll <- function(x){
pv <- sapply(22.5+5*(0:8), FUN = fct_p_ll, c0 = x[1], c1 = x[2])
return(compLogLike(pv))
}
opt.res <- optim(par = c(-9.2, 0.07), fn = fct_ll, control = list(fnscale = -1.0), hessian = TRUE)
fisherInfo <- solve(-opt.res$hessian)
propSigma <- sqrt(diag(fisherInfo))
upper <- opt.res$par+1.96*propSigma
lower <- opt.res$par-1.96*propSigma
interval <- data.frame(val = opt.res$par, ci.low=lower, ci.up = upper)
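For reference, ll() is just the binomial log-likelihood summed over the rows of dat, i.e.
\ell(p; k, n) = \log\binom{n}{k} + k \log p + (n - k)\log(1 - p),
with \log\binom{n}{k} = \mathrm{lgamma}(n+1) - \mathrm{lgamma}(k+1) - \mathrm{lgamma}(n-k+1), so both implementations should be evaluating exactly the same objective.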
Python Code:
def ll(prop, numerator, denominator):
print(prop, numerator, denominator)
if prop > 0:
value = (math.lgamma(denominator + 1) -
math.lgamma( numerator + 1) -
math.lgamma(denominator - numerator + 1) +
numerator * math.log(prop) + (denominator - numerator) * math.log(1 - prop))
return value
return 0
def compLogLike(pvec):
p = list(pvec)
c = list(df["C"])
n = list(df["N"])
compLog = 0
for idx, val in enumerate(p):
compLog += ll(p[idx],c[idx],n[idx])
print(compLog)
return compLog
def fct_p_ll(a,c0,c1):
val_list = [c1 * val for val in [20, a]]
xa_ = np.exp([c0 + val for val in val_list])
return 1 - np.exp((xa_[0] -xa_[1])/c1)
def fct_ll(x):
ages_1 = np.arange(22.5, 67.5 , 5)
pv = [fct_p_ll(a=val,c0=x[0],c1=x[1]) for val in ages_1]
return compLogLike(pv)
opt = minimize(fct_ll, [-9.2, 0.07], method='Nelder-Mead', hess=Hessian(lambda x: fct_ll(x,a)))
Any inputs would be really helpful.
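One thing worth checking: optim(..., control = list(fnscale = -1.0)) maximises fct_ll, while scipy.optimize.minimize minimises, so the Python objective has to be negated to be comparable; returning 0 from ll() when prop <= 0 (instead of an invalid / -inf value) also changes the objective near the boundary. A minimal sketch of the negated call, reusing the fct_ll defined above:
from scipy.optimize import minimize

def neg_fct_ll(x):
    # minimising the negative log-likelihood == maximising the log-likelihood
    return -fct_ll(x)

opt = minimize(neg_fct_ll, [-9.2, 0.07], method='Nelder-Mead')
print(opt.x)   # compare with opt.res$par from R
Nelder-Mead does not return a Hessian, so the confidence intervals from the R code would still need the Hessian computed separately (e.g. by finite differences).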
I use a Jupyter notebook to do the filtering. This is my code:
import os
import sys
import itertools
from sklearn.metrics import confusion_matrix,roc_curve,auc
import random
import numpy as np
import pandas as pd
import math
import glob
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
from collections import defaultdict
from scipy.optimize import curve_fit
#### INPUTS
path_to_predictor_runs_sc = r"C:\Users\z'y\Desktop\paper_predictor_combined.sc"
path_to_pilot_runs_sc = r"C:\Users\z'y\Desktop\paper_interface_design_pilot_combined.sc"
predictor_df = pd.read_csv(path_to_predictor_runs_sc, sep="\s+")
pilot_df = pd.read_csv(path_to_pilot_runs_sc, sep="\s+")
predictor_df = predictor_df[predictor_df['ddg_norepack'] <= 20]
predictor_df = predictor_df[predictor_df['score_per_res'] <= 0]
pilot_df = pilot_df[pilot_df['ddg'] <= 20]
pilot_df = pilot_df[pilot_df['score_per_res'] <= 0]
def suffix_all_columns(df, suffix):
cols = list(df.columns)
for i in range(len(cols)):
cols[i] = cols[i] + suffix
df.columns = cols
suffix_all_columns(predictor_df, "_pred")
predictor_df['description'] = predictor_df['description_pred']
pilot_df = pilot_df.merge(predictor_df, 'inner', 'description')
print("Length of predictor dataframe: ", len(predictor_df))
print("Length of pilot dataframe: ", len(pilot_df))
# The format here is cut value, higher better, name in predictor, is integer score-term
terms_and_cuts = {
# 'interface_buried_sasa': [1400, True, "interface_buried_sasa_pred", False],
'ddg': [-30, False, "ddg_norepack_pred", False],
'score_per_res' : [-2.4, False, "score_per_res_pred", False],
'mismatch_probability': [0.2, False, "mismatch_probability_pred", False],
'contact_molecular_surface':[450, True,"contact_molecular_surface_pred", False],
'contact_patch': [125, True, "contact_patch_pred", False]
}
score_df = pilot_df
# Filter all the terms and print the thresholds
ok_terms = []
for pilot_term in terms_and_cuts:
cut, good_high, term, is_int = terms_and_cuts[pilot_term]
ok_term = pilot_term.replace("_pilot", "") + "_ok"
if ( good_high ):
score_df[ok_term] = score_df[pilot_term] >= cut
else:
score_df[ok_term] = score_df[pilot_term] <= cut
ok_terms.append(ok_term)
print("%30s: %6.2f"%(pilot_term, cut))
# Print the pass rates for each term
print("")
score_df['orderable'] = True
for ok_term in ok_terms:
score_df['orderable'] &= score_df[ok_term]
print("%30s: %5.0f%% pass-rate"%(ok_term.replace("_ok", ""), score_df[ok_term].sum() / len(score_df) * 100))
# print the overall pass rate
subdf = score_df
print("Orderable: %i -- %.2f%%"%(subdf['orderable'].sum(), (100*subdf['orderable'].sum() / len(subdf))))
# Setup the Maximum Likelihood functions
eqs = []
def prob_of_1term(value, zdf, pilot_term, terms_and_cuts):
cut, good_high, term, is_int = terms_and_cuts[pilot_term]
cut_div = cut + 0.01
if (is_int):
cut_div = 1
# print(value)
representatives = zdf[abs( (zdf[term] - value) / cut_div ) < 0.02]
if ( good_high ):
fail = (representatives[pilot_term] < cut).sum()
ok = (representatives[pilot_term] >= cut).sum()
else:
fail = (representatives[pilot_term] > cut).sum()
ok = (representatives[pilot_term] <= cut).sum()
if (fail + ok < 5):
return np.nan, 1
return ok / (fail + ok), fail + ok
def get_low_high_bin_size_low_bin_num_bins(dfz, pilot_term, terms_and_cuts):
cut, good_high, term, is_int = terms_and_cuts[pilot_term]
cut_div = cut + 0.01
low = dfz[term].min()
high = dfz[term].max()
if (abs( (low - cut) / cut_div ) > 1000 ):
print("Crazy value!!!", pilot_term, low, high)
assert(False)
bin_size = abs(cut_div * 0.02)
if ( is_int ):
bin_size = 1
low_bin = math.floor( low / bin_size )
num_bins = math.floor( high / bin_size ) - low_bin + 1
return low, high, bin_size, low_bin, num_bins
# Find the index i of xs that maximizes the following expression:
# np.sum( xs[:i] * flip < flip * divisor ) + np.sum( xs[i:] * flip > flip * divisor )
def find_ind_that_divides( divisor, array, flip ):
best_sum = 0
best_i = None
for i in range(len(array)):
value = np.sum( array[:i] * flip < flip * divisor ) + np.sum( array[i:] * flip > flip * divisor )
if ( value > best_sum ):
best_sum = value
best_i = i
return best_i
def sigmoid(x, a, b):
return 1 / ( 1 + np.exp( -( x - a) * b ) )
def smooth_empty_prob_array(arr, good_high, counts, graphs=False, low=0, bin_size=0, gd=None, term=""):
counts = list(counts)
x = list(range(0, len(arr)))
to_remove = []
for i in range(len(arr)):
if (math.isnan(arr[i])):
to_remove.append(i)
arr_copy = list(arr)
for i in reversed(to_remove):
x.pop(i)
arr_copy.pop(i)
counts.pop(i)
arr_copy = np.array(arr_copy)
# print(good_high)
# We're trying to fit a sigmoid here. I've found that while the
# function will often converge with a nonsense starting point
# if you want it to be robust, you need to calculate the parameters
# by hand first
# The xguess is where the sigmoid crosses 0.5
flip = 1 if good_high else -1
never_high = max(arr_copy) < 0.5
never_low = min(arr_copy) > 0.5
# Your data is totally garbage
if (never_high and never_low):
xguess = x[int(len(x)/2)]
# Your data is all below 0.5, assign xguess to edge
elif ( never_high ):
if ( good_high ):
xguess = x[-1]
else:
xguess = x[0]
# Your data is all above 0.5, assign xguess to edge
elif (never_low):
if ( good_high ):
xguess = x[0]
else:
xguess = x[-1]
else:
# here we have full range data
# pick x that maximizes the following function
# np.sum( arr_copy[:x] < 0.5 ) + np.sum( arr_copy[x:] > 0.5 )
best_ix = find_ind_that_divides(0.5, arr_copy, flip)
xguess = x[best_ix]
# Ok, now let's work on the slope guess
# Formula is: guess = ln( 1 / y - 1) / ( xdist from 0.5 to y)
# We'll use y = 0.2 and 0.8
never_high = max(arr_copy) < 0.8
never_low = min(arr_copy) > 0.2
# Data never goes above 0.8, assign xvalue to edge
if ( never_high ):
if ( good_high ):
ub = x[-1]
else:
lb = x[0]
else:
# Find xvalue that corresponds to graph crossing 0.8
best_ix = find_ind_that_divides(0.8, arr_copy, flip)
if ( good_high ):
ub = x[best_ix]
else:
lb = x[best_ix]
# Data never goes below 0.2, assign xvalue to edge
if ( never_low ):
if ( good_high ):
lb = x[0]
else:
ub = x[-1]
else:
# Find xvalue that corresponds to graph crossing 0.2
best_ix = find_ind_that_divides(0.2, arr_copy, flip)
if ( good_high ):
lb = x[best_ix]
else:
ub = x[best_ix]
# One side of the data is bad, just use the other side
if ( ub <= xguess ):
ub = xguess - lb + xguess
if ( lb >= xguess ):
lb = xguess - ( ub - xguess )
# The data is really bad here, just assign the ub and lb to the edges
if ( ub == lb ):
lb = x[0]
ub = x[-1]
# Average our two distances
critical_dist = (( ub - xguess ) + (xguess - lb )) / 2
# Find slope guess
slope_guess = np.abs( np.log( 1 / 0.2 - 1) / critical_dist ) * flip
# Curve fit
popt, pcov = curve_fit( sigmoid, x, arr_copy, p0=(xguess, slope_guess), maxfev=100000, sigma=1/np.sqrt(counts) )
# Uncomment this if you're debugging the guesses (They do really well tbh)
# popt = (xguess, slope_guess)
# Our new fitted data
arr2 = sigmoid(np.arange(0, len(arr), 1), popt[0], popt[1])
a_prime = popt[0]*bin_size+low
b_prime = popt[1]/bin_size
global eqs
eqs.append( " 1 / ( 1 + EXP( -( %s - %.5f ) * %.5f ) ) "%(term[:-5], a_prime, b_prime))
if (graphs):
plt.figure(figsize=(5,3))
sns.set(font_scale=1)
plt.plot(np.arange(0, len(arr), 1)*bin_size+low, arr)
plt.plot(np.arange(0, len(arr), 1)*bin_size+low, arr2)
if (gd):
plt.xlim([gd[0], gd[1]])
plt.xlabel(gd[2])
plt.axvline(gd[3], color='r')
plt.ylabel("P( passing filter )")
sns.set(font_scale=1.8)
plt.show()
for i in range(len(arr2)):
arr[i] = arr2[i]
def create_prob_array(low, high, low_bin, num_bins, bin_size, pilot_term, dfz, terms_and_cuts, graphs=False):
cut, good_high, term, is_int = terms_and_cuts[pilot_term]
arr = np.zeros(num_bins)
for i in range(len(arr)):
arr[i] = np.nan
counts = np.zeros(num_bins)
counts.fill(1)
print("%s from %.3f to %.3f"%(pilot_term, low, high))
for val in np.arange(low, high + bin_size, bin_size/2):
binn = math.floor( val / bin_size ) - low_bin
if (binn >= len(arr)):
continue
if (is_int):
val = round(val, 1)
# print(val)
prob, count = prob_of_1term(val, dfz, pilot_term, terms_and_cuts)
counts[binn] = count + 1
if ( math.isnan(prob)):
pass
else:
arr[binn] = prob
gd = None
try:
gd = graph_data[pilot_term]
except:
pass
smooth_empty_prob_array(arr, good_high, counts, graphs, low, bin_size, gd, term)
return arr
def apply_prob_arrays(dfz, prob_arrays, prob_name):
prob_terms = []
for term in prob_arrays:
print(term)
arr, bin_size, low_bin = prob_arrays[term]
prob_term = term + "_prob"
idx = (np.floor(dfz[term] / bin_size) - low_bin).astype("int")
is_low = (idx < 0)
is_high = (idx >= len(arr) ).sum()
low = np.min(idx)
high = np.max(idx)
# if (is_low.sum() > 0 or is_high.sum() > 0):
# print("Warning: bounds overshoot on %s [%.3f, %.3f]"%
# (term, low_bin * bin_size, (low_bin + len(arr)) * bin_size))
# print("Below: %i Below_median: %.3f Below_max: %.3f Above: %i Above_median: %.3f Above_max: %.3f"%(
# is_low.sum(), (0 - np.median(idx[is_low]))*bin_size, (0 - low)*bin_size,
# is_high.sum(), (np.median(idx[is_high]) - len(arr))*bin_size, (high-len(arr))*bin_size
# ))
idx = np.clip(idx, 0, len(arr)-1)
dfz[prob_term] = arr[ idx ]
prob_terms.append(prob_term)
dfz[prob_name] = 1
for prob_term in prob_terms:
dfz[prob_name] *= dfz[prob_term]
def train_and_predict_mle(df, all_indices, test_indices, terms_and_cuts, prob_name, whole_df, graphs=False):
use_indices = list(set(all_indices) - set(test_indices))
test_df = df.iloc[test_indices].copy(True)
use_df = df.iloc[use_indices].copy(True)
prob_arrays = {}
for pilot_term in terms_and_cuts:
cut, good_high, term, is_int = terms_and_cuts[pilot_term]
low, high, bin_size, low_bin, num_bins = get_low_high_bin_size_low_bin_num_bins(whole_df, pilot_term, terms_and_cuts)
# print(len(use_df))
prob_array = create_prob_array(low, high, low_bin, num_bins, bin_size, pilot_term, use_df, terms_and_cuts, graphs)
prob_arrays[term] = (prob_array, bin_size, low_bin)
apply_prob_arrays(test_df, prob_arrays, prob_name)
return test_df[[prob_name, 'description']], prob_arrays
# This code is a little out-dated, but it still works
# We have the test_indices set to 0.1% because the curve fitting removes memorization
train_df = pilot_df.copy(True)
all_indices = list(range(len(train_df)))
test_indices = random.sample(range(len(train_df)), int(len(train_df)*0.001))
# This sets up the maximum likelihood method
not_used, prob_arrays = train_and_predict_mle(train_df, all_indices, test_indices, terms_and_cuts, "predict", predictor_df, True)
print("")
print('-predictor_filters ' + ",".join( terms_and_cuts[x][2][:-5] for x in list(terms_and_cuts)))
print('-equation="-' + "*".join(eqs) + '"')
When I print the sigmoid graphs, I get the following error:
KeyError Traceback (most recent call last)
~/eman2-sphire-sparx/lib/python3.7/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
3079 try:
-> 3080 return self._engine.get_loc(casted_key)
3081 except KeyError as err:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'mismatch_probability_pred'
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
<ipython-input-28-dc898cc73b6f> in <module>
7
8 # This sets up maximum likihood method
----> 9 not_used, prob_arrays = train_and_predict_mle(train_df, all_indices, test_indices, terms_and_cuts, "predict", predictor_df, True)
10
11 print("")
<ipython-input-27-26f3827163d5> in train_and_predict_mle(df, all_indices, test_indices, terms_and_cuts, prob_name, whole_df, graphs)
275 for pilot_term in terms_and_cuts:
276 cut, good_high, term, is_int = terms_and_cuts[pilot_term]
--> 277 low, high, bin_size, low_bin, num_bins = get_low_high_bin_size_low_bin_num_bins(whole_df, pilot_term, terms_and_cuts)
278
279 # print(len(use_df))
<ipython-input-27-26f3827163d5> in get_low_high_bin_size_low_bin_num_bins(dfz, pilot_term, terms_and_cuts)
28 cut_div = cut + 0.01
29
---> 30 low = dfz[term].min()
31 high = dfz[term].max()
32 if (abs( (low - cut) / cut_div ) > 1000 ):
~/eman2-sphire-sparx/lib/python3.7/site-packages/pandas/core/frame.py in __getitem__(self, key)
3022 if self.columns.nlevels > 1:
3023 return self._getitem_multilevel(key)
-> 3024 indexer = self.columns.get_loc(key)
3025 if is_integer(indexer):
3026 indexer = [indexer]
~/eman2-sphire-sparx/lib/python3.7/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
3080 return self._engine.get_loc(casted_key)
3081 except KeyError as err:
-> 3082 raise KeyError(key) from err
3083
3084 if tolerance is not None:
KeyError: 'mismatch_probability_pred'
Why does this happen and can anyone tell me how to solve this problem?
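The traceback shows the lookup failing on whole_df inside get_low_high_bin_size_low_bin_num_bins, and whole_df is the predictor_df passed to train_and_predict_mle, so the KeyError simply means predictor_df has no mismatch_probability column (and therefore no mismatch_probability_pred after suffix_all_columns). A quick check on the dataframe loaded above:
print([c for c in predictor_df.columns if 'mismatch' in c])
print('mismatch_probability_pred' in predictor_df.columns)
If the column really is missing from the predictor score file, either remove the 'mismatch_probability' entry from terms_and_cuts or regenerate the score file with that term included.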
The code is solving an integral using the trapezium rule. I need to set limits on QH so that if QH > 1 then QH = 1. I can't seem to get what I've done below to work properly.
## Solve ODE
QH = odeint(model, QH0, z, atol = 1.0e-8, rtol = 1.0e-8)
QHe = odeint(model1, QHe0, z, atol = 1.0e-8, rtol = 1.8e-8)
if QH > 1:
QH == 1
if QHe > 1:
QHe == 1
#Solving Thomson Optical Depth Integral for Hydrogen
def f_hydrogen(z_in):
Hz = H0*math.sqrt(OMEGAm*((1+z_in)**3)+OMEGAlam)
flatQH = QH.flatten()
QH_int = np.interp(z_in, z[::-1], flatQH[::-1])
return QH_int*(((1+z_in)**2)/Hz)
a = 0
z1 = 7
n = 1000
hei = (z1-a)/n
k = 0
#sum = 0
sum = np.zeros(n+1)
while (k<n):
x_in = a + (k*hei)
if k < n-1 :
sum[k + 1] = sum[k] + f_hydrogen(x_in)
k = k + 1
int_a = (hei/2)*((f_hydrogen(a) + f_hydrogen(z1)) + (2*sum))
tH = (c)*(sigma)*(nbarH)*(1+(y/(4*x)))*(int_a)
for index, val in enumerate(tH):
print("Thomson Optical Depth - Hydrogen = ", index, val)
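For the capping itself: odeint returns NumPy arrays, so if QH > 1 is ambiguous for an array, and QH == 1 is a comparison rather than an assignment. A minimal sketch of what I believe was intended, applied right after the odeint calls:
import numpy as np

QH = np.minimum(QH, 1.0)    # cap every element at 1
QHe = np.minimum(QHe, 1.0)  # equivalently: np.clip(QHe, None, 1.0)
With the arrays capped before f_hydrogen interpolates over them, the trapezium sum below uses the limited values.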
I am trying to calculate the Bispectrum score (BSS) for an array of audio data frames; the definition of this feature can be found here:
And the formulas can be found here:
The implementation I am using is:
import numpy as np
import numba as nb
from time import time
def bispectrum(*u, ntheta=None, kmin=None, kmax=None,
diagnostics=True, error=False,
nsamples=None, sample_thresh=None,
compute_fft=True, exclude_upper=False, use_pyfftw=False,
bench=False, progress=False, **kwargs):
shape, ndim = nb.typed.List(u[0].shape), u[0].ndim
ncomp = len(u)
if ncomp not in [1, 3]:
raise ValueError("Pass either 1 scalar field or 3 vector components.")
if ndim not in [2, 3]:
raise ValueError("Data must be 2D or 3D.")
# Geometry of output image
kmax = int(max(shape)/2) if kmax is None else int(kmax)
kmin = 1 if kmin is None else int(kmin)
kn = np.arange(kmin, kmax+1, 1, dtype=int)
dim = kn.size
theta = np.arange(0, np.pi, np.pi/ntheta) if ntheta is not None else None
# ...make costheta monotonically increase
costheta = np.flip(np.cos(theta)) if theta is not None else np.array([1.])
# theta = 0 should be included
if theta is not None:
costheta[-1] += 1e-5
if bench:
t0 = time()
# Get binned radial coordinates of FFT
kv = np.meshgrid(*([np.fft.fftfreq(Ni).astype(np.float32)*Ni
for Ni in shape]), indexing="ij")
kr = np.zeros_like(kv[0])
for i in range(ndim):
kr[...] += kv[i]**2
kr[...] = np.sqrt(kr)
kcoords = nb.typed.List()
for i in range(ndim):
temp = kv[i].astype(np.int16).ravel()
kcoords.append(temp)
del kv, temp
kbins = np.arange(int(np.ceil(kr.max())))
kbinned = (np.digitize(kr, kbins)-1).astype(np.int16)
del kr
# Enumerate indices in each bin
k1bins, k2bins = nb.typed.List(), nb.typed.List()
for ki in kn:
mask = kbinned == ki
temp1 = np.where(mask)
temp2 = np.where(mask[..., :shape[-1]//2+1])
k1bins.append(np.ravel_multi_index(temp1, shape))
k2bins.append(np.ravel_multi_index(temp2, shape))
del kbinned
# FFT
ffts = []
for i in range(ncomp):
if compute_fft:
temp = u[i]
if use_pyfftw:
fft = _fftn(temp, **kwargs)
else:
fft = np.fft.rfftn(temp, **kwargs)
del temp
else:
fft = u[i][..., :shape[-1]//2+1]
ffts.append(fft)
del fft
# Sampling settings
if sample_thresh is None:
sample_thresh = np.iinfo(np.int64).max
if nsamples is None:
nsamples = np.iinfo(np.int64).max
sample_thresh = np.iinfo(np.int64).max
# Sampling mask
if np.issubdtype(type(nsamples), np.integer):
nsamples = np.full((dim, dim), nsamples, dtype=np.int_)
elif np.issubdtype(type(nsamples), np.floating):
nsamples = np.full((dim, dim), nsamples)
elif type(nsamples) is np.ndarray:
if np.issubdtype(nsamples.dtype, np.integer):
nsamples = nsamples.astype(np.int_)
# Run main loop
compute_point = eval(f"_compute_point{ndim}D")
args = (k1bins, k2bins, kn, costheta, kcoords,
nsamples, sample_thresh, ndim, dim, shape,
progress, exclude_upper, error, compute_point, *ffts)
B, norm, omega, counts, stderr = _compute_bispectrum(*args)
# Set zero values to nan values for division
mask = counts == 0.
norm[mask] = np.nan
counts[mask] = np.nan
# Get bicoherence and average bispectrum
b = np.abs(B) / norm
B.real /= counts
B.imag /= counts
# Prepare diagnostics
if error:
stderr[counts <= 1.] = np.nan
# Switch back to theta monotonically increasing
if ntheta is not None:
B[...] = np.flip(B, axis=0)
b[...] = np.flip(b, axis=0)
if diagnostics:
counts[...] = np.flip(counts, axis=0)
if error:
stderr[...] = np.flip(stderr, axis=0)
else:
B, b = B[0], b[0]
if diagnostics:
counts = counts[0]
if error:
stderr = stderr[0]
if bench:
print(f"Time: {time() - t0:.04f} s")
result = [B, b, kn]
if ntheta is not None:
result.append(theta)
if diagnostics:
result.extend([counts, omega])
if error:
result.append(stderr)
return tuple(result)
def _fftn(image, overwrite_input=False, threads=-1, **kwargs):
"""
Calculate N-dimensional fft of image with pyfftw.
See pyfftw.builders.fftn for kwargs documentation.
Parameters
----------
image : np.ndarray
Real or complex-valued 2D or 3D image
overwrite_input : bool, optional
Specify whether input data can be destroyed.
This is useful for reducing memory usage.
See pyfftw.builders.fftn for more.
threads : int, optional
Number of threads for pyfftw to use. Default
is number of cores.
Returns
-------
fft : np.ndarray
The fft. Will be the shape of the input image
or the user specified shape.
"""
import pyfftw
if image.dtype in [np.complex64, np.complex128]:
dtype = 'complex128'
fftn = pyfftw.builders.fftn
elif image.dtype in [np.float32, np.float64]:
dtype = 'float64'
fftn = pyfftw.builders.rfftn
else:
raise ValueError(f"{image.dtype} is an unrecognized data type.")
a = pyfftw.empty_aligned(image.shape, dtype=dtype)
f = fftn(a, threads=threads, overwrite_input=overwrite_input, **kwargs)
a[...] = image
fft = f()
del a, fftn
return fft
@nb.njit(parallel=True)
def _compute_bispectrum(k1bins, k2bins, kn, costheta, kcoords, nsamples,
sample_thresh, ndim, dim, shape, progress,
exclude, error, compute_point, *ffts):
knyq = max(shape) // 2
ntheta = costheta.size
nffts = len(ffts)
bispec = np.full((ntheta, dim, dim), np.nan+1.j*np.nan, dtype=np.complex128)
binorm = np.full((ntheta, dim, dim), np.nan, dtype=np.float64)
counts = np.full((ntheta, dim, dim), np.nan, dtype=np.float64)
omega = np.zeros((dim, dim), dtype=np.int64)
if error:
stderr = np.full((ntheta, dim, dim), np.nan, dtype=np.float64)
else:
stderr = np.zeros((1, 1, 1), dtype=np.float64)
for i in range(dim):
k1 = kn[i]
k1ind = k1bins[i]
nk1 = k1ind.size
dim2 = dim if nffts > 1 else i+1
for j in range(dim2):
k2 = kn[j]
if ntheta == 1 and (exclude and k1 + k2 > knyq):
continue
k2ind = k2bins[j]
nk2 = k2ind.size
nsamp = nsamples[i, j]
nsamp = int(nsamp) if type(nsamp) is np.int64 \
else max(int(nsamp*nk1*nk2), 1)
if nsamp < nk1*nk2 or nsamp > sample_thresh:
samp = np.random.randint(0, nk1*nk2, size=nsamp)
count = nsamp
else:
samp = np.arange(nk1*nk2)
count = nk1*nk2
bispecbuf = np.zeros(count, dtype=np.complex128)
binormbuf = np.zeros(count, dtype=np.float64)
cthetabuf = np.zeros(count, dtype=np.float64) if ntheta > 1 \
else np.array([0.], dtype=np.float64)
countbuf = np.zeros(count, dtype=np.float64)
compute_point(k1ind, k2ind, kcoords, ntheta,
nk1, nk2, shape, samp, count,
bispecbuf, binormbuf, cthetabuf, countbuf,
*ffts)
if ntheta == 1:
_fill_sum(i, j, bispec, binorm, counts, stderr,
bispecbuf, binormbuf, countbuf, nffts, error)
else:
binned = np.searchsorted(costheta, cthetabuf)
_fill_binned_sum(i, j, ntheta, binned, bispec, binorm,
counts, stderr, bispecbuf, binormbuf,
countbuf, nffts, error)
omega[i, j] = nk1*nk2
if nffts == 1:
omega[j, i] = nk1*nk2
if progress:
with nb.objmode():
_printProgressBar(i, dim-1)
return bispec, binorm, omega, counts, stderr
@nb.njit(parallel=True, cache=True)
def _fill_sum(i, j, bispec, binorm, counts, stderr,
bispecbuf, binormbuf, countbuf, nffts, error):
N = countbuf.sum()
norm = binormbuf.sum()
value = bispecbuf.sum()
bispec[0, i, j] = value
binorm[0, i, j] = norm
counts[0, i, j] = N
if nffts == 1:
bispec[0, j, i] = value
binorm[0, j, i] = norm
counts[0, j, i] = N
if error and N > 1:
variance = np.abs(bispecbuf - (value / N))**2
err = np.sqrt(variance.sum() / (N*(N - 1)))
stderr[0, i, j] = err
if nffts == 1:
stderr[0, j, i] = err
@nb.njit(parallel=True, cache=True)
def _fill_binned_sum(i, j, ntheta, binned, bispec, binorm, counts,
stderr, bispecbuf, binormbuf, countbuf, nffts, error):
N = np.bincount(binned, weights=countbuf, minlength=ntheta)
norm = np.bincount(binned, weights=binormbuf, minlength=ntheta)
value = np.bincount(binned, weights=bispecbuf.real, minlength=ntheta) +\
1.j*np.bincount(binned, weights=bispecbuf.imag, minlength=ntheta)
bispec[:, i, j] = value
binorm[:, i, j] = norm
counts[:, i, j] = N
if nffts == 1:
bispec[:, j, i] = value
binorm[:, j, i] = norm
counts[:, j, i] = N
if error:
variance = np.zeros_like(countbuf)
for n in range(ntheta):
if N[n] > 1:
idxs = np.where(binned == n)
mean = value[n] / N[n]
variance[idxs] = np.abs(bispecbuf[idxs] - mean)**2 / (N[n]*(N[n]-1))
err = np.sqrt(np.bincount(binned, weights=variance, minlength=ntheta))
stderr[:, i, j] = err
if nffts == 1:
stderr[:, j, i] = err
@nb.njit(parallel=True, cache=True)
def _compute_point3D(k1ind, k2ind, kcoords, ntheta, nk1, nk2, shape,
samp, count, bispecbuf, binormbuf,
cthetabuf, countbuf, *ffts):
kx, ky, kz = kcoords[0], kcoords[1], kcoords[2]
Nx, Ny, Nz = shape[0], shape[1], shape[2]
nffts = len(ffts)
fft1, fft2, fft3 = [ffts[0], ffts[0], ffts[0]] if nffts == 1 else ffts
for idx in nb.prange(count):
n, m = k1ind[samp[idx] % nk1], k2ind[samp[idx] // nk1]
k1x, k1y, k1z = kx[n], ky[n], kz[n]
k2x, k2y, k2z = kx[m], ky[m], kz[m]
k3x, k3y, k3z = k1x+k2x, k1y+k2y, k1z+k2z
if np.abs(k3x) > Nx//2 or np.abs(k3y) > Ny//2 or np.abs(k3z) > Nz//2:
continue
s1 = fft1[k1x, k1y, k1z] if k1z >= 0 \
else np.conj(fft1[-k1x, -k1y, -k1z])
s2 = fft2[k2x, k2y, k2z] if k2z >= 0 \
else np.conj(fft2[-k2x, -k2y, -k2z])
s3 = np.conj(fft3[k3x, k3y, k3z]) if k3z >= 0 \
else fft3[-k3x, -k3y, -k3z]
sample = s1*s2*s3
norm = np.abs(sample)
bispecbuf[idx] = sample
binormbuf[idx] = norm
countbuf[idx] = 1
if ntheta > 1:
k1dotk2 = k1x*k2x+k1y*k2y+k1z*k2z
k1norm, k2norm = np.sqrt(k1x**2+k1y**2+k1z**2), np.sqrt(k2x**2+k2y**2+k2z**2)
costheta = k1dotk2 / (k1norm*k2norm)
cthetabuf[idx] = costheta
@nb.njit(parallel=True, cache=True)
def _compute_point2D(k1ind, k2ind, kcoords, ntheta, nk1, nk2, shape,
samp, count, bispecbuf, binormbuf,
cthetabuf, countbuf, *ffts):
kx, ky = kcoords[0], kcoords[1]
Nx, Ny = shape[0], shape[1]
nffts = len(ffts)
fft1, fft2, fft3 = [ffts[0], ffts[0], ffts[0]] if nffts == 1 else ffts
for idx in nb.prange(count):
n, m = k1ind[samp[idx] % nk1], k2ind[samp[idx] // nk1]
k1x, k1y = kx[n], ky[n]
k2x, k2y = kx[m], ky[m]
k3x, k3y = k1x+k2x, k1y+k2y
if np.abs(k3x) > Nx//2 or np.abs(k3y) > Ny//2:
continue
s1 = fft1[k1x, k1y] if k1y >= 0 else np.conj(fft1[-k1x, -k1y])
s2 = fft2[k2x, k2y] if k2y >= 0 else np.conj(fft2[-k2x, -k2y])
s3 = np.conj(fft3[k3x, k3y]) if k3y >= 0 else fft3[-k3x, -k3y]
sample = s1*s2*s3
norm = np.abs(sample)
bispecbuf[idx] = sample
binormbuf[idx] = norm
countbuf[idx] = 1
if ntheta > 1:
k1dotk2 = k1x*k2x+k1y*k2y
k1norm, k2norm = np.sqrt(k1x**2+k1y**2), np.sqrt(k2x**2+k2y**2)
costheta = k1dotk2 / (k1norm*k2norm)
cthetabuf[idx] = costheta
@nb.jit(forceobj=True, cache=True)
def _printProgressBar(iteration, total, prefix='', suffix='', decimals=1,
length=50, fill='█', printEnd="\r"):
"""
Call in a loop to create terminal progress bar
Adapted from
https://stackoverflow.com/questions/3173320/text-progress-bar-in-the-console
"""
prefix = '(%d/%d)' % (iteration, total) if prefix == '' else prefix
percent = str("%."+str(decimals)+"f") % (100 * (iteration / float(total)))
filledLength = int(length * iteration // total)
bar = fill * filledLength + '-' * (length - filledLength)
prog = '\r%s |%s| %s%s %s' % (prefix, bar, percent, '%', suffix)
print(prog, end=printEnd, flush=True)
if iteration == total:
print()
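As a quick smoke test of the functions above, a minimal call on random 2D data (small kmax so it finishes quickly; my real input would be the stacked audio frames) might look like:
import numpy as np

frames = np.random.rand(32, 32)                 # stand-in for the 2D frame array
B, b, kn, counts, omega = bispectrum(frames, kmin=1, kmax=8)
print(B.shape, b.shape, kn)                     # (8, 8) bispectrum / bicoherence grids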
Is this a correct implementation, or is there a better and more optimized method to calculate the Bispectrum score (BSS) for an audio data frame array?
Thank you!
I would like to know whether matplotlib's spectrogram function only takes into account the centre frequencies of a signal.
For example, plotting a spectrogram (not in decibels), here is the result:
Plotting a spectrogram normally:
Where are the points between 0-50 and between 80-140 going? Are they being removed? If so, why exactly?
EDIT: Source code:
Here is the matplotlib spectral helper:
def _spectral_helper2(x, y=None, NFFT=None, Fs=None, detrend_func=None,
window=None, noverlap=None, pad_to=None,
sides=None, scale_by_freq=None, mode=None):
'''
This is a helper function that implements the commonality between the
psd, csd, spectrogram and complex, magnitude, angle, and phase spectrums.
It is *NOT* meant to be used outside of mlab and may change at any time.
'''
if y is None:
# if y is None use x for y
same_data = True
else:
#The checks for if y is x are so that we can use the same function to
#implement the core of psd(), csd(), and spectrogram() without doing
#extra calculations. We return the unaveraged Pxy, freqs, and t.
same_data = y is x
if Fs is None:
Fs = 2
if noverlap is None:
noverlap = 0
if detrend_func is None:
detrend_func = detrend_none
if window is None:
window = window_hanning
# if NFFT is set to None use the whole signal
if NFFT is None:
NFFT = 256
if mode is None or mode == 'default':
mode = 'psd'
elif mode not in ['psd', 'complex', 'magnitude', 'angle', 'phase']:
raise ValueError("Unknown value for mode %s, must be one of: "
"'default', 'psd', 'complex', "
"'magnitude', 'angle', 'phase'" % mode)
if not same_data and mode != 'psd':
raise ValueError("x and y must be equal if mode is not 'psd'")
#Make sure we're dealing with a numpy array. If y and x were the same
#object to start with, keep them that way
x = np.asarray(x)
if not same_data:
y = np.asarray(y)
if sides is None or sides == 'default':
if np.iscomplexobj(x):
sides = 'twosided'
else:
sides = 'onesided'
elif sides not in ['onesided', 'twosided']:
raise ValueError("Unknown value for sides %s, must be one of: "
"'default', 'onesided', or 'twosided'" % sides)
# zero pad x and y up to NFFT if they are shorter than NFFT
if len(x) < NFFT:
n = len(x)
x = np.resize(x, (NFFT,))
x[n:] = 0
if not same_data and len(y) < NFFT:
n = len(y)
y = np.resize(y, (NFFT,))
y[n:] = 0
if pad_to is None:
pad_to = NFFT
if mode != 'psd':
scale_by_freq = False
elif scale_by_freq is None:
scale_by_freq = True
# For real x, ignore the negative frequencies unless told otherwise
if sides == 'twosided':
numFreqs = pad_to
if pad_to % 2:
freqcenter = (pad_to - 1)//2 + 1
else:
freqcenter = pad_to//2
scaling_factor = 1.
elif sides == 'onesided':
if pad_to % 2:
numFreqs = (pad_to + 1)//2
else:
numFreqs = pad_to//2 + 1
scaling_factor = 2.
result = stride_windows(x, NFFT, noverlap, axis=0)
result = detrend(result, detrend_func, axis=0)
result, windowVals = apply_window(result, window, axis=0,
return_window=True)
result = np.fft.fft(result, n=pad_to, axis=0)[:numFreqs, :]
freqs = np.fft.fftfreq(pad_to, 1/Fs)[:numFreqs]
if not same_data:
# if same_data is False, mode must be 'psd'
resultY = stride_windows(y, NFFT, noverlap)
resultY = apply_window(resultY, window, axis=0)
resultY = detrend(resultY, detrend_func, axis=0)
resultY = np.fft.fft(resultY, n=pad_to, axis=0)[:numFreqs, :]
result = np.conjugate(result) * resultY
elif mode == 'psd':
result = np.conjugate(result) * result
elif mode == 'magnitude':
result = np.absolute(result)
elif mode == 'angle' or mode == 'phase':
# we unwrap the phase later to handle the onesided vs. twosided case
result = np.angle(result)
elif mode == 'complex':
pass
if mode == 'psd':
# Scale the spectrum by the norm of the window to compensate for
# windowing loss; see Bendat & Piersol Sec 11.5.2.
#result /= (np.abs(windowVals)**2).sum()
# Also include scaling factors for one-sided densities and dividing by
# the sampling frequency, if desired. Scale everything, except the DC
# component and the NFFT/2 component:
result[1:-1] *= 1554848
# MATLAB divides by the sampling frequency so that density function
# has units of dB/Hz and can be integrated by the plotted frequency
# values. Perform the same scaling here.
if scale_by_freq:
result /= Fs
t = np.arange(NFFT/2, len(x) - NFFT/2 + 1, NFFT - noverlap)/Fs
if sides == 'twosided':
# center the frequency range at zero
freqs = np.concatenate((freqs[freqcenter:], freqs[:freqcenter]))
# result = np.concatenate((result[freqcenter:, :],
#result[:freqcenter, :]), 0)
elif not pad_to % 2:
# get the last value correctly, it is negative otherwise
freqs[-1] *= -1
# we unwrap the phase here to handle the onesided vs. twosided case
if mode == 'phase':
pass
#result = np.unwrap(result, axis=0)
return result, freqs, t
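On the original question: in 'onesided' mode the helper keeps only numFreqs = pad_to//2 + 1 bins, i.e. the non-negative FFT bin centres, so nothing between the bins is "removed"; the frequency axis is just discrete with a resolution of Fs/NFFT. For example, with the default NFFT = pad_to = 256 and the 12 kHz sample rate used in the C++ code below:
import numpy as np

Fs, NFFT = 12000, 256
freqs = np.fft.fftfreq(NFFT, 1 / Fs)[:NFFT // 2 + 1]
print(freqs[:4])   # [0.0, 46.875, 93.75, 140.625] -> ~47 Hz bin spacing
which is why there are no points between 0 and 47 Hz, or between roughly 94 and 140 Hz.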
Here is my attempt in C++
std::vector<std::vector<Complex::complex> > ComputeSTFT(std::vector<double> &vals,
std::size_t NFFT, std::size_t overlap)
{
std::vector<double> hanning = getHanningWindow(NFFT);
double NENBW = 0.0;
double ENBW = 0.0;
double fRes = 0.0;
double avg = 0.0;
double S1 = 0.0;  // sum of window values (used for NENBW below)
double S2 = 0.0;  // sum of squared window values
for(unsigned i=0; (i < vals.size()); i++)
{
avg+= vals[i];
}
avg = avg / vals.size();
for(unsigned i=0; (i < vals.size()); i++)
{
vals[i] = vals[i] - avg;
}
std::vector<std::vector<double> > temp_vars = frame(vals, NFFT, overlap);
std::vector<std::vector<Complex::complex> > STFT(temp_vars.size());
for(unsigned i=0; (i < temp_vars.size()-1); i++)
{
for(unsigned j=0; (j < temp_vars[i].size()); j++) {
double value = 0.5 * (1 - cos(2 * PI * j / (NFFT - 1)));
S1 += value;
S2 += value * value;
double calculation = temp_vars[i][j] * value;
temp_vars[i][j] = calculation;
}
}
NENBW = NFFT * S2 / (S1 * S1);
// This assumes that the sampling frequency is KNOWN.
fRes = 12000.0 / NFFT;
ENBW = NENBW * fRes;
std::vector<std::vector<Complex::complex> > fft_vars(temp_vars.size());
for(unsigned i=0; (i < temp_vars.size()); i++)
{
fft_vars.resize(temp_vars[i].size());
FFT f(temp_vars[i].begin(), temp_vars[i].end(), temp_vars[i].size());
std::vector<Complex::complex> temp_fft = f.transformed();
fft_vars[i] = temp_fft;
temp_fft.empty();
}
std::vector<std::vector<double> > RESULT(temp_vars.size());
for(unsigned i=0; (i < temp_vars.size()); i++)
{
STFT[i].resize(temp_vars[i].size()/2+1);
for(unsigned j=0; (j < temp_vars[i].size()/2 + 1); j++)
{
STFT[i][j].re = fft_vars[i][j].re;
STFT[i][j].im = fft_vars[i][j].im;
}
}
return STFT;
}
Where am I going wrong to produce such different results?
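One concrete difference: in 'psd' mode matplotlib multiplies the windowed FFT by its conjugate and then normalises it (by the window norm, a factor of 2 for the one-sided bins, and Fs; the window-norm line is commented out in the copy above), whereas ComputeSTFT returns the raw complex FFT bins and never uses S1, S2 or ENBW. A small sketch of that scaling for a single frame, assuming a Hann window and the 12 kHz rate from the C++ code:
import numpy as np

Fs, NFFT = 12000, 256
win = np.hanning(NFFT)
seg = np.random.rand(NFFT)                 # stand-in for one detrended frame
spec = np.fft.rfft(seg * win)
psd = (np.conjugate(spec) * spec).real
psd /= (np.abs(win) ** 2).sum()            # compensate for windowing loss
psd[1:-1] *= 2.0                           # one-sided scaling (not DC / Nyquist)
psd /= Fs                                  # density in units^2 per Hz
Applying (or deliberately omitting) the same normalisation on both sides should make the two outputs directly comparable.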