determine mean zero crossing - python

using numpy I have extracted the zero crossings of a signal.
Unfortunately the source of the data is noisy and thus there are multiple zero crossings.
If I filter the data before checking for zero crossings, aspects of the filter (gain and phase margin) will need to be justified, whereas averaging the zero-crossing points is slightly easier to justify.
[123,125,127,1045,1049,1050,2147,2147,2151,2155]
consider the above list. what would be an appropriate way to create:
[125, 1048, 2149]
The aim is to find the phase shift between two sine waves

This code takes a simplistic approach of looking for a gap THRESHOLD between the transitions - exceeding this marks the end of a signal transition.
# Sample indices of detected zero crossings; noise produces a small cluster
# of crossings around each real transition.
xings = [123, 125, 127, 1045, 1049, 1050, 2147, 2147, 2151, 2155]
# A gap larger than this between consecutive crossings starts a new cluster.
THRESHOLD = 100

# Sentinel far below any real index so the first crossing always opens a cluster.
xlast = -1000000
tot = 0
n = 0
results = []
for x in xings:
    if x - xlast > THRESHOLD:
        # Gap exceeded: emit the averaged position of the cluster that just ended.
        if n > 0:
            # Floor division keeps the integer results shown below on Python 3.
            results.append(tot // n)
        tot = 0
        n = 0
    tot += x
    n += 1
    xlast = x
# The last cluster has no following gap to trigger it, so flush it here.
if n > 0:
    results.append(tot // n)
print(results)
prints:
[125, 1048, 2150]

I was hoping for a more elegant solution to just iterating over the list of zero crossings, but it seems that is the only solution.
I settled on:
def zero_crossing_avg(data, tol=None):
    """Collapse clusters of nearby zero-crossing indices into their means.

    Consecutive values whose difference is below the tolerance belong to the
    same cluster; each cluster is reduced to its rounded arithmetic mean.

    Parameters
    ----------
    data : 1-D sequence (list or NumPy array) of crossing indices, in
        ascending order.
    tol : gap threshold; when omitted the module-level ``TOL`` is used.

    Returns
    -------
    list with one rounded mean per cluster (empty for empty input).
    """
    if tol is None:
        tol = TOL
    if len(data) == 0:
        return []
    output = []
    running_total = data[0]
    count = 1
    for prev, val in zip(data[:-1], data[1:]):
        if val - prev < tol:
            running_total += val
            count += 1
        else:
            output.append(round(running_total/count))
            running_total = val
            count = 1
    # The final cluster is not followed by a gap, so it must be emitted here;
    # without this the last crossing is silently dropped.
    output.append(round(running_total/count))
    return output
with example code of it in-use:
#!/usr/bin/env python
import numpy as np
from matplotlib import pyplot as plt
# Spacing of the time samples generated with np.arange(0, 2, dt) below;
# also passed to DCfilt as its step size.
dt = 5e-6
# Gap threshold read by zero_crossing_avg: consecutive crossing indices
# closer than this are averaged into one crossing.
TOL = 50
class DCfilt:
    """First-order recursive smoothing filter, advanced one sample at a time.

    The smoothing factor is ``alpha = dt / (dt + 1/(2*pi*freq))``. The most
    recent output is kept in ``self.y[-1]`` so callers can preset it.
    """

    def __init__(self, dt, freq):
        time_constant = 1/(2*np.pi*freq)
        self.alpha = dt/(dt + time_constant)
        self.y = [0, 0]

    def step(self, x):
        """Feed one input sample and return the updated filter output."""
        previous = self.y[-1]
        updated = previous + self.alpha*(x - previous)
        self.y[-1] = updated
        return updated
def zero_crossing_avg(data, tol=None):
    """Collapse clusters of nearby zero-crossing indices into their means.

    Consecutive values whose difference is below the tolerance belong to the
    same cluster; each cluster is reduced to its rounded arithmetic mean.

    Parameters
    ----------
    data : 1-D sequence (list or NumPy array) of crossing indices, in
        ascending order.
    tol : gap threshold; when omitted the module-level ``TOL`` is used.

    Returns
    -------
    list with one rounded mean per cluster (empty for empty input).
    """
    if tol is None:
        tol = TOL
    if len(data) == 0:
        return []
    output = []
    running_total = data[0]
    count = 1
    for prev, val in zip(data[:-1], data[1:]):
        if val - prev < tol:
            running_total += val
            count += 1
        else:
            output.append(round(running_total/count))
            running_total = val
            count = 1
    # The final cluster is not followed by a gap, so it must be emitted here;
    # without this the last crossing is silently dropped.
    output.append(round(running_total/count))
    return output
# Time axis from 0 to 2 in steps of dt.
t = np.arange(0, 2, dt)
print(t.size)
# Uniform noise in [-0.05, 0.05); the same noise vector is added to both signals.
rng = (np.random.random_sample(t.size) - 0.5)*0.1
s = 10*np.sin(2*np.pi*t*10 + np.pi/12) + rng
c = 10*np.cos(2*np.pi*t*10) + rng
# Preset each filter's state to the first sample so there is no start-up step.
filt_s = DCfilt(dt, 16000)
filt_s.y[-1] = s[0]
filt_c = DCfilt(dt, 1600)
filt_c.y[-1] = c[0]
# filter the RAW data first
for i in range(s.size):
    s[i] = filt_s.step(s[i])
    c[i] = filt_c.step(c[i])
# determine the zero crossings (indices where the sign changes)
s_z = np.where(np.diff(np.sign(s)))[0]
c_z = np.where(np.diff(np.sign(c)))[0]
# Reuse the index arrays instead of recomputing them.
sin_zc = zero_crossing_avg(s_z)
cos_zc = zero_crossing_avg(c_z)
# Distance between the first two averaged crossings of the sine.
HALF_PERIOD = (sin_zc[1] - sin_zc[0])
# Only compare crossings that exist in both signals.
for i in range(min(len(sin_zc), len(cos_zc))):
    delta = abs(cos_zc[i]-sin_zc[i])
    print(90 - (delta/HALF_PERIOD)*180)
# plt.hold() was removed in Matplotlib 3.0; successive plot() calls already
# draw on the same axes.
plt.grid(True)
plt.plot(s)
plt.plot(c)
plt.show()
This works well enough.

Related

Python ZigZag indicator function logic issue

Question Summary
I have tried to troubleshoot this python ZigZag indicator but have not resolved the issue highlighted below and would appreciate any help with the logic of this function.
Details
The following code excerpt is from the Python zigzag indicator for candlestick charts. I have copied a minimal version of the code directly below to highlight where the logic is implemented. As per the chart below the indicator is not detecting a new peak at 2020-05-28 which should replace the peak at 2020-05-21
if down_thresh > 0:
raise ValueError('The down_thresh must be negative.')
initial_pivot = _identify_initial_pivot(close, up_thresh, down_thresh)
t_n = len(close)
pivots = np.zeros(t_n, dtype='i1')
pivots[0] = initial_pivot
# Adding one to the relative change thresholds saves operations. Instead
# of computing relative change at each point as x_j / x_i - 1, it is
# computed as x_j / x_1. Then, this value is compared to the threshold + 1.
# This saves (t_n - 1) subtractions.
up_thresh += 1
down_thresh += 1
trend = -initial_pivot
last_pivot_t = 0
last_pivot_x = close[0]
for t in range(1, len(close)):
if trend == -1:
x = low[t]
r = x / last_pivot_x
if r >= up_thresh:
pivots[last_pivot_t] = trend#
trend = 1
#last_pivot_x = x
last_pivot_x = high[t]
last_pivot_t = t
elif x < last_pivot_x:
last_pivot_x = x
last_pivot_t = t
else:
x = high[t]
r = x / last_pivot_x
if r <= down_thresh:
pivots[last_pivot_t] = trend
trend = -1
#last_pivot_x = x
last_pivot_x = low[t]
last_pivot_t = t
elif x > last_pivot_x:
last_pivot_x = x
last_pivot_t = t
if last_pivot_t == t_n-1:
pivots[last_pivot_t] = trend
elif pivots[t_n-1] == 0:
pivots[t_n-1] = trend
Code to reproduce this example
The following code will provide the output shown in the image (Numpy seed value included) and the dataframe does not require any additional file be downloaded. Copy this into a Jupyter notebook to see the exact same output. The actual logic is in the smaller code example above.
import pandas as pd
import numpy as np
import plotly.graph_objects as go
def genMockDataFrame(days, startPrice, colName, startDate, seed=None):
    """Build a mock daily OHLC frame from an hourly Gaussian random walk.

    Parameters
    ----------
    days : number of days to simulate (24 hourly prices per day).
    startPrice : price of the first hourly sample.
    colName : ticker label stored alongside the hourly prices.
    startDate : first timestamp, in any form accepted by ``pd.date_range``.
    seed : value passed to ``np.random.seed`` for reproducible output.

    Returns
    -------
    DataFrame indexed by day with columns ``Open``, ``High``, ``Low``, ``Close``.
    """
    periods = days*24
    np.random.seed(seed)
    steps = np.random.normal(loc=0, scale=0.0018, size=periods)
    steps[0] = 0  # the walk starts exactly at startPrice
    P = startPrice + np.cumsum(steps)
    P = [round(i, 4) for i in P]

    # A Timedelta frequency avoids the 'H' string alias, which newer pandas
    # releases deprecate in favour of 'h'.
    hourly = pd.date_range(startDate, periods=periods, freq=pd.Timedelta(hours=1))
    fxDF = pd.DataFrame({
        'ticker': np.repeat([colName], periods),
        'date': hourly,
        'price': P})
    fxDF.index = pd.to_datetime(fxDF.date)
    fxDF = fxDF.price.resample('D').ohlc()
    fxDF.columns = [i.title() for i in fxDF.columns]
    return fxDF
# 100 days of mock hourly prices starting at 1.1904, resampled to daily OHLC;
# the fixed seed makes the example reproducible.
df = genMockDataFrame(100,1.1904,'eurusd','19/3/2020',seed=200)
PEAK, VALLEY = 1, -1


def _identify_initial_pivot(X, up_thresh, down_thresh):
    """Quickly identify the X[0] as a peak or valley.

    Scans forward until the series has moved by ``up_thresh`` above its
    running minimum or by ``down_thresh`` below its running maximum, and
    classifies the first element from which of the two happened and where
    the extreme was located. If neither threshold is ever reached, the first
    and last values are compared instead.
    """
    # Positional access on a plain array: integer keys on a pandas Series
    # with a non-integer index are label lookups in recent pandas versions.
    X = np.asarray(X)
    x_0 = X[0]
    max_x = x_0
    max_t = 0
    min_x = x_0
    min_t = 0
    up_thresh += 1
    down_thresh += 1

    for t in range(1, len(X)):
        x_t = X[t]

        if x_t / min_x >= up_thresh:
            return VALLEY if min_t == 0 else PEAK

        if x_t / max_x <= down_thresh:
            return PEAK if max_t == 0 else VALLEY

        if x_t > max_x:
            max_x = x_t
            max_t = t

        if x_t < min_x:
            min_x = x_t
            min_t = t

    return VALLEY if x_0 < X[-1] else PEAK


def peak_valley_pivots_candlestick(close, high, low, up_thresh, down_thresh):
    """
    Finds the peaks and valleys of a series of HLC (open is not necessary).
    TR: This is modified peak_valley_pivots function in order to find peaks and valleys for OHLC.

    Parameters
    ----------
    close : This is series with closes prices.
    high : This is series with highs prices.
    low : This is series with lows prices.
    up_thresh : The minimum relative change necessary to define a peak.
    down_thresh : The minimum relative change necessary to define a valley.

    Returns
    -------
    an array with 0 indicating no pivot and -1 and 1 indicating valley and peak
    respectively

    Raises
    ------
    ValueError if down_thresh is positive.

    Using Pandas
    ------------
    close, high and low may be lists, arrays or pandas series with any index:
    they are converted to NumPy arrays first, so element t always means the
    t-th value by position.

    The First and Last Elements
    ---------------------------
    The first and last elements are guaranteed to be annotated as peak or
    valley even if the segments formed do not have the necessary relative
    changes. This is a tradeoff between technical correctness and the
    propensity to make mistakes in data analysis. The possible mistake is
    ignoring data outside the fully realized segments, which may bias analysis.
    """
    if down_thresh > 0:
        raise ValueError('The down_thresh must be negative.')

    close = np.asarray(close)
    high = np.asarray(high)
    low = np.asarray(low)

    t_n = len(close)
    if t_n == 0:
        # Nothing to annotate; avoids indexing element 0 of an empty input.
        return np.zeros(0, dtype='i1')

    initial_pivot = _identify_initial_pivot(close, up_thresh, down_thresh)
    pivots = np.zeros(t_n, dtype='i1')
    pivots[0] = initial_pivot

    # Adding one to the relative change thresholds saves operations. Instead
    # of computing relative change at each point as x_j / x_i - 1, it is
    # computed as x_j / x_1. Then, this value is compared to the threshold + 1.
    # This saves (t_n - 1) subtractions.
    up_thresh += 1
    down_thresh += 1

    trend = -initial_pivot
    last_pivot_t = 0
    last_pivot_x = close[0]
    for t in range(1, t_n):
        if trend == -1:
            # Down leg: track the lowest low seen so far.
            x = low[t]
            r = x / last_pivot_x
            if r >= up_thresh:
                pivots[last_pivot_t] = trend
                trend = 1
                # The new up leg is measured from this bar's high
                # (an earlier variant used x, the low, here).
                last_pivot_x = high[t]
                last_pivot_t = t
            elif x < last_pivot_x:
                last_pivot_x = x
                last_pivot_t = t
        else:
            # Up leg: track the highest high seen so far.
            x = high[t]
            r = x / last_pivot_x
            if r <= down_thresh:
                pivots[last_pivot_t] = trend
                trend = -1
                # The new down leg is measured from this bar's low
                # (an earlier variant used x, the high, here).
                last_pivot_x = low[t]
                last_pivot_t = t
            elif x > last_pivot_x:
                last_pivot_x = x
                last_pivot_t = t

    # Always annotate the final element (see "The First and Last Elements").
    if last_pivot_t == t_n-1:
        pivots[last_pivot_t] = trend
    elif pivots[t_n-1] == 0:
        pivots[t_n-1] = trend

    return pivots
# Take an explicit copy of the date window: new columns are assigned below,
# and assigning onto a slice of the original frame triggers pandas'
# chained-assignment warning.
df = df["2020-04-28":"2020-06-20"].copy()
pivots = peak_valley_pivots_candlestick(df.Close, df.High, df.Low, .01, -.01)
df['Pivots'] = pivots
df['Pivot Price'] = np.nan  # This line clears old pivot prices
# Peaks are plotted at the bar's high, valleys at the bar's low.
df.loc[df['Pivots'] == 1, 'Pivot Price'] = df.High
df.loc[df['Pivots'] == -1, 'Pivot Price'] = df.Low
df["Date"] = df.index
fig = go.Figure(data=[go.Candlestick(x=df['Date'],
                                     open=df['Open'],
                                     high=df['High'],
                                     low=df['Low'],
                                     close=df['Close'])])
# Price change between consecutive pivots; the first entry is NaN.
df_diff = df['Pivot Price'].dropna().diff().copy()
fig.add_trace(
    go.Scatter(mode="lines+markers",
               x=df['Date'],
               y=df["Pivot Price"]
               ))
fig.update_layout(
    autosize=False,
    width=1000,
    height=800,)
# Interpolating across the NaN gaps draws the zigzag line through the pivots.
fig.add_trace(go.Scatter(x=df['Date'],
                         y=df['Pivot Price'].interpolate(),
                         mode='lines',
                         line=dict(color='black')))
def annot(value):
    """Return an empty label for NaN and the value itself otherwise."""
    return '' if np.isnan(value) else value
# Label every pivot with the absolute price change from the previous pivot.
# j counts pivots, because df_diff holds one entry per non-NaN pivot price.
j = 0
for i, p in enumerate(df['Pivot Price']):
    if np.isnan(p):
        continue
    move = round(abs(df_diff.iloc[j]), 3)
    label_font = dict(color='rgba(0,0,200,0.8)', size=12)
    fig.add_annotation(dict(font=label_font,
                            x=df['Date'].iloc[i],
                            y=p,
                            showarrow=False,
                            text=annot(move),
                            textangle=0,
                            xanchor='right',
                            xref="x",
                            yref="y"))
    j = j + 1
fig.update_xaxes(type='category')
fig.show()
For further reference there was also a similar question here.

Implementing sub gradient Stochastic descent in python

I want to implement subgradient and stochastic descent using a cost function, and calculate the number of iterations it takes to find a perfect classifier for the data, as well as the weights (w) and bias (b).
the dataset is in four dimension
this is my cost function
I have taken the derivative of the cost function, and here it is:
When i run my code i get a lot of errors, can someone please help.
Here is my Code in python
import numpy as np
learn_rate = 1
w = np.zeros((4,1))
b = 0
M = 1000
data = '/Users/labuew/Desktop/dataset.data'
#calculating the gradient
# Question's original attempt, kept verbatim; it does not run as written.
# Loops over the first M rows of `data`, takes the last column as Ym and the
# first four as Xm, and adds Ym*Xm to a running sum when -Ym*(w*Xm+b) >= 0.
# NOTE(review): `sum` is never initialised inside the function before
# `sum = sum + value` reads it.
# NOTE(review): `Ym` is a single element of the row but is indexed again as
# `Ym[i]` -- presumably a scalar label; confirm against the data format.
# NOTE(review): the caller passes the module-level `data`, which is bound to a
# file-path string, so `data[i,:]` is not array indexing.
def cal_grad_w(data, w, b):
for i in range (M):
sample = data[i,:]
Ym = sample[-1]
Xm = sample[0:4]
if -Ym[i]*(w*Xm+b) >= 0:
tmp = 1.0
else:
tmp = 0
value = Ym[i]*Xm*tmp
sum = sum +value
return sum
# Question's original attempt at the gradient with respect to b, kept
# verbatim; it does not run as written.
# NOTE(review): `sum` is never initialised inside the function before
# `sum = sum + value` reads it.
# NOTE(review): `x` is not defined anywhere in this function.
# NOTE(review): `Ym` is a single element of the row but is indexed as `Ym[i]`
# in the accumulation line -- presumably a scalar label; confirm.
def cal_grad_b(data, w, b):
for i in range (M):
sample = data[i,:]
Ym = sample[-1]
Xm = sample[0:4]
if -Ym*(w*Xm+b) >= 0:
tmp = 1.0
else:
tmp = 0
value = Ym[i]*x*tmp
sum = sum +value
return sum
# Question's original driver loop, kept verbatim.
# Re-evaluates both gradients and updates w and b until both gradients
# compare equal to zero, counting the iterations.
# NOTE(review): the bias update subtracts learn_rate*dw, not learn_rate*db.
if __name__ == '__main__':
counter = 0
while 1:
counter +=1
dw = cal_grad_w(data, w, b)
db = cal_grad_b(data, w, b)
if dw == 0 and db == 0:
break
w = w - learn_rate*dw
b = b - learn_rate *dw
print(counter,w,b)
are you missing the numpy load function?
data = np.load('/Users/labuew/Desktop/dataset.data')
It looks like you're doing the numerics on the string.
also
Ym = sample[-1]
Xm = sample[0:4]
Also, 4 dimensions implies that Ym = Xm[3]? Is your data rank 2, with the second axis having length 5? Note that [0:4] includes the fourth dimension, i.e.
z = [1,2,3,4]
z[0:4] = [1,2,3,4]
This would be my best guess. I'm taking a few educated guesses about your data format.
import numpy as np
learn_rate = 1
w = np.zeros((1,4))
b = 0
M = 1000
#Possible format
#data = np.load('/Users/labuew/Desktop/dataset.data')
#Assumed format
data = np.ones((1000,5))
#calculating the gradient
def cal_grad_w(data, w, b):
    """Accumulate the gradient term for w over the first M rows of data.

    Each row holds four feature values followed by the label. A row
    contributes label * features when -label * (w . features + b) >= 0 and
    contributes nothing otherwise. The total is returned with shape (1, 4).
    """
    total = 0
    for i in range(M):
        sample = data[i, :]
        label = sample[-1]
        features = sample[0:4]
        margin = np.matmul(w, features.reshape(4, 1)) + b
        indicator = 1.0 if -1*label*margin >= 0 else 0
        total = total + label*features*indicator
    return total.reshape(1, 4)
def cal_grad_b(data, w, b):
    """Accumulate the gradient term for b over the first M rows of data.

    Each row holds four feature values followed by the label. A row
    contributes its label when -label * (w . features + b) >= 0 and
    contributes nothing otherwise.
    """
    total = 0
    for i in range(M):
        sample = data[i, :]
        label = sample[-1]
        features = sample[0:4]
        margin = np.matmul(w, features.reshape(4, 1)) + b
        indicator = 1.0 if -1*label*margin >= 0 else 0
        total = total + label*indicator
    return total
if __name__ == '__main__':
    # Repeat full-batch gradient steps until both gradients vanish, counting
    # how many iterations that takes.
    counter = 0
    while 1:
        counter += 1
        dw = cal_grad_w(data, w, b)
        db = cal_grad_b(data, w, b)
        # Stop only when every component of dw is zero. `dw.all() == 0` was
        # already true as soon as a single component was zero.
        if not dw.any() and db == 0:
            break
        w = w - learn_rate*dw
        b = b - learn_rate*db
    print([counter, w, b])
Put in dummy data because I don't know the format.

Increasing performance with octant search algorithm

I am working on an octant search to find the n-number(e.g. 8) of points (+) closest to my circular point (o) in each octant. This would mean that my points (+) are reduced to only 64 (8 per octant).
The first thing I did is to divide my region into octants with my point (o) as reference.
data = array containing (x, y, z) for all points (+)
gdata = array containing (x, y) for point (o)
import tkinter as tk
from tkinter import filedialog
import pandas as pd
import numpy as np
from scipy.spatial.distance import cdist
from collections import defaultdict
# Ask the user for the two spreadsheets; the Tk root window itself stays hidden.
root = tk.Tk()
root.withdraw()
file_path = filedialog.askopenfilename()
data = pd.read_excel(file_path)
# np.float was removed in NumPy 1.24; the builtin float selects the same dtype.
data = np.array(data, dtype=float)
nrow, cols = data.shape
file_path1 = filedialog.askopenfilename()
gdata = pd.read_excel(file_path1)
gdata = np.array(gdata, dtype=float)

# Scalar coordinates of the single reference point (o). Using scalars avoids
# assigning 1-element arrays into pwangle[j].
gx, gy = gdata[0, 0], gdata[0, 1]

# Angle in degrees, measured counter-clockwise in [0, 360], of the vector from
# each point (+) to the reference point (o).
pwangle = np.zeros(nrow)
for j in range(nrow):
    delta_x = gx - data[j, 0]
    delta_y = gy - data[j, 1]
    if delta_x != 0:
        pwangle[j] = np.rad2deg(np.arctan(delta_y/delta_x))
    elif delta_y > 0:
        pwangle[j] = 90
    elif delta_y < 0:
        pwangle[j] = 270
    # arctan only returns values in (-90, 90), so shift into the right quadrant.
    if delta_x < 0:
        # Left half-plane (quadrants II and III and the negative x axis).
        # The original used 270 - angle for quadrant III, which lands in the
        # wrong octant, and left the negative x axis at 0.
        pwangle[j] = 180 + pwangle[j]
    elif delta_x > 0 and delta_y < 0:
        pwangle[j] = 360 + pwangle[j]
vecangle = pwangle.ravel()

sortdata = defaultdict(list)
count = -1
get_anglesector = 45
N = 8
# Append the distance to (o) as an extra last column of every point row.
d = cdist(data[:, :2], gdata)
P = np.hstack((data, d))
# Group the rows of P by 45-degree sector; sortdata[k] holds sector k.
for j in range(0, 360, get_anglesector):
    count += 1
    get_data = []
    for k, angle in enumerate(vecangle):
        if j <= angle < j + get_anglesector:
            get_data.append(P[k, ::])
    sortdata[count] = np.array(get_data)
After data have been grouped into various octant, I then sort data in each octant to obtain the closest 8 data to the point (o).
# Keep the N rows with the smallest value in column 3 for each of the eight
# sectors. A single pass is enough: the former outer loop over sortdata only
# repeated the same work and rebuilt the result every time.
octantsort = defaultdict(list)
for i in range(8):
    octantsort[i] = np.array(sortdata[i][sortdata[i][:, 3].argsort()[:N]])
Is there an efficient and pythonic way of doing this to increase performance?
This works fine but when i have more than one 'o' point (e.g. 10000 points 'o') and I have run the above code for each point, it would be time consuming.
The job gets a lot easier if you use arctan2 instead of arctan. Then vectorizing for speed we may get something like this:
import numpy as np
from scipy.spatial.distance import cdist
# Angle of the vector from every point (+) to the reference point (o).
delta = gdata - data[:, :2]
angles = np.arctan2(delta[:, 1], delta[:, 0])
# Eight equal sectors covering [-pi, pi].
bins = np.linspace(-np.pi, np.pi, 9)
bins[-1] = np.inf  # handle edge case: an angle of exactly +pi
octantsort = []
for i in range(8):
    data_i = data[(bins[i] <= angles) & (angles < bins[i+1])]
    # Only the x, y columns take part in the distance. cdist returns one
    # column per reference point, so take column 0 to get a flat vector that
    # argsort can order.
    dist = cdist(data_i[:, :2], gdata)[:, 0]
    dist_order = np.argsort(dist)
    octantsort.append(data_i[dist_order[:N]])
Thank you #user7138814, apart from making some slight changes, your code is faster
N = 8
# Angle of the vector from every point (+) to the reference point (o).
delta = gdata - data[:, :2]
angles = np.arctan2(delta[:, 1], delta[:, 0])
bins = np.linspace(-np.pi, np.pi, 9)
bins[-1] = np.inf  # handle edge case
octantsort = []
for i in range(8):
    data_i = data[(bins[i] <= angles) & (angles < bins[i+1])]
    dist_order = np.argsort(cdist(data_i[:, :2], gdata), axis=0)
    # Take up to N nearest rows in one slice. Indexing rows 0..7 one by one
    # raised IndexError for octants holding fewer than eight points.
    octantsort.append(data_i[dist_order[:N, 0]])
# One array with the selected rows of all octants stacked in octant order.
final = np.vstack(octantsort)
Time of execution of the previous code (code in the question):
---- 0.021449804306030273 seconds ------
Time of execution of the code in this post:
---- 0.0015172958374023438 seconds ------

Python: List's first term does not start at 0

So I'm trying to write code that simulates free fall. It's almost all done, except that the code starts at '1' instead of '0'. My code is:
def simulateFreeFall(mass, deltaT, simulationTime):
    """Integrate frictionless free fall step by step and plot fall length.

    On every step the velocity is advanced by acceleration * deltaT first and
    the length is then advanced with that updated velocity, so the first
    recorded length already includes one full step. ``mass`` is accepted but
    not used. The first recorded length is printed after plotting.
    """
    acceleration = 9.81
    velocity = 0
    length = 0
    times, l, v, a = [], [], [], []
    timeStep = simulationTime / deltaT
    x = 0
    while x < timeStep:
        velocity = acceleration * deltaT + velocity
        length = velocity * deltaT + length
        v.append(velocity)
        a.append(acceleration)
        l.append(length)
        times.append(deltaT * x)
        x += 1
    plt.plot(times, l, 'rs')
    plt.title("Free Fall - No Friction")
    plt.xlabel("Time")
    plt.ylabel("Fall Length")
    plt.grid(True)
    plt.show()
    print(l[0])
simulateFreeFall(70,0.01,60)
When I run the code, the first length in the list "l" is 0.000981 instead of 0. I'm not sure what I did wrong for it to start at what is technically the value after 0.01 seconds.
You could use some prints to debug this. By using print along the flow you can see what is happening.
Nothing surprising is happening, l[0] is:
l[0] = length1 = Dlength + length = velocity1 * deltaT + length =
= (Dvelocity + velocity) * deltaT + length
and the key thing is that
Dvelocity = acceleration * deltaT
which are non-zero
velocity1 is not 0 in the first loop, so length1 is not 0
The reason for this is that you calculate the velocity from the acceleration and your time step, which is not 0 in the first step, and therefore the first length is not 0 either.

Python - Cutting an array at a designated point based on value in row

I have a 300 x 4 matrix called X created by the odeint function. In the second column are y-values and I would like to cut the matrix when the y-value dips below 0. As a first step I was attempting to create a function that would read the second column and spit out the row number where the column first dips below 0.
X = odeint(func, X0, t)
Yval = X[:,1]
# Question's attempt at finding the first row whose y-value is negative, kept
# verbatim. Reads the module-level Yval (column 1 of the odeint result X).
# NOTE(review): there is no loop, so i is incremented at most once and only
# Yval[0] and Yval[1] can ever be inspected.
def indexer():
i = 0
if Yval[i] > 0:
i = i + 1
if Yval[i] < 0:
return i
Which is not working and conceptually I know this is wrong, I just couldn't think of another way to do this. Is there a way to cut out all the rows that contain and follow the first <0 y value?
This is my entire code:
import numpy as np
import math
from scipy.integrate import odeint
# Constants read by func: g is the constant subtracted from the vertical
# acceleration, k scales the speed-dependent drag term.
g = 9.8
theta = (45 * math.pi)/180
v0 = 10.0
k = 0.3
# Initial state: position at the origin, speed v0 split by the angle theta.
x0 = 0
y0 = 0
vx0 = v0*math.sin(theta)
vy0 = v0*math.cos(theta)


def func(i_state, time):
    """Return the time derivative of the state [x, y, vx, vy] for odeint.

    Positions change with the velocities; each velocity component is reduced
    by a drag term k * speed * component, and the vertical one additionally
    by g. ``time`` is part of the odeint callback signature and is not used.
    """
    vx = np.float64(i_state[2])
    vy = np.float64(i_state[3])
    speed = (vx**2 + vy**2)**(.5)
    return np.array([vx, vy, -k*speed*vx, -g - k*speed*vy])
X0 = [x0, y0, vx0, vy0]
t0 = 0
tf = 3
timestep = 0.01
# np.linspace needs an integer number of samples; the plain division yields a
# float, so round it to the nearest whole step count first.
nsteps = int(round((tf - t0)/timestep))
t = np.linspace(t0, tf, num=nsteps)
# One row per time sample; columns follow the state order [x, y, vx, vy].
X = odeint(func, X0, t)
Yval = X[:, 1]
# Question's attempt at finding the first row whose y-value is negative, kept
# verbatim. Reads the module-level Yval (column 1 of the odeint result X).
# NOTE(review): there is no loop, so i is incremented at most once and only
# Yval[0] and Yval[1] can ever be inspected.
def indexer():
i = 0
if Yval[i] > 0:
i = i + 1
if Yval[i] < 0:
return i
Maybe you could use the takewhile function from the itertools package:
from itertools import takewhile
first_elements = list(takewhile(lambda x: x[1] >= 0, X))
Where X is your matrix. I used x[1] in the lambda predicate to compare the numbers in the second column.
Here, first_elements will be the rows of the matrix before the first row that contains a value less than zero. You can use len(first_elements) to know what the cutoff point was.
I converted it to a list but you don't have to if you are just going to iterate through the result.
I hope this works.
You could do something like this:
# Collect the leading rows of X, in order, while their second column is
# non-negative.
newVals = []
i = 0
# Stops at the end of X or at the first row whose X[i][1] >= 0 test fails.
while( i < len(X) and X[i][1] >= 0):
newVals.append(X[i])
i += 1
This would go through X and append rows to the list newVals until you either reach the end of the list (i < len(X) becomes false) or reach the first row that fails the condition X[i][1] >= 0.

Categories

Resources