Question Summary
I have tried to troubleshoot this Python ZigZag indicator but have not resolved the issue highlighted below, and would appreciate any help with the logic of this function.
Details
The following code excerpt is from the Python ZigZag indicator for candlestick charts. I have copied a minimal version of the code directly below to highlight where the logic is implemented. As per the chart below, the indicator is not detecting a new peak at 2020-05-28, which should replace the peak at 2020-05-21.
if down_thresh > 0:
    raise ValueError('The down_thresh must be negative.')

initial_pivot = _identify_initial_pivot(close, up_thresh, down_thresh)
t_n = len(close)
pivots = np.zeros(t_n, dtype='i1')
pivots[0] = initial_pivot

# Adding one to the relative change thresholds saves operations. Instead
# of computing relative change at each point as x_j / x_i - 1, it is
# computed as x_j / x_1. Then, this value is compared to the threshold + 1.
# This saves (t_n - 1) subtractions.
up_thresh += 1
down_thresh += 1

trend = -initial_pivot
last_pivot_t = 0
last_pivot_x = close[0]
for t in range(1, len(close)):

    if trend == -1:
        x = low[t]
        r = x / last_pivot_x
        if r >= up_thresh:
            pivots[last_pivot_t] = trend#
            trend = 1
            #last_pivot_x = x
            last_pivot_x = high[t]
            last_pivot_t = t
        elif x < last_pivot_x:
            last_pivot_x = x
            last_pivot_t = t
    else:
        x = high[t]
        r = x / last_pivot_x
        if r <= down_thresh:
            pivots[last_pivot_t] = trend
            trend = -1
            #last_pivot_x = x
            last_pivot_x = low[t]
            last_pivot_t = t
        elif x > last_pivot_x:
            last_pivot_x = x
            last_pivot_t = t

if last_pivot_t == t_n-1:
    pivots[last_pivot_t] = trend
elif pivots[t_n-1] == 0:
    pivots[t_n-1] = trend
Code to reproduce this example
The following code will produce the output shown in the image (NumPy seed value included), and the dataframe does not require any additional file to be downloaded. Copy this into a Jupyter notebook to see the exact same output. The actual logic is in the smaller code example above.
import pandas as pd
import numpy as np
import plotly.graph_objects as go

def genMockDataFrame(days, startPrice, colName, startDate, seed=None):
    periods = days*24
    np.random.seed(seed)
    steps = np.random.normal(loc=0, scale=0.0018, size=periods)
    steps[0] = 0
    P = startPrice + np.cumsum(steps)
    P = [round(i, 4) for i in P]
    fxDF = pd.DataFrame({
        'ticker': np.repeat([colName], periods),
        'date': np.tile(pd.date_range(startDate, periods=periods, freq='H'), 1),
        'price': (P)})
    fxDF.index = pd.to_datetime(fxDF.date)
    fxDF = fxDF.price.resample('D').ohlc()
    fxDF.columns = [i.title() for i in fxDF.columns]
    return fxDF
df = genMockDataFrame(100,1.1904,'eurusd','19/3/2020',seed=200)
PEAK, VALLEY = 1, -1
def _identify_initial_pivot(X, up_thresh, down_thresh):
    """Quickly identify the X[0] as a peak or valley."""
    x_0 = X[0]
    max_x = x_0
    max_t = 0
    min_x = x_0
    min_t = 0
    up_thresh += 1
    down_thresh += 1

    for t in range(1, len(X)):
        x_t = X[t]

        if x_t / min_x >= up_thresh:
            return VALLEY if min_t == 0 else PEAK

        if x_t / max_x <= down_thresh:
            return PEAK if max_t == 0 else VALLEY

        if x_t > max_x:
            max_x = x_t
            max_t = t

        if x_t < min_x:
            min_x = x_t
            min_t = t

    t_n = len(X)-1
    return VALLEY if x_0 < X[t_n] else PEAK
def peak_valley_pivots_candlestick(close, high, low, up_thresh, down_thresh):
    """
    Find the peaks and valleys of a series of HLC data (open is not necessary).
    TR: This is a modified peak_valley_pivots function in order to find peaks and valleys for OHLC.

    Parameters
    ----------
    close : Series with close prices.
    high : Series with high prices.
    low : Series with low prices.
    up_thresh : The minimum relative change necessary to define a peak.
    down_thresh : The minimum relative change necessary to define a valley.

    Returns
    -------
    an array with 0 indicating no pivot and -1 and 1 indicating valley and peak
    respectively

    Using Pandas
    ------------
    For the most part, close, high and low may be a pandas series. However, the index must
    either be [0,n) or a DateTimeIndex. Why? This function does X[t] to access
    each element where t is in [0,n).

    The First and Last Elements
    ---------------------------
    The first and last elements are guaranteed to be annotated as peak or
    valley even if the segments formed do not have the necessary relative
    changes. This is a tradeoff between technical correctness and the
    propensity to make mistakes in data analysis. The possible mistake is
    ignoring data outside the fully realized segments, which may bias analysis.
    """
    if down_thresh > 0:
        raise ValueError('The down_thresh must be negative.')

    initial_pivot = _identify_initial_pivot(close, up_thresh, down_thresh)
    t_n = len(close)
    pivots = np.zeros(t_n, dtype='i1')
    pivots[0] = initial_pivot

    # Adding one to the relative change thresholds saves operations. Instead
    # of computing relative change at each point as x_j / x_i - 1, it is
    # computed as x_j / x_1. Then, this value is compared to the threshold + 1.
    # This saves (t_n - 1) subtractions.
    up_thresh += 1
    down_thresh += 1

    trend = -initial_pivot
    last_pivot_t = 0
    last_pivot_x = close[0]
    for t in range(1, len(close)):

        if trend == -1:
            x = low[t]
            r = x / last_pivot_x
            if r >= up_thresh:
                pivots[last_pivot_t] = trend#
                trend = 1
                #last_pivot_x = x
                last_pivot_x = high[t]
                last_pivot_t = t
            elif x < last_pivot_x:
                last_pivot_x = x
                last_pivot_t = t
        else:
            x = high[t]
            r = x / last_pivot_x
            if r <= down_thresh:
                pivots[last_pivot_t] = trend
                trend = -1
                #last_pivot_x = x
                last_pivot_x = low[t]
                last_pivot_t = t
            elif x > last_pivot_x:
                last_pivot_x = x
                last_pivot_t = t

    if last_pivot_t == t_n-1:
        pivots[last_pivot_t] = trend
    elif pivots[t_n-1] == 0:
        pivots[t_n-1] = trend

    return pivots
df = df["2020-04-28":"2020-06-20"]
pivots = peak_valley_pivots_candlestick(df.Close, df.High, df.Low ,.01,-.01)
df['Pivots'] = pivots
df['Pivot Price'] = np.nan # This line clears old pivot prices
df.loc[df['Pivots'] == 1, 'Pivot Price'] = df.High
df.loc[df['Pivots'] == -1, 'Pivot Price'] = df.Low
df["Date"] = df.index
fig = go.Figure(data=[go.Candlestick(x=df['Date'],
open=df['Open'],
high=df['High'],
low=df['Low'],
close=df['Close'])])
df_diff = df['Pivot Price'].dropna().diff().copy()
fig.add_trace(
go.Scatter(mode = "lines+markers",
x=df['Date'],
y=df["Pivot Price"]
))
fig.update_layout(
autosize=False,
width=1000,
height=800,)
fig.add_trace(go.Scatter(x=df['Date'],
y=df['Pivot Price'].interpolate(),
mode = 'lines',
line = dict(color='black')))
def annot(value):
if np.isnan(value):
return ''
else:
return value
j = 0
for i, p in enumerate(df['Pivot Price']):
if not np.isnan(p):
fig.add_annotation(dict(font=dict(color='rgba(0,0,200,0.8)',size=12),
x=df['Date'].iloc[i],
y=p,
showarrow=False,
text=annot(round(abs(df_diff.iloc[j]),3)),
textangle=0,
xanchor='right',
xref="x",
yref="y"))
j = j + 1
fig.update_xaxes(type='category')
fig.show()
For further reference, there was also a similar question here.
Related
I'm simulating a forest fire, and one of my tasks is to plot the density of trees versus the cells that are currently burning or empty. I have the disparate parts, but I need help putting them together as I can't work out how to combine my code. Currently, I have my initial conditions:
p, f = 0.5, 0.3
nx, ny = 100, 100
X = np.zeros((ny, nx))
adjacent = ((-1,0), (0,-1), (0, 1), (1,0))
E, T, F = 0, 1, 2
xvalues = [0]
yvalues = [0]
My function that generates the next frame (the distribution of fire) is:
def iterate(X):
    Xnew = np.zeros((ny, nx))
    for ix in range(1, nx-1):
        for iy in range(1, ny-1):
            if X[iy,ix] == E and np.random.random() <= p:
                Xnew[iy,ix] = T
            if X[iy,ix] == T:
                Xnew[iy,ix] = T
                for dx, dy in adjacent:
                    if X[iy+dy,ix+dx] == F:
                        Xnew[iy,ix] = F
                    else:
                        if np.random.random() <= f:
                            Xnew[iy,ix] = F
    return Xnew
    print(Xnew)
The bit I'm struggling with is how to write the following correctly with the above material, so that I could go up to Xn where n is about 1000:
X1 = iterate(X)
X2 = iterate(X1)
X3 = iterate(X2)
and so on, and for each iteration calculate
num_empty = (Xn == 0).sum()
num_tree = (Xn == 1).sum()
num_fire = (Xn == 2).sum()
density = num_tree/(num_fire+num_empty)
xvalues.append(i)
yvalues.append(density)
print(density)
Any help would be appreciated!
I think you need to iterate over a range of n integers, reusing the function's result each time, rather than writing out each call by hand.
i = 0
_X = iterate(X)
num_empty = (_X == 0).sum()
num_tree = (_X == 1).sum()
num_fire = (_X == 2).sum()
density = num_tree / (num_fire + num_empty)
print(i, density)
xvalues.append(i)
yvalues.append(density)

n = 1000
for i in range(1, n):
    _X = iterate(_X)
    num_empty = (_X == 0).sum()
    num_tree = (_X == 1).sum()
    num_fire = (_X == 2).sum()
    density = num_tree / (num_fire + num_empty)
    print(i, density)
    xvalues.append(i)
    yvalues.append(density)
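To plot the collected densities against the iteration index, a minimal sketch could look like this (assuming matplotlib is available; xvalues and yvalues are the lists built above):

import matplotlib.pyplot as plt

# Plot tree density against iteration number collected in the loop above.
plt.plot(xvalues, yvalues)
plt.xlabel('iteration')
plt.ylabel('tree density')
plt.show()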
I am trying to find the mean, variance and confidence interval of the periodic/wrapped normal distribution (von Mises), but within a time interval (as opposed to the traditional interval of pi). I looked at a solution on Stack Overflow here; it's close, but I am not sure it's exactly what I am looking for.
I found exactly what I was looking for here, which uses R (see an extract of the code below). I'm looking to replicate this in Python.
> data(timestamps)
> head(timestamps)
[1] "20:27:28" "21:08:41" "01:30:16" "00:57:04" "23:12:14" "22:54:16"
> library(lubridate)
> ts <- as.numeric(hms(timestamps)) / 3600
> head(ts)
[1] 20.4577778 21.1447222 1.5044444 0.9511111 23.2038889 22.9044444
> library(circular)
> ts <- circular(ts, units = "hours", template = "clock24")
> head(ts)
Circular Data:
[1] 20.457889 21.144607 1.504422 0.950982 23.203917 4.904397
> estimates <- mle.vonmises(ts)
> p_mean <- estimates$mu %% 24
> concentration <- estimates$kappa
> densities <- dvonmises(ts, mu = p_mean, kappa = concentration)
> alpha <- 0.90
> quantile <- qvonmises((1 - alpha)/2, mu = p_mean, kappa = concentration) %% 24
> cutoff <- dvonmises(quantile, mu = p_mean, kappa = concentration)
> time_feature <- densities >= cutoff
Like the R library circular, Python has scipy.stats.vonmises, but it works on the interval [-pi, pi] rather than on times. Are there any alternative packages that can help?
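For illustration, one workaround is to map the 24-hour clock onto radians, fit scipy.stats.vonmises there, and map back. This is only a sketch; the example hours and the choice to fix scale=1 via fscale are assumptions, not from the original post:

import numpy as np
from scipy.stats import vonmises

# Hypothetical example: hours of day in [0, 24)
hours = np.array([20.46, 21.14, 1.50, 0.95, 23.20, 22.90])

# Map the 24-hour clock onto the circle [-pi, pi)
theta = hours / 24.0 * 2 * np.pi - np.pi

# Fit the von Mises distribution; fixing scale=1 leaves only kappa and loc to estimate
kappa, loc, scale = vonmises.fit(theta, fscale=1)

# Map the fitted circular mean back to hours of day
mean_hour = ((loc + np.pi) / (2 * np.pi) * 24) % 24
print(kappa, mean_hour)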
I built a Python function which does what I need, taking the formulae from this PDF.
Hope this helps the community. Please provide corrections if I am wrong.
Note: this works for values within the interval [0,2pi] or 360 degrees.
import pandas as pd
import numpy as np
from scipy.stats import chi2

def random_dates(start, end, n, unit='D', seed=None):
    if not seed:
        np.random.seed(0)
    ndays = (end - start).days + 1
    return pd.to_timedelta(np.random.rand(n) * ndays, unit=unit) + start

def vonmises(df, field):
    N = len(df[field])
    s = np.sum(np.sin(df[field]))
    c = np.sum(np.cos(df[field]))
    sbar = (1/N)*s
    cbar = (1/N)*c
    if cbar > 0:
        if sbar >= 0:
            df['mu_vm'] = np.arctan(sbar/cbar)
        else:
            df['mu_vm'] = np.arctan(sbar/cbar) + 2*np.pi
    elif cbar < 0:
        df['mu_vm'] = np.arctan(sbar/cbar) + np.pi
    else:
        df['mu_vm'] = np.nan
    R = np.sqrt(c**2 + s**2)
    Rbar = (1/N)*R
    if Rbar < 0.53:
        kstar = 2*Rbar + Rbar**3 + 5*(Rbar**5)/6
    elif Rbar >= 0.85:
        kstar = 1/(3*Rbar - 4*(Rbar**2) + Rbar**3)
    else:
        kstar = -0.4 + 1.39*Rbar + 0.43/(1-Rbar)
    if N <= 15:
        if kstar < 2:
            df['kappa_vm'] = np.max([kstar - 2/(N*kstar), 0])
        else:
            df['kappa_vm'] = ((N-1)**3)*kstar/(N*(N**2+1))
    else:
        df['kappa_vm'] = kstar
    if Rbar <= 2/3:
        df['vm_plus'] = df['mu_vm'] + np.arccos(np.sqrt(2*N*(2*(R**2) -
                        N*chi2.isf(0.9,1))/((R**2)*(4*N - chi2.isf(0.9,1)))))
        df['vm_minus'] = df['mu_vm'] - np.arccos(np.sqrt(2*N*(2*(R**2) -
                         N*chi2.isf(0.9,1))/((R**2)*(4*N - chi2.isf(0.9,1)))))
    else:
        df['vm_plus'] = df['mu_vm'] + np.arccos(np.sqrt((N**2) -
                        ((N**2) - (R**2))*np.exp(chi2.isf(0.9,1)/N))/R)
        df['vm_minus'] = df['mu_vm'] - np.arccos(np.sqrt((N**2) -
                         ((N**2) - (R**2))*np.exp(chi2.isf(0.9,1)/N))/R)
    df['vm_conft'] = np.where((df['vm_plus'] < df[field]) |
                              (df['vm_minus'] > df[field]), True, False)
    return df

df = pd.concat([pd.DataFrame({'A': [1,1,1,1,1,2,2,2,2,2]}),
                pd.DataFrame({'B': random_dates(pd.to_datetime('2015-01-01'),
                                                pd.to_datetime('2018-01-01'), 10)})], axis=1)
df['C'] = (df['B'].dt.hour*60 + df['B'].dt.minute)*60 + df['B'].dt.second
df['D'] = df['C']*2*np.pi/(24*60*60)
df = df.groupby('A').apply(lambda x: vonmises(x, 'D'))
To get back to hours, for example, simply multiply by 24 and divide by 2*pi.
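As a small illustration of that conversion, a sketch continuing from the dataframe above (the _hours column names and the modulo-24 wrap are my additions, not from the original answer):

# Convert the fitted circular quantities from radians back to hours of day.
for col in ['mu_vm', 'vm_plus', 'vm_minus']:
    df[col + '_hours'] = (df[col] * 24 / (2 * np.pi)) % 24  # % 24 wraps onto the clock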
I want to implement subgradient and stochastic gradient descent using a cost function, calculate the number of iterations it takes to find a perfect classifier for the data, and also the weights (w) and bias (b).
The dataset is four-dimensional.
This is my cost function:
I have taken the derivative of the cost function, and here it is:
When I run my code I get a lot of errors; can someone please help?
Here is my code in Python:
import numpy as np

learn_rate = 1
w = np.zeros((4,1))
b = 0
M = 1000
data = '/Users/labuew/Desktop/dataset.data'

# calculating the gradient
def cal_grad_w(data, w, b):
    for i in range(M):
        sample = data[i,:]
        Ym = sample[-1]
        Xm = sample[0:4]
        if -Ym[i]*(w*Xm+b) >= 0:
            tmp = 1.0
        else:
            tmp = 0
        value = Ym[i]*Xm*tmp
        sum = sum + value
    return sum

def cal_grad_b(data, w, b):
    for i in range(M):
        sample = data[i,:]
        Ym = sample[-1]
        Xm = sample[0:4]
        if -Ym*(w*Xm+b) >= 0:
            tmp = 1.0
        else:
            tmp = 0
        value = Ym[i]*x*tmp
        sum = sum + value
    return sum

if __name__ == '__main__':
    counter = 0
    while 1:
        counter += 1
        dw = cal_grad_w(data, w, b)
        db = cal_grad_b(data, w, b)
        if dw == 0 and db == 0:
            break
        w = w - learn_rate*dw
        b = b - learn_rate*dw
        print(counter, w, b)
Are you missing the numpy load function?
data = np.load('/Users/labuew/Desktop/dataset.data')
It looks like you're doing the numerics on the string.
Also:
Ym = sample[-1]
Xm = sample[0:4]
Also, 4 dimensions implies that Ym = Xm[3]? Is your data rank 2, with the second rank being dimension 5? [0:4] includes the fourth dimension, i.e.
z = [1,2,3,4]
z[0:4] = [1,2,3,4]
This would be my best guess. I'm taking a few educated guesses about your data format.
import numpy as np

learn_rate = 1
w = np.zeros((1,4))
b = 0
M = 1000

# Possible format
# data = np.load('/Users/labuew/Desktop/dataset.data')
# Assumed format
data = np.ones((1000,5))

# calculating the gradient
def cal_grad_w(data, w, b):
    sum = 0
    for i in range(M):
        sample = data[i,:]
        Ym = sample[-1]
        Xm = sample[0:4]
        if -1*Ym*(np.matmul(w, Xm.reshape(4,1)) + b) >= 0:
            tmp = 1.0
        else:
            tmp = 0
        value = Ym*Xm*tmp
        sum = sum + value
    return sum.reshape(1,4)

def cal_grad_b(data, w, b):
    sum = 0
    for i in range(M):
        sample = data[i,:]
        Ym = sample[-1]
        Xm = sample[0:4]
        if -1*Ym*(np.matmul(w, Xm.reshape(4,1)) + b) >= 0:
            tmp = 1.0
        else:
            tmp = 0
        value = Ym*tmp
        sum = sum + value
    return sum

if __name__ == '__main__':
    counter = 0
    while 1:
        counter += 1
        dw = cal_grad_w(data, w, b)
        db = cal_grad_b(data, w, b)
        if dw.all() == 0 and db == 0:
            break
        w = w - learn_rate*dw
        b = b - learn_rate*db
        print([counter, w, b])
Put in dummy data because I don't know the format.
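If the .data file is plain text rather than a saved NumPy array, np.load will not read it; a sketch of loading it instead (assuming whitespace-delimited rows of five numbers, the delimiter being a guess about the unknown format):

import numpy as np

# Assumption: each row holds x1 x2 x3 x4 y separated by whitespace;
# pass delimiter=',' instead if the file is comma-separated.
data = np.loadtxt('/Users/labuew/Desktop/dataset.data')
print(data.shape)  # expected (1000, 5) for the format assumed in the answer above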
I am working on an octant search to find the n nearest points (+) (e.g. 8) to my circular point (o) in each octant. This would mean that my points (+) are reduced to only 64 (8 per octant).
The first thing I did was to divide my region into octants with my point (o) as the reference.
data = array containing (x, y, z) for all points (+)
gdata = array containing (x, y) for point (o)
import tkinter as tk
from tkinter import filedialog
import pandas as pd
import numpy as np
from scipy.spatial.distance import cdist
from collections import defaultdict

root = tk.Tk()
root.withdraw()
file_path = filedialog.askopenfilename()
data = pd.read_excel(file_path)
data = np.array(data, dtype=np.float)
nrow, cols = data.shape

file_path1 = filedialog.askopenfilename()
gdata = pd.read_excel(file_path1)
gdata = np.array(gdata, dtype=np.float)

pwangle = np.zeros(nrow)
for j in range(nrow):
    delta_x = gdata[:,0]-data[:,0][j]
    delta_y = gdata[:,1]-data[:,1][j]
    if delta_x != 0:
        pwangle[j] = np.rad2deg(np.arctan(delta_y/delta_x))
    else:
        if delta_y > 0:
            pwangle[j] = 90
        elif delta_y < 0:
            pwangle[j] = 270
    if (delta_x < 0) & (delta_y > 0):
        pwangle[j] = 180 + pwangle[j]
    elif (delta_x < 0) & (delta_y < 0):
        pwangle[j] = 270 - pwangle[j]
    elif (delta_x > 0) & (delta_y < 0):
        pwangle[j] = 360 + pwangle[j]

vecangle = pwangle.ravel()

sortdata = defaultdict(list)
count = -1
get_anglesector = 45
N = 8

d = cdist(data[:,:2], gdata)
P = np.hstack((data, d))

for j in range(0, 360, get_anglesector):
    count += 1
    get_data = []
    for k, dummy_val in enumerate(vecangle):
        if j <= vecangle[k] < j + get_anglesector:
            get_data.append(P[k,::])
    sortdata[count] = np.array(get_data)
After the data have been grouped into the various octants, I then sort the data in each octant to obtain the closest 8 points to the point (o).
for i, j in enumerate(sortdata):
    octantsort = defaultdict(list)
    for i in range(8):
        octantsort[i] = np.array(sortdata[i][sortdata[i][:,3].argsort()[:N]])
Is there an efficient and pythonic way of doing this to increase performance?
This works fine, but when I have more than one 'o' point (e.g. 10,000 'o' points) and have to run the above code for each point, it becomes time consuming.
The job gets a lot easier if you use arctan2 instead of arctan. Then, vectorizing for speed, we may get something like this:
import numpy as np
from scipy.spatial.distance import cdist

delta = gdata - data[:,:2]
angles = np.arctan2(delta[:,1], delta[:,0])
bins = np.linspace(-np.pi, np.pi, 9)
bins[-1] = np.inf  # handle edge case

octantsort = []
for i in range(8):
    data_i = data[(bins[i] <= angles) & (angles < bins[i+1])]
    dist_order = np.argsort(cdist(data_i, gdata))
    octantsort.append(data_i[dist_order[:N]])
Thank you @user7138814; apart from making some slight changes, your code is faster.
N = 8
delta = gdata - data[:,:2]
angles = np.arctan2(delta[:,1], delta[:,0])
bins = np.linspace(-np.pi, np.pi, 9)
bins[-1] = np.inf  # handle edge case

octantsort = []
for i in range(8):
    data_i = data[(bins[i] <= angles) & (angles < bins[i+1])]
    dist_order = np.argsort(cdist(data_i[:,:2], gdata), axis=0)
    [octantsort.append(data_i[dist_order[:N][j]]) for j in range(8)]

final = np.vstack(octantsort)
Time of execution of the previous code (code in the question):
---- 0.021449804306030273 seconds ------
Time of execution of the code in this post:
---- 0.0015172958374023438 seconds ------
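For the many-'o'-points case raised in the question, a KD-tree can also help: build the tree once and query it per point. This is only a sketch, not part of the original answers; it assumes scipy.spatial.cKDTree, data as the (n, 3) array of x, y, z points, queries as an (m, 2) array of 'o' points, and a heuristic candidate cut-off k_candidates:

import numpy as np
from scipy.spatial import cKDTree

def octant_nearest(data, queries, n_per_octant=8, k_candidates=200):
    """For each query point, keep the n_per_octant nearest data points in each
    of the 8 angular sectors around it. Only the k_candidates nearest
    neighbours are examined, so a sector can come back short if the cut-off
    is too small for the local point density."""
    tree = cKDTree(data[:, :2])
    bins = np.linspace(-np.pi, np.pi, 9)
    bins[-1] = np.inf  # handle the angle == pi edge case
    k = min(k_candidates, len(data))
    dists, idxs = tree.query(queries, k=k)
    results = []
    for q, idx_row in zip(queries, idxs):
        delta = data[idx_row, :2] - q
        angles = np.arctan2(delta[:, 1], delta[:, 0])
        picked = []
        for i in range(8):
            # idx_row is already sorted by distance, so the first
            # n_per_octant hits in each sector are the closest ones.
            sector = idx_row[(bins[i] <= angles) & (angles < bins[i + 1])]
            picked.append(data[sector[:n_per_octant]])
        results.append(np.vstack(picked))
    return results

With the arrays from the question it could be called as octant_nearest(data, gdata), returning one (up to 64, 3) array per query point.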
Using numpy, I have extracted the zero crossings of a signal.
Unfortunately the source of the data is noisy, and thus there are multiple zero crossings.
If I filter the data before checking for zero crossings, aspects of the filter (gain/phase margin) will need to be justified, while averaging the zero-crossing points is slightly easier to justify.
[123,125,127,1045,1049,1050,2147,2147,2151,2155]
Consider the above list. What would be an appropriate way to create:
[125, 1048, 2149]
The aim is to find the phase shift between two sine waves.
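For illustration, the grouping can also be done in a vectorized way with numpy alone. This is a sketch; the 100-sample gap threshold is an assumption that mirrors the answers which follow:

import numpy as np

xings = np.array([123, 125, 127, 1045, 1049, 1050, 2147, 2147, 2151, 2155])

# Split wherever the gap between consecutive crossings exceeds the threshold,
# then average each resulting cluster.
THRESHOLD = 100
groups = np.split(xings, np.where(np.diff(xings) > THRESHOLD)[0] + 1)
averaged = [int(round(g.mean())) for g in groups]
print(averaged)  # [125, 1048, 2150]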
This code takes a simplistic approach of looking for a gap THRESHOLD between the transitions - exceeding this marks the end of a signal transition.
xings = [123,125,127,1045,1049,1050,2147,2147,2151,2155]

THRESHOLD = 100

xlast = -1000000
tot = 0
n = 0
results = []

i = 0
while i < len(xings):
    x = xings[i]
    if x-xlast > THRESHOLD:
        # emit a transition, averaged for the
        if n > 0:
            results.append(tot/n)
        tot = 0
        n = 0
    tot += x
    n += 1
    xlast = x
    i += 1

if n > 0:
    results.append(tot/n)

print results
prints:
[125, 1048, 2150]
I was hoping for a more elegant solution than just iterating over the list of zero crossings, but it seems that is the only solution.
I settled on:
def zero_crossing_avg(data):
    output = []
    running_total = data[0]
    count = 1
    for i in range(1, data.size):
        val = data[i]
        if val - data[i-1] < TOL:
            running_total += val
            count += 1
        else:
            output.append(round(running_total/count))
            running_total = val
            count = 1
    return output
with example code of it in use:
#!/usr/bin/env python

import numpy as np
from matplotlib import pyplot as plt

dt = 5e-6
TOL = 50

class DCfilt():
    def __init__(self, dt, freq):
        self.alpha = dt/(dt + 1/(2*np.pi*freq))
        self.y = [0,0]
    def step(self, x):
        y = self.y[-1] + self.alpha*(x - self.y[-1])
        self.y[-1] = y
        return y

def zero_crossing_avg(data):
    output = []
    running_total = data[0]
    count = 1
    for i in range(1, data.size):
        val = data[i]
        if val - data[i-1] < TOL:
            running_total += val
            count += 1
        else:
            output.append(round(running_total/count))
            running_total = val
            count = 1
    return output

t = np.arange(0, 2, dt)
print(t.size)

rng = (np.random.random_sample(t.size) - 0.5)*0.1

s = 10*np.sin(2*np.pi*t*10 + np.pi/12) + rng
c = 10*np.cos(2*np.pi*t*10) + rng

filt_s = DCfilt(dt, 16000)
filt_s.y[-1] = s[0]
filt_c = DCfilt(dt, 1600)
filt_c.y[-1] = c[0]

# filter the RAW data first
for i in range(s.size):
    s[i] = filt_s.step(s[i])
    c[i] = filt_c.step(c[i])

# determine the zero crossings
s_z = np.where(np.diff(np.sign(s)))[0]
c_z = np.where(np.diff(np.sign(c)))[0]

sin_zc = zero_crossing_avg(np.where(np.diff(np.sign(s)))[0])
cos_zc = zero_crossing_avg(np.where(np.diff(np.sign(c)))[0])

HALF_PERIOD = (sin_zc[1] - sin_zc[0])

for i in range([len(sin_zc), len(cos_zc)][len(sin_zc) > len(cos_zc)]):
    delta = abs(cos_zc[i] - sin_zc[i])
    print(90 - (delta/HALF_PERIOD)*180)

plt.hold(True)
plt.grid(True)
plt.plot(s)
plt.plot(c)

plt.show()
This works well enough.