I would like to create a zigzag indicator for stocks in Python. I have this Metastock Formula.
I dicided to publish this problem here because I don't know any other foro.
I saw 2 stackoverflow posts with something like this but they are wrong.
As you can see, the indicator tooks the close prices.
Thanks for your help.
Python code:
from __future__ import division
import matplotlib.pyplot as plt
import numpy as np
def islocalmax(x):
"""Both neighbors are lower,
assumes a centered window of size 3"""
return (x[0] < x[1]) & (x[2] < x[1])
def islocalmin(x):
"""Both neighbors are higher,
assumes a centered window of size 3"""
return (x[0] > x[1]) & (x[2] > x[1])
def isextrema(x):
return islocalmax(x) or islocalmin(x)
def create_zigzag(col, p=0.05):
# Find the local min/max
# converting to bool converts NaN to True, which makes it include the endpoints
ext_loc = col.rolling(3, center=True).apply(isextrema, raw=False).astype(np.bool_)
# extract values at local min/max
ext_val = col[ext_loc]
# filter locations based on threshold
thres_ext_loc = (ext_val.diff().abs() > (ext_val.shift(-1).abs() * p))
# Keep the endpoints
thres_ext_loc.iloc[0] = True
thres_ext_loc.iloc[-1] = True
thres_ext_loc = thres_ext_loc[thres_ext_loc]
# extract values at filtered locations
thres_ext_val = col.loc[thres_ext_loc.index]
# again search the extrema to force the zigzag to always go from high > low or vice versa,
# never low > low, or high > high
ext_loc = thres_ext_val.rolling(3, center=True).apply(isextrema, raw=False).astype(np.bool_)
thres_ext_val =thres_ext_val[ext_loc]
return thres_ext_val
from pandas_datareader import data
# Only get the adjusted close.
serie = data.DataReader(
"AAPL", start='2018-1-1', end='2020-12-31', data_source='yahoo'
)
dfzigzag = serie.apply(create_zigzag)
data1_zigzag = dfzigzag['Close'].dropna()
fig, axs = plt.subplots(figsize=(10, 3))
axs.plot(serie.Close, '-', ms=4, label='original')
axs.plot(data1_zigzag, 'ro-', ms=4, label='zigzag')
axs.legend()
plt.show()
Python code Plot:
The indicator:
Metastock Formula:
{ Copyright (c) 2004, John Bruns and Financial Trading Inc. }
reversal:=Input("Reversal",0,100,5);
pc:=Input("Use Percentage?",0,1,1);
z:=If(pc,Zig(CLOSE,reversal,%),Zig(CLOSE,reversal,$));
peakbar:=LastValue(BarsSince((z>Ref(z,-1)AND Ref(Z,-1)<Ref(Z,-2)) OR (z<Ref(z,-1))AND Ref(Z,-1)>Ref(Z,-2)))+1;
lastpeak:=LastValue(Ref(z,-peakbar));
lastend:=LastValue(z);
bars:=Cum(1);
invalid:=If(pc,If(Abs(lastend-lastpeak)*100/lastpeak<reversal,1,0),If(Abs(lastend-lastpeak)<reversal,1,0));
If(bars>=LastValue(bars)-peakbar AND invalid,lastpeak,z);
Related
I am writing code to remove plateau outliers from time series data. I proceeded after receiving advice to use np.diff, but there was a problem that it could not be recognized if it was not the same value.
def find_plateaus(F, min_length=200, tolerance = 0.75, smoothing=15):
import numpy as np
from scipy.ndimage.filters import uniform_filter1d
# calculate smooth gradients
smoothF = uniform_filter1d(F, size = smoothing)
dF = uniform_filter1d(np.gradient(smoothF),size = smoothing)
d2F = uniform_filter1d(np.gradient(dF),size = smoothing)
def zero_runs(x):
iszero = np.concatenate(([0], np.equal(x, 0).view(np.int8), [0]))
absdiff = np.abs(np.diff(iszero))
ranges = np.where(absdiff == 1)[0].reshape(-1, 2)
return ranges
# Find ranges where second derivative is zero
# Values under eps are assumed to be zero.
eps = np.quantile(abs(d2F),tolerance)
smalld2F = (abs(d2F) <= eps)
# Find repititions in the mask "smalld2F" (i.e. ranges where d2F is constantly zero)
p = zero_runs(np.diff(smalld2F))
# np.diff(p) gives the length of each range found.
# only accept plateaus of min_length
plateaus = p[(np.diff(p) > min_length).flatten()]
return (plateaus)
plateaus = find_plateaus(test, min_length=5, tolerance = 0.02, smoothing=11)
plateaus = np.ravel(plateaus, order = 'A')
plateaus = plateaus.tolist()
print(plateaus)
test2['T&F'] = np.nan
for i in test2.index:
if i in plateaus:
test2.loc[i,['T&F']] = test2.loc[i,'data']
else :
test2.loc[i,['T&F']] = 0
fig, ax = plt.subplots(figsize=(15,6))
ax.plot(test2.index, test2['data'], color='black', label = 'time_series')
ax.scatter(test2.index,test2['T&F'], color='red', label = 'D910')
plt.legend()
plt.show();
Do you know any libraries or methods that can be used?
I want to recognize the parts marked in the picture below.
enter image description here
Still in progress, but found the answer.
First, make the np array multidimensional.
ex) time_step = 3
.....
Then, using np.std(), find the standard deviation,
After checking, you can set the standard deviation range to recognize the included range.
I would like to create a pyhton script that simulates sunrise for some of my Philips Hue lights connected to Home Assistant.
What I'm trying to achieve is to follow a sigmoid / s-shaped curve for the brightness and kelvin values.
I would like the brightness to go from 1 to 100 (%), and the Kelvin values to go from 2500 to 4000.
My current script is doing this in a linear way:
#import time
def sunrise(entity_id, minutes, updatesecs=10, startbrightness=1, endbrightness=100, startkelvin=2500, endkelvin=4000):
# Set current brightness and kelvin to the staring values
currentbrightness=startbrightness
currentkelvin=startkelvin
# Calculate the needed iterations for the while loop
numberofiterations=minutes*60/updatesecs
kelvinincreasebyiteration=(endkelvin-startkelvin)/numberofiterations
i=0
while(i<=numberofiterations):
# Set new brightness value
currentbrightness = currentbrightness+endbrightness/numberofiterations
currentkelvin = currentkelvin+kelvinincreasebyiteration
if currentbrightness <= endbrightness:
#print(round(currentbrightness)) # This value will be used for setting the brightness
#print(round(currentkelvin))
service_data = {"entity_id": entity_id, "kelvin": currentkelvin, "brightness_pct": currentbrightness, "transition": updatesecs-1}
hass.services.call("light", "turn_on", service_data, False)
time.sleep(updatesecs)
else:
break
entity_id = data.get("entity_id")
minutes = data.get("minutes")
updatesecs = data.get("updatesecs")
sunrise(entity_id,minutes,updatesecs)
Any ideas for setting the brightness/kelvin with s-shaped values instead of linear is appreciated.
You could be able to simply iterate over the final df and use brightness and kelvin values, sleeping for a minute or so each interval and call your api.
import numpy as np
import seaborn as sns
import pandas as pd
import math
import matplotlib.pyplot as plt
def sigmoid(x):
return math.exp(-np.logaddexp(0, -x))
# You could change 60 to some other interval if you want
t = [(i,sigmoid(x)) for i,x in enumerate(np.linspace(-10,10,60))]
# df of time interval and y value
df = pd.DataFrame(t)
df.columns = ['time','sig']
# multiply sig by 100 to scale up to a percent for brightness
df['brightness'] = (df['sig'] * 100).astype(int)+1
# Scale sig values to 2500,4000 for kelvin
a = df.sig.values
df['kelvin'] = np.interp(a, (a.min(), a.max()), (2500, 4000)).astype(int)
fig, (ax1, ax2) = plt.subplots(ncols=2, sharey=False)
sns.lineplot(data=df,x='time',y='brightness', ax=ax1)
sns.lineplot(data=df,x='time',y='kelvin', ax=ax2)
I want to test the low volatility factor for some market other than equities. Contradiccting finance 101, it has been Shown that low volatility stocks outperform high volatility stocks (see, for example, Baker, Malcolm, Brendan Bradley, and Jeffrey Wurgler (2011), “Benchmarks as Limits to Arbitrage: Understanding the Low-Volatility Anomaly”, Financial Analyst Journal, Vol. 67, No. 1, pp. 40–54.)
So what I want to do is construct the low vola factor by following the methodology of Jegadeesh and Titman (1993), namely raning stocks according to their previous j historical volatility and short top 30% (the most volatile) and Long the bottom 30% (the least volatile), and hold that Long-short Portfolio for k periods. Therefore, a 3-3 j-k Portfolio would mean, looking at the past 3 months of historical volatility (j), and hold that Portfolio for the following 3 months (k).
I have written some Code, and the j part Can be easily managed by simply increasing or decreasing the window of the rolling window vola calculation. The part I am struggling with is the k part, how this could be done. Unfortunately, I couldnt find many examples online.
In addition, I was wondering if my Code is correct or if I did any mistake, since it surprisingly did not work, regardless of the dataset I used. I am not sure whether this is the right place to ask, but if someone could take a look at it that would be great and might be helpful to others planning to implement a strategy like this as well.
Below is a simple working example with just 10 stocks. As I said, I want to implement it for some other assets, but this Code should work. You just have to use your own API key in line 16. Thanks a lot!
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import quandl
import pickle
import scipy.optimize as sco
from scipy.ndimage.interpolation import shift
import matplotlib.pyplot as plt
##################
# Low volatility #
##################
quandl.ApiConfig.api_key = 'Your key here'
stocks = ['MSFT','AAPL','AMZN','FB','BRK.B','JPM','GOOG','JNJ','V','PG','XOM']
data = quandl.get_table('WIKI/PRICES', ticker = stocks,
qopts = { 'columns': ['date', 'ticker', 'adj_close'] },
date = { 'gte': '2016-1-1', 'lte': '2019-11-3' }, paginate=True)
# with open("data.pkl", "wb") as pickle_file:
# pickle.dump(data, pickle_file)
# with open("data.pkl", "rb") as pickle_file:
# data = pickle.load(pickle_file)
data = data.pivot_table(index='date', columns='ticker', values='adj_close')
data = data.groupby(pd.Grouper(freq="M")).mean() # convert from daily to monthly prices
returns = (np.log(data) - np.log(data.shift(1))).dropna()
stds = returns.rolling(12).std()
stds = stds.values # convert to numpy array
list = []
for x in range(0, stds.shape[0]): # for each row in std matrix, create decile buckets (dec -> breakpoint to the next bucket)
for y in range(0,100,10):
dec = np.percentile(stds[x], y)
list.append(dec)
list = np.array(list) # convert list to numpy array
list = np.reshape(list, (stds.shape[0], -1)) # reshape the array such that it has the same format as returns (here: (26,10))
inds = []
for x in range(0, stds.shape[0]): # if the return is in the lower 30%, allocate a -1 to the asset. If it is in the upper 30%, allocate a 1. 0 otherwise.
ind = np.digitize(stds[x], list[x])
for x in range(0, ind.shape[0]):
if ind[x] <= 3:
ind[x] = 1
elif ind[x] >= 8:
ind[x] = -1
else:
ind[x] = 0
inds.append(ind)
inds = np.array(inds)
inds = inds.astype(np.float32)
for x in inds: # divide -1, 1 and 0 by the respective total number of counts of -1, 1 and 0, such that they sum up to -1 and 1 (beta neutral long-short)
ones = np.count_nonzero(x == 1) # count the number of 1
minus_ones = np.count_nonzero(x == -1) # count the number of -1
zeros = np.count_nonzero(x == 0) # count the number of 0
for y in range(0, inds.shape[1]):
if x[y] == 1:
x[y] = x[y] / ones
elif x[y] == -1:
x[y] = x[y] / minus_ones
else:
x[y] = x[y] / zeros
returns = returns.shift(periods=-1).values # shift returns one period back, and create numpy array
pf_returns = np.sum((inds*returns), axis=1) # multiply returns with weights, and sum up
pf_returns = pd.DataFrame(pf_returns)
print("---")
print(pf_returns.describe())
# Plot
pf_returns_indexed = 100 * (1 + pf_returns).cumprod()
pf_returns_indexed = pf_returns_indexed.plot(linewidth=1.2) # change line width
plt.show()
The Matplotlib or Seaborn box plot gives the interquartile range between the 25th percentile and 75th percentile. Is there a way to give custom interquartile range for the Boxplot ? I need to get the box plot such that the interquartile range is between 10th percentile and 90th percentile. Looked up on google and other sources, came to know about getting custom whiskers on the box plot but not custom interquartile range. Hoping would get some useful solutions here.
Yes, it is possible to plot a boxplot with box edges at any percentiles you desire.
Convention
With box and whisker plots it is convention to plot the 25th and 75th percentiles of the data. Thus, you should be aware that departing from this convention puts you at risk of misleading readers. You should also carefully consider what altering the box percentiles means to outlier classification and the whiskers of the boxplot.
Quick solution
A quick fix (ignoring any implications for whisker locations) is to compute the boxplot statistics we desire, alter the locations of q1 and q3, and then plot with ax.bxp:
import matplotlib.cbook as cbook
import matplotlib.pyplot as plt
import numpy as np
# Generate some random data to visualise
np.random.seed(2019)
data = np.random.normal(size=100)
stats = {}
# Compute the boxplot stats (as in the default matplotlib implementation)
stats['A'] = cbook.boxplot_stats(data, labels='A')[0]
stats['B'] = cbook.boxplot_stats(data, labels='B')[0]
stats['C'] = cbook.boxplot_stats(data, labels='C')[0]
# For box A compute the 1st and 99th percentiles
stats['A']['q1'], stats['A']['q3'] = np.percentile(data, [1, 99])
# For box B compute the 10th and 90th percentiles
stats['B']['q1'], stats['B']['q3'] = np.percentile(data, [10, 90])
# For box C compute the 25th and 75th percentiles (matplotlib default)
stats['C']['q1'], stats['C']['q3'] = np.percentile(data, [25, 75])
fig, ax = plt.subplots(1, 1)
# Plot boxplots from our computed statistics
ax.bxp([stats['A'], stats['B'], stats['C']], positions=range(3))
However, viewing the plot produced we see that altering q1 and q3 whilst leaving the whiskers unchanged may not be a sensible idea. You could counter this by recomputing eg. stats['A']['iqr'] and the whisker locations stats['A']['whishi'] and stats['A']['whislo'].
A more complete solution
Looking through matplotlib's source code we find that matplotlib uses matplotlib.cbook.boxplot_stats to compute the statistics used in the boxplot.
Within boxplot_stats we find the code q1, med, q3 = np.percentile(x, [25, 50, 75]). This is the line we can alter to change the plotted percentiles.
So a potential solution would be to make a copy of matplotlib.cbook.boxplot_stats and alter it as we desire. Here I call the function my_boxplot_stats and add an argument percents to make it easy to alter the locations of q1 and q3.
import itertools
from matplotlib.cbook import _reshape_2D
import matplotlib.pyplot as plt
import numpy as np
# Function adapted from matplotlib.cbook
def my_boxplot_stats(X, whis=1.5, bootstrap=None, labels=None,
autorange=False, percents=[25, 75]):
def _bootstrap_median(data, N=5000):
# determine 95% confidence intervals of the median
M = len(data)
percentiles = [2.5, 97.5]
bs_index = np.random.randint(M, size=(N, M))
bsData = data[bs_index]
estimate = np.median(bsData, axis=1, overwrite_input=True)
CI = np.percentile(estimate, percentiles)
return CI
def _compute_conf_interval(data, med, iqr, bootstrap):
if bootstrap is not None:
# Do a bootstrap estimate of notch locations.
# get conf. intervals around median
CI = _bootstrap_median(data, N=bootstrap)
notch_min = CI[0]
notch_max = CI[1]
else:
N = len(data)
notch_min = med - 1.57 * iqr / np.sqrt(N)
notch_max = med + 1.57 * iqr / np.sqrt(N)
return notch_min, notch_max
# output is a list of dicts
bxpstats = []
# convert X to a list of lists
X = _reshape_2D(X, "X")
ncols = len(X)
if labels is None:
labels = itertools.repeat(None)
elif len(labels) != ncols:
raise ValueError("Dimensions of labels and X must be compatible")
input_whis = whis
for ii, (x, label) in enumerate(zip(X, labels)):
# empty dict
stats = {}
if label is not None:
stats['label'] = label
# restore whis to the input values in case it got changed in the loop
whis = input_whis
# note tricksyness, append up here and then mutate below
bxpstats.append(stats)
# if empty, bail
if len(x) == 0:
stats['fliers'] = np.array([])
stats['mean'] = np.nan
stats['med'] = np.nan
stats['q1'] = np.nan
stats['q3'] = np.nan
stats['cilo'] = np.nan
stats['cihi'] = np.nan
stats['whislo'] = np.nan
stats['whishi'] = np.nan
stats['med'] = np.nan
continue
# up-convert to an array, just to be safe
x = np.asarray(x)
# arithmetic mean
stats['mean'] = np.mean(x)
# median
med = np.percentile(x, 50)
## Altered line
q1, q3 = np.percentile(x, (percents[0], percents[1]))
# interquartile range
stats['iqr'] = q3 - q1
if stats['iqr'] == 0 and autorange:
whis = 'range'
# conf. interval around median
stats['cilo'], stats['cihi'] = _compute_conf_interval(
x, med, stats['iqr'], bootstrap
)
# lowest/highest non-outliers
if np.isscalar(whis):
if np.isreal(whis):
loval = q1 - whis * stats['iqr']
hival = q3 + whis * stats['iqr']
elif whis in ['range', 'limit', 'limits', 'min/max']:
loval = np.min(x)
hival = np.max(x)
else:
raise ValueError('whis must be a float, valid string, or list '
'of percentiles')
else:
loval = np.percentile(x, whis[0])
hival = np.percentile(x, whis[1])
# get high extreme
wiskhi = np.compress(x <= hival, x)
if len(wiskhi) == 0 or np.max(wiskhi) < q3:
stats['whishi'] = q3
else:
stats['whishi'] = np.max(wiskhi)
# get low extreme
wisklo = np.compress(x >= loval, x)
if len(wisklo) == 0 or np.min(wisklo) > q1:
stats['whislo'] = q1
else:
stats['whislo'] = np.min(wisklo)
# compute a single array of outliers
stats['fliers'] = np.hstack([
np.compress(x < stats['whislo'], x),
np.compress(x > stats['whishi'], x)
])
# add in the remaining stats
stats['q1'], stats['med'], stats['q3'] = q1, med, q3
return bxpstats
With this in place we can compute our statistics and then plot with plt.bxp.
# Generate some random data to visualise
np.random.seed(2019)
data = np.random.normal(size=100)
stats = {}
# Compute the boxplot stats with our desired percentiles
stats['A'] = my_boxplot_stats(data, labels='A', percents=[1, 99])[0]
stats['B'] = my_boxplot_stats(data, labels='B', percents=[10, 90])[0]
stats['C'] = my_boxplot_stats(data, labels='C', percents=[25, 75])[0]
fig, ax = plt.subplots(1, 1)
# Plot boxplots from our computed statistics
ax.bxp([stats['A'], stats['B'], stats['C']], positions=range(3))
See that with this solution the whiskers are adjusted in our function based on our selected percentiles.:
I'm new to Python and Scipy. Currently I am trying to plot a p-type transistor transfer curve in matplotlib. It is sectionwise defined and I am struggeling to find a good way to get the resulting curve. What I have so far is:
import matplotlib.pyplot as plt
import numpy as np
from scipy.constants import epsilon_0
V_GS = np.linspace(-15, 10, 100) # V
V_th = 1.9 # V
V_DS = -10 # V
mu_p = 0.1e-4 # m²/Vs
epsilon_r = 7.1
W = 200e-6 # m
L = 10e-6 # m
d = 70e-9 # m
C_G = epsilon_0*epsilon_r/d
beta = -mu_p*C_G*W/L
Ids_cutoff = np.empty(100); Ids_cutoff.fill(-1e-12)
Ids_lin = beta*((V_GS-V_th)*V_DS-V_DS**2/2)
Ids_sat = beta*1/2*(V_GS-V_th)**2
plt.plot(V_GS, Ids_lin, label='lin')
plt.plot(V_GS, Ids_sat, label='sat')
plt.plot(V_GS, Ids_cutoff, label='cutoff')
plt.xlabel('V_GS [V]')
plt.ylabel('I [A]')
plt.legend(loc=0)
plt.show()
This gives me the three curves over the complete V_GS range. Now I would like to define
Ids = Ids_cutoff for V_GS >= V_th
Ids = Ids_lin for V_GS < V_th; V_DS >= V_GS - V_th
Ids = Ids_sat for V_GS < V_th; V_DS < V_GS - V_th
I found an example for np.vectorize() but somehow I am struggeling to understand how to work with these arrays. I could create a for loop that goes through all the values but I am pretty sure that there are more effective ways to do this.
Besides deriving a list of values for Ids and plotting it vs V_GS is there also a possibility to just sectionswise plot the three equations with matplotlib as one curve?
Do you want to fill the array Vds according to your selectors?
Vds = np.zeros_like(V_GS) # for the same shape
Vds[V_GS >= V_th] = Ids_cutoff
Vds[(V_GS < V_th) & (V_DS >= V_GS - V_th)] = Ids_lin
Vds[(V_GS < V_th) & (V_DS < V_GS - V_th)] = Ids_sat
By plotting sectionwise, you mean leaving out a certain range? You can use np.nan for that:
plt.plot([0,1,2,3,np.nan,10,11], np.arange(7))
results in:
As Not a Number is not plottable, no line will be drawn.
After having read more into the details of numpy I finally figured out a way to do this:
Ids_cutoff = -1e-12 # instead of creating an array as posted above
# create masks for range of validity for linear and saturation region
is_lin = np.zeros_like(V_GS, dtype=np.bool_)
is_lin[(V_GS < V_th) & (V_DS >= V_GS - V_th)] = 'TRUE'
is_sat = np.zeros_like(V_GS, dtype=np.bool_)
is_sat[(V_GS < V_th) & (V_DS < V_GS - V_th)] = 'TRUE'
# create final array and fill with off-current
Ids = np.zeros_like(V_GS); Ids.fill(Ids_cutoff)
# replace by values for linear and saturation region where valid
Ids = np.where(is_lin, Ids_lin, Ids)
Ids = np.where(is_sat, Ids_sat, Ids)
plt.plot(V_GS, Ids, '*', label='final')