I have this Python code, but it has already been running for 24 hours and still hasn't printed a result. I don't know how long it will take. Can someone help me optimize this code?
The code searches for the best-performing parameters for trading RSI divergence over a certain period. It first defines some parameter ranges for the RSI, then goes through every possible combination to find the one that gives the best performance.
I'm not really an expert, so I don't know how to change the code myself, but I'm happy to learn. Thank you!
import pandas as pd
import numpy as np
import ta
def load_data(file_path, start_date, end_date):
    """
    Loads data for the specified symbol and date range from a CSV file.
    """
    df = pd.read_csv(file_path)
    if 'Date' not in df.columns:
        df['Date'] = pd.to_datetime(df.index)
    df['Date'] = pd.to_datetime(df['Date'])
    df = df.set_index('Date')
    df = df[(df.index >= start_date) & (df.index <= end_date)]
    return df
def calc_rsi(df, n):
    """
    Calculates the relative strength index (RSI) for the given dataframe and window size.
    """
    delta = df["Close"].diff()
    gain = delta.where(delta > 0, 0)
    loss = abs(delta.where(delta < 0, 0))
    avg_gain = gain.rolling(window=n).mean()
    avg_loss = loss.rolling(window=n).mean()
    rs = avg_gain / avg_loss
    rsi = 100 - (100 / (1 + rs))
    return rsi
def calc_pivot_point(df, pivot_point_type, pivot_point_n):
    """
    Calculates the pivot point for the given dataframe and pivot point type.
    """
    if pivot_point_type == "Close":
        pivot_point = df["Close"].rolling(window=pivot_point_n).mean()
    elif pivot_point_type == "High/Low":
        pivot_point = (df["High"].rolling(window=pivot_point_n).mean() + df["Low"].rolling(window=pivot_point_n).mean()) / 2
    else:
        raise ValueError("Invalid pivot point type")
    return pivot_point
def calc_divergence(df, rsi, pivot_point, divergence_type, max_pivot_point, max_bars_to_check):
    """
    Calculates the divergence for the given dataframe and parameters.
    """
    if divergence_type == "Regular":
        pivot_point_delta = pivot_point.diff()
        pivot_point_delta_sign = pivot_point_delta.where(pivot_point_delta > 0, -1)
        pivot_point_delta_sign[pivot_point_delta_sign > 0] = 1
        rsi_delta = rsi.diff()
        rsi_delta_sign = rsi_delta.where(rsi_delta > 0, -1)
        rsi_delta_sign[rsi_delta_sign > 0] = 1
        divergence = pivot_point_delta_sign * rsi_delta_sign
        divergence[divergence < 0] = -1
        divergence = divergence.rolling(window=max_pivot_point).sum()
        divergence = divergence.rolling(window=max_bars_to_check).sum()
        divergence = divergence.where(divergence > 0, 0)
        divergence[divergence < 0] = -1
    else:
        raise ValueError("Invalid divergence type")
    return divergence
def backtest(df, rsi_period, pivot_point_type, pivot_point_n, divergence_type, max_pivot_point, max_bars_to_check, trailing_stop, starting_capital):
    """
    Backtests the strategy for the given dataframe and parameters.
    """
    rsi = calc_rsi(df, rsi_period)
    pivot_point = calc_pivot_point(df, pivot_point_type, pivot_point_n)
    divergence = calc_divergence(df, rsi, pivot_point, divergence_type, max_pivot_point, max_bars_to_check)
    positions = pd.DataFrame(index=df.index, columns=["Position", "Stop Loss"])
    positions["Position"] = 0.0
    positions["Stop Loss"] = 0.0
    capital = starting_capital
    for i, row in enumerate(df.iterrows()):
        date = row[0]
        close = row[1]["Close"]
        rsi_val = rsi.loc[date]
        pivot_val = pivot_point.loc[date]
        divergence_val = divergence.loc[date]
        if divergence_val > 0 and positions.loc[date]["Position"] == 0:
            positions.at[date, "Position"] = capital / close
            positions.at[date, "Stop Loss"] = close * (1 - trailing_stop)
        elif divergence_val < 0 and positions.loc[date]["Position"] > 0:
            capital = positions.loc[date]["Position"] * close
            positions.at[date, "Position"] = 0.0
            positions.at[date, "Stop Loss"] = 0.0
        elif close < positions.loc[date]["Stop Loss"] and positions.loc[date]["Position"] > 0:
            capital = positions.loc[date]["Position"] * close
            positions.at[date, "Position"] = 0.0
            positions.at[date, "Stop Loss"] = 0.0
    return capital
def find_best_iteration(df, start_rsi_period, end_rsi_period, pivot_point_types, start_pivot_point_n, end_pivot_point_n, divergence_types, start_max_pivot_point, end_max_pivot_point, start_max_bars_to_check, end_max_bars_to_check, start_trailing_stop, end_trailing_stop, starting_capital):
    """
    Finds the best iteration for the given parameters.
    """
    best_result = 0.0
    best_params = None
    for rsi_period in range(start_rsi_period, end_rsi_period + 1):
        for pivot_point_type in pivot_point_types:
            for pivot_point_n in range(start_pivot_point_n, end_pivot_point_n + 1):
                for divergence_type in divergence_types:
                    for max_pivot_point in range(start_max_pivot_point, end_max_pivot_point + 1):
                        for max_bars_to_check in range(start_max_bars_to_check, end_max_bars_to_check + 1):
                            for trailing_stop in np.arange(start_trailing_stop, end_trailing_stop + 0.01, 0.01):
                                result = backtest(df, rsi_period, pivot_point_type, pivot_point_n, divergence_type, max_pivot_point, max_bars_to_check, trailing_stop, starting_capital)
                                if result > best_result:
                                    best_result = result
                                    best_params = (rsi_period, pivot_point_type, pivot_point_n, divergence_type, max_pivot_point, max_bars_to_check, trailing_stop)
    return best_result, best_params
# Define the parameters
file_path = 'C:\\Users\\The Death\\Downloads\\Binance_BTCUSDT_spot.csv'
start_date = "2020-03-16"
end_date = "2021-04-12"
df = load_data(file_path, start_date, end_date)
def load_data(start_date, end_date):
    # Your code to load the data for the specified date range
    # ...
    return df
# Define the parameters for the backtesting
start_rsi_period = 1
end_rsi_period = 30
pivot_point_types = ["Close", "High/Low"]
start_pivot_point_n = 1
end_pivot_point_n = 50
divergence_types = ["Regular"]
start_max_pivot_point = 1
end_max_pivot_point = 20
start_max_bars_to_check = 30
end_max_bars_to_check = 200
start_trailing_stop = 0.01
end_trailing_stop = 0.5
starting_capital = 10000
# Run the backtesting
df = load_data(start_date, end_date)
best_result, best_params = find_best_iteration(df, start_rsi_period, end_rsi_period, pivot_point_types, start_pivot_point_n, end_pivot_point_n, divergence_types, start_max_pivot_point, end_max_pivot_point, start_max_bars_to_check, end_max_bars_to_check, start_trailing_stop, end_trailing_stop, starting_capital)
# Print the results
print("Best result: ", best_result)
print("Best parameters: ", best_params)
I have two recommendations after scrolling through your code:
Reduce the number of nested for loops. Each additional layer multiplies the running time: a single loop is O(n), and k nested loops are on the order of n^k. Your find_best_iteration() nests seven loops, and with the parameter ranges you define that is about 30 × 2 × 50 × 1 × 20 × 171 × 50 ≈ 513 million calls to backtest(), which is why it is taking so long.
Save and process your data in numpy arrays instead of pandas DataFrames. A DataFrame is a class that carries a lot of overhead you are not using, and element-wise reads and writes on it are much slower than on a plain numpy array (a minimal sketch of this idea follows below).
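On that second point, here is a minimal sketch (the function name is illustrative and it is not a drop-in replacement) of what the buy/sell loop in backtest() could look like if the per-bar state were kept in plain scalars and numpy arrays rather than in a positions DataFrame; the entry and exit rules are the ones from the question:
def backtest_core(df, divergence, trailing_stop, starting_capital):
    # Pull the columns once as numpy arrays; indexing these is far cheaper
    # than positions.loc[...] / positions.at[...] on every bar.
    close = df["Close"].to_numpy()
    div = divergence.to_numpy()
    capital = starting_capital
    position = 0.0   # units currently held
    stop_loss = 0.0  # stop price for the open position
    for i in range(len(close)):
        if div[i] > 0 and position == 0:
            # enter on positive divergence when flat
            position = capital / close[i]
            stop_loss = close[i] * (1 - trailing_stop)
        elif position > 0 and (div[i] < 0 or close[i] < stop_loss):
            # exit on negative divergence or when the stop is hit
            capital = position * close[i]
            position = 0.0
            stop_loss = 0.0
    return capital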
You can try the following methods to improve the performance:
The backtest() function is called once per parameter combination inside find_best_iteration(), under all of those nested for loops, and each call repeatedly updates the positions variable. Those updates are slow precisely because positions is a DataFrame; consider keeping that per-bar state in a numpy array (or plain scalars), which is much faster to update, as in the sketch above.
You can also use Python's multiprocessing module to parallelize the work: each backtest() call computes its own divergence independently, so the calls can run in parallel across parameter combinations (a sketch follows below).
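A minimal sketch of that parallel search, assuming the backtest() function and the parameter-range variables defined in the question are available at module level so the worker processes can see them (e.g. with the default fork start method on Linux); the helper name run_one is illustrative:
import itertools
from multiprocessing import Pool

import numpy as np

def run_one(params):
    # Run backtest() for one parameter combination and return (result, params).
    (rsi_period, pivot_point_type, pivot_point_n, divergence_type,
     max_pivot_point, max_bars_to_check, trailing_stop) = params
    result = backtest(df, rsi_period, pivot_point_type, pivot_point_n,
                      divergence_type, max_pivot_point, max_bars_to_check,
                      trailing_stop, starting_capital)
    return result, params

if __name__ == "__main__":
    # The same combinations the nested loops enumerate, built lazily.
    param_grid = itertools.product(
        range(start_rsi_period, end_rsi_period + 1),
        pivot_point_types,
        range(start_pivot_point_n, end_pivot_point_n + 1),
        divergence_types,
        range(start_max_pivot_point, end_max_pivot_point + 1),
        range(start_max_bars_to_check, end_max_bars_to_check + 1),
        np.arange(start_trailing_stop, end_trailing_stop + 0.01, 0.01),
    )
    with Pool() as pool:
        best_result, best_params = max(
            pool.imap_unordered(run_one, param_grid, chunksize=1000),
            key=lambda rp: rp[0],
        )
    print(best_result, best_params)
Keep in mind that parallelism only divides the wall-clock time by the number of cores; with hundreds of millions of combinations you will probably also want coarser parameter steps or a randomized search.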
Hope this helps!
I have spent days trying to code (and searching for) a Python function to compute RSI that matches TradingView's results, but without success (I'm new to Python).
The closest I have come is the function below, but the result is still different (and because an exponential average is used, sometimes the result is quite close and sometimes it is way off):
def rsi_tradingview(ohlc: pd.DataFrame, period: int = 14, round_rsi: bool = True):
    delta = ohlc["close"].diff()
    up = delta.copy()
    up[up < 0] = 0
    up = pd.Series.ewm(up, alpha=1/period).mean()
    down = delta.copy()
    down[down > 0] = 0
    down *= -1
    down = pd.Series.ewm(down, alpha=1/period).mean()
    rsi = np.where(up == 0, 0, np.where(down == 0, 100, 100 - (100 / (1 + up / down))))
    return np.round(rsi, 2) if round_rsi else rsi
My code looks like this:
pairs = ["BTCUSDT", "PONDUSDT"]

def get_historical_candles():
    record = client.get_historical_klines(pair, Client.KLINE_INTERVAL_5MINUTE, "3 hour ago UTC")
    myList = []
    try:
        for item in record:
            n_item = []
            int_ts = int(item[0] / 1000)
            n_item.append(float(item[4]))  # close
            myList.append(n_item)
    except Exception as error:
        debug_logger.debug(error)
    new_ohlc = pd.DataFrame(myList, columns=['close'])
    return new_ohlc

def rsi_tradingview(ohlc: pd.DataFrame, period: int = 14, round_rsi: bool = False):
    delta = ohlc.diff()
    up = delta.copy()
    up[up < 0] = 0
    up = pd.Series.ewm(up, alpha=1/period).mean()
    down = delta.copy()
    down[down > 0] = 0
    down *= -1
    down = pd.Series.ewm(down, alpha=1/period).mean()
    rsi = np.where(up == 0, 0, np.where(down == 0, 100, 100 - (100 / (1 + up / down))))
    return np.round(rsi, 2) if round_rsi else rsi

for pair in pairs:
    all_candles = get_historical_candles()
    test_rsi = rsi_tradingview(all_candles, 14, False)
    test_rsi_final = test_rsi[-1]
    print(test_rsi_final)
I compare the results with tradingview_ta this way, which gives correct results (I can't just use this to get the RSI, because I also need the RSI series to calculate the StochRSI):
for pair in pairs:
    test = TA_Handler(
        symbol=pair,
        screener="CRYPTO",
        exchange="BINANCE",
        interval=Interval.INTERVAL_5_MINUTES
    )
    print(test.get_analysis().indicators["RSI"])
In case it helps, here is the TradingView (Pine Script) code for the RSI and for how RMA is calculated:
# RSI
study(title="Relative Strength Index", shorttitle="RSI", format=format.price, precision=2, resolution="")
len = input(14, minval=1, title="Length")
src = input(close, "Source", type = input.source)
up = rma(max(change(src), 0), len)
down = rma(-min(change(src), 0), len)
rsi = down == 0 ? 100 : up == 0 ? 0 : 100 - (100 / (1 + up / down))
plot(rsi, "RSI", color=#7E57C2)
band1 = hline(70, "Upper Band", color=#787B86)
bandm = hline(50, "Middle Band", color=color.new(#787B86, 50))
band0 = hline(30, "Lower Band", color=#787B86)
fill(band1, band0, color=color.rgb(126, 87, 194, 90), title="Background")
# RMA
plot(rma(close, 15))
//the same on pine
pine_rma(src, length) =>
    alpha = 1/length
    sum = 0.0
    sum := na(sum[1]) ? sma(src, length) : alpha * src + (1 - alpha) * nz(sum[1])
plot(pine_rma(close, 15))
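For reference, a minimal Python sketch of the same RMA recursion (Wilder's smoothing) as pine_rma above; the function name is illustrative, and pandas' ewm(alpha=1/length, adjust=False) implements the same recursion once it has run past the initial seed:
import numpy as np
import pandas as pd

def rma(src: pd.Series, length: int) -> pd.Series:
    # Wilder's RMA: seed with an SMA, then sum := alpha*src + (1-alpha)*sum[1]
    alpha = 1 / length
    values = src.to_numpy(dtype=float)
    out = np.full(len(values), np.nan)
    out[length - 1] = values[:length].mean()  # SMA seed, as in pine_rma when sum[1] is na
    for i in range(length, len(values)):
        out[i] = alpha * values[i] + (1 - alpha) * out[i - 1]
    return pd.Series(out, index=src.index)

# Roughly equivalent pandas one-liner (differs only in how the series is seeded):
# src.ewm(alpha=1/length, adjust=False).mean()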
Please help me find what is wrong. :(
Thank you in advance, and thanks for reading!
I have the following R code that I wanted to translate into Python, but I have run into a snag. When the code runs, each cancel buy or cancel sell order consumes one of the available buy or sell orders (nb, ns), and after many generated events the book runs out of orders. It seems the event-generation function I implemented is not producing enough new orders, so the order book shrinks faster than it grows. That is why my code eventually raises a KeyError: it ends up at positions that are not in my book when cancelling buy or sell orders. Below is the R code, followed by the Python code.
R code.
#Book setup
L <- 30 #Set number of price levels to be included in iterations
# Generate initial book
LL <- 1000 #Total number of levels in buy and sell books
# Initialize book with asymptotic depth of 5 shares
initializeBook5 <- function()
{
Price <<- -LL:LL
# Book shape is set to equal long-term average from simulation
buySize <<- c(rep(5,LL-8),5,4,4,3,3,2,2,1,rep(0,LL+1))
sellSize <<- c(rep(0,LL),0,1,2,2,3,3,4,4,5,rep(5,LL-8))
book <<- data.frame(Price, buySize, sellSize )
if(logging==T){eventLog <<- as.data.frame(matrix(0,nrow=numEvents,ncol=2))
colnames(eventLog)<<-c("Type","Price")
count <<- 0
eventType <<- c("LB","LS","CB","CS","MB","MS")
eventDescr <<- NA}
}
#Various utility functions
bestOffer <- function(){min(book$Price[book$sellSize>0])}
bestBid <- function(){max(book$Price[book$buySize>0])}
spread <- function(){bestOffer()-bestBid()}
mid <- function(){(bestOffer()+bestBid())/2}
#Functions to find mid-market
bidPosn<-function()length(book$buySize[book$Price<=bestBid()])
askPosn<-function()length(book$sellSize[book$Price<=bestOffer()])
midPosn<-function(){floor((bidPosn()+askPosn())/2)}
#Display center of book
go <- function(){book[(midPosn()-20):(midPosn()+20),]}
#Display book shape
bookShape<-function(band){c(book$buySize[midPosn()+(-band:0)],book$sellSize[midPosn()+1:band])}
bookPlot<-function(band){
plot((-band:band),bookShape(band),
col="red",type="l",xlab="Price",ylab="Quantity")
}
#Choose from L whole numbers in (1,...,L) with uniform probability
pick <- function(m){sample(1:m,1)}
# Switch logging on
logging <- T
#Buy limit order
limitBuyOrder <- function(price=NA){
if (is.na(price))
{prx <<- (bestOffer()-pick(L))}
else prx <<-price
if(logging==T){eventLog[count,]<<- c("LB",prx)}
book$buySize[book$Price==prx]<<-book$buySize[book$Price==prx]+1}
#Sell limit order
limitSellOrder <- function(price=NA){
if (is.na(price))
{prx <<- (bestBid()+pick(L))}
else prx <<-price
if(logging==T){eventLog[count,] <<- c("LS",prx)}
book$sellSize[book$Price==prx]<<-book$sellSize[book$Price==prx]+1}
#Cancel buy order
cancelBuyOrder<-function(price=NA){
q<-pick(nb)
tmp <- cumsum(rev(book$buySize)) #Cumulative buy size from 0
posn <- length(tmp[tmp>=q]) #gives position in list where cumulative size >q
prx <<- book$Price[posn]
if (!is.na(price)) {prx <<-price}
if(logging==T){eventLog[count,]<<- c("CB",prx)}
book$buySize[posn]<<-book$buySize[posn]-1}
#Cancel sell order
cancelSellOrder<-function(price=NA){
q<-pick(ns)
tmp <- cumsum(book$sellSize) #Cumulative sell size from 0
posn <- length(tmp[tmp<q])+1
prx <<- book$Price[posn]
if (!is.na(price)) {prx <<-price}
if(logging==T){eventLog[count,]<<- c("CS",prx)}
book$sellSize[posn]<<-book$sellSize[posn]-1}
#Market buy order
marketBuyOrder <- function(){
prx <<- bestOffer()
if(logging==T){eventLog[count,]<<- c("MB",prx)}
book$sellSize[book$Price==prx]<<-book$sellSize[book$Price==prx]-1}
#Market sell order
marketSellOrder <- function(){
prx <<- bestBid()
if(logging==T){eventLog[count,]<<- c("MS",prx)}
book$buySize[book$Price==prx]<<-book$buySize[book$Price==prx]-1}
#Generate an event and update the buy and sell books
#Note that limit orders may be placed inside the spread
generateEvent <- function()
{
nb <<- sum(book$buySize[book$Price>=(bestOffer()-L)]); # Number of cancelable buy orders
ns <<- sum(book$sellSize[book$Price<=(bestBid()+L)]); # Number of cancelable sell orders
eventRate <- nb*delta+ns*delta + mu +2*L*alpha;
probEvent <- c(L*alpha,L*alpha,nb*delta,ns*delta,mu/2,mu/2)/eventRate;
m <- sample(1:6, 1, replace = TRUE, probEvent); #Choose event type
switch(m,
limitBuyOrder(),
limitSellOrder(),
cancelBuyOrder(),
cancelSellOrder(),
marketBuyOrder(),
marketSellOrder()
);
}
logging <- F
lambda <- 1
mus <- c(10,8,10,10)
nus <- c(1/5,1/5,1/6,1/8)
avgBookShapes<-as.data.frame(matrix(0,nrow=41,ncol=4))
for(i in 1:4){
mu<-mus[i]
nu<-nus[i]
initializeBook5()
numEvents <- 100000 # Average over 100,000 events
avgBookShape <- bookShape(20)/numEvents
for(count in 2:numEvents){
generateEvent()
avgBookShape <- avgBookShape+bookShape(20)/numEvents
}
avgBookShapes[,i]<-avgBookShape
}
Python code
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import random
import math
class Zibook():
    def __init__(self, ll, l, alpha, mu, delta, num_events, logging=False):
        self.ll = ll  # total number of levels in buy and sell
        self.l = l  # set number of price levels to be included in iterations
        self.alpha = alpha
        self.mu = mu
        self.delta = delta
        self.num_events = num_events
        self.logging = logging
        price = np.array(list(range(-self.ll, self.ll + 1, 1)))
        buy_size = np.array([5]*(self.ll-8) + [5,4,4,3,3,2,2,1] + [0]*(self.ll+1))
        sell_size = np.array([0]*(self.ll) + [0,1,2,2,3,3,4,4,5] + [5]*(self.ll-8))
        book = pd.DataFrame(index=price, columns=['Price','Buy Size','Sell Size','Type'])
        book['Price'] = price
        book['Buy Size'] = buy_size
        book['Sell Size'] = sell_size
        book = book.reset_index(drop=True)
        self.book = book
        event_type = ['LB','LS','CB','CS','MB','MS']
        event_descr = np.nan
        x = list(range(0, self.num_events, 1))
        event_log = pd.DataFrame(index=x, columns=['Type','Price'])
        self.event_log = event_log
        nb = sum(self.book.loc[self.book.Price >= (self.best_offer()-self.l), 'Buy Size'])  # number of cancellable buy orders
        ns = sum(self.book.loc[self.book.Price <= (self.best_bid()+self.l), 'Sell Size'])  # number of cancellable sell orders
        self.nb = nb
        self.ns = ns
    def best_offer(self):
        df = self.book
        a = df.loc[df['Sell Size'] > 0, 'Price'].min()
        return a

    def best_bid(self):
        df = self.book
        b = df.loc[df['Buy Size'] > 0, 'Price'].max()
        return b

    def spread(self):
        spread = (self.best_offer() - self.best_bid())/2
        return spread

    def mid(self):
        mid = (self.best_offer() + self.best_bid())/2
        return mid

    def bidposn(self):
        df = self.book
        a = len(df.loc[df.Price <= self.best_bid(), 'Buy Size']) - 1
        return a

    def askposn(self):
        df = self.book
        a = len(df[df['Price'] <= self.best_offer()]['Sell Size']) - 1
        return a

    def midposn(self):
        df = self.book
        a = ((self.bidposn() + self.askposn())//2)
        return a

    def centerbook(self):
        df = self.book
        mid = self.midposn()
        return df[mid-20:mid+21]

    def bookshape(self, band):
        df = self.book
        mid = self.midposn()
        x = np.arange(-band, 0)
        y = [df.loc[(mid+el), 'Buy Size'] for el in x]
        x1 = np.arange(0, band+1)
        z = [df.loc[(mid+el), 'Sell Size'] for el in x1]
        seq3 = np.concatenate((y, z), axis=0)
        return seq3

    def bookplot(self, band):
        x = list(range(-band, band+1, 1))
        seq3 = self.bookshape(band)
        plt.plot(x, seq3, color='red')
        plt.xlabel('Price')
        plt.ylabel('Quantity')
        return plt.show()

    def pick(self, l):
        a = np.random.choice(l, 1, replace=True, p=[1/l]*l)
        return a[0]
    def limitbuyorder(self, price=None):
        if price == None:
            price = (self.best_offer() - self.pick(self.l))
        else:
            price = price
        df = self.book
        if self.logging == True:
            count = 0
            eventlog = self.event_log
            eventlog.loc[count, 'Price'] = price
            eventlog.loc[count, 'Type'] = 'LB'
        df.loc[df.Price == price, 'Buy Size'] += 1

    def limitsellorder(self, price=None):
        if price == None:
            price = (self.best_bid() + self.pick(self.l))
        else:
            price = price
        df = self.book
        if self.logging == True:
            count = 0
            eventlog = self.event_log
            eventlog.loc[count, 'Price'] = price
            eventlog.loc[count, 'Type'] = 'LS'
        df.loc[df.Price == price, 'Sell Size'] += 1
    def cancelbuyorder(self, price=None):
        df = self.book
        if price == None:
            q = self.pick(self.nb)
            tmp = np.array(df['Buy Size'].to_list()[::-1]).cumsum()
            posn = len(tmp[tmp >= q]) - 1
            price = df.Price[posn]
            df.loc[posn, 'Buy Size'] -= 1
        else:
            price = price
            df.loc[df.Price == price, 'Buy Size'] -= 1
        if self.logging == True:
            count = 0
            eventlog = self.event_log
            eventlog.loc[count, 'Price'] = price
            eventlog.loc[count, 'Type'] = 'CB'

    def cancelsellorder(self, price=None):
        #global ns
        df = self.book
        if price == None:
            q = self.pick(self.ns)
            tmp = np.array(df['Sell Size'].to_list()).cumsum()
            posn = len(tmp[tmp < q])
            price = df.Price[posn]
            df.loc[posn, 'Sell Size'] -= 1
        else:
            price = price
            df.loc[df.Price == price, 'Sell Size'] -= 1
        if self.logging == True:
            count = 0
            eventlog = self.event_log
            eventlog.loc[count, 'Price'] = price
            eventlog.loc[count, 'Type'] = 'CS'
    def marketbuyorder(self, price=None):
        df = self.book
        price = self.best_offer()
        if self.logging == True:
            count = 0
            eventlog = self.event_log
            eventlog.loc[count, 'Price'] = price
            eventlog.loc[count, 'Type'] = 'MB'
        df.loc[df.Price == price, 'Sell Size'] -= 1

    def marketsellorder(self, price=None):
        df = self.book
        price = self.best_bid()
        if self.logging == True:
            count = 0
            eventlog = self.event_log
            eventlog.loc[count, 'Price'] = price
            eventlog.loc[count, 'Type'] = 'MS'
        df.loc[df.Price == price, 'Buy Size'] -= 1

    def generateevent(self):
        df = self.book
        nb = sum(df.loc[df.Price >= (self.best_offer()-self.l), 'Buy Size'])  # number of cancellable buy orders
        ns = sum(df.loc[df.Price <= (self.best_bid()+self.l), 'Sell Size'])  # number of cancellable sell orders
        eventRate = nb*self.delta + ns*self.delta + self.mu + 2*self.l*self.alpha
        probEvent = (self.l*self.alpha + self.l*self.alpha + nb*self.delta + ns*self.delta + self.mu*0.5 + self.mu*0.5)/eventRate
        a = np.random.choice(6, 1, replace=True, p=[probEvent/6]*6)
        idx = a[0]
        z = [self.limitbuyorder(), self.limitsellorder(), self.cancelbuyorder(), self.cancelsellorder(), self.marketbuyorder(), self.marketsellorder()]
        return z[idx]
alpha = 1
mu = 10
delta = 1/5
num_events = 100000

'''
a = Zibook(1000,30,alpha,mu,delta,num_events,logging=False)
a.limitbuyorder(price =a.best_bid())
a.limitbuyorder(price =a.best_bid())
a.bookplot(20)
#print(a.generateevent())
#print(a.cancelbuyorder(price=None))
'''

lalpha = [1,1,1,1]
lmu = [10,8,10,10]
ldelta = [1/5,1/5,1/6,1/8]
length = len(lalpha)
Avgbookshapes = np.array([])
num_events = 100000

for i in range(0, length, 1):
    alpha = lalpha[i]
    mu = lmu[i]
    delta = ldelta[i]
    a = Zibook(1000, 30, alpha, mu, delta, num_events, logging=False)
    for i1 in range(0, 100, 1):
        a.generateevent()
        #print(i1)
    avgbookshape = a.bookshape(20)/num_events
    #print(avgbookshape.shape)
    for i3 in range(2, num_events+1, 1):
        a.generateevent()
        #print(i3)
        avgbookshape2 = a.bookshape(20)/num_events
        #print(avgbookshape2.shape)
        avgbookshape += avgbookshape2
    Avgbookshapes = np.append(Avgbookshapes, avgbookshape)

np.save('my_array', Avgbookshapes)
Any help will be greatly appreciated. (For background, this algorithm is a zero-intelligence simulation of market microstructure: https://github.com/zecophy/MTH9879-Market-Microstructure-Models/blob/master/HW1/9879HW1_Chenyu_Zhao_graded.ipynb)
I just realized I didn't have the probabilities written out correctly in the generateevent function, but the book still decays to zero orders. Below is the modified Python code; does anyone know why the orders decay instead of growing as they do in the R code?
def generateevent(self):
    df = self.book
    nb = sum(df.loc[df.Price >= (self.best_offer()-self.l), 'Buy Size'])  # number of cancellable buy orders
    ns = sum(df.loc[df.Price <= (self.best_bid()+self.l), 'Sell Size'])  # number of cancellable sell orders
    eventRate = nb*self.delta + ns*self.delta + self.mu + 2*self.l*self.alpha
    probEvent = np.array([self.l*self.alpha, self.l*self.alpha, nb*self.delta, ns*self.delta, self.mu*0.5, self.mu*0.5])/eventRate
    #print(probEvent)
    a = np.random.choice(6, 1, replace=True, p=probEvent)
    idx = a[0]
    z = [self.limitbuyorder(), self.limitsellorder(), self.cancelbuyorder(), self.cancelsellorder(), self.marketbuyorder(), self.marketsellorder()]
    return z[idx]
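As a side note on the translation itself: R's switch(m, limitBuyOrder(), ...) evaluates only the branch that is selected, whereas a Python list of call results like z = [self.limitbuyorder(), ...] invokes every handler while the list is being built. A minimal sketch of the lazier pattern, written as a standalone function over a Zibook-like object (the function name is illustrative):
import numpy as np

def generate_event_sketch(book, prob_event):
    # Store the bound methods themselves (no parentheses), then call only
    # the one selected by the probability-weighted draw, mirroring R's switch().
    handlers = [book.limitbuyorder, book.limitsellorder, book.cancelbuyorder,
                book.cancelsellorder, book.marketbuyorder, book.marketsellorder]
    idx = np.random.choice(6, p=prob_event)
    return handlers[idx]()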
I have a dataframe of OHLCV data. Does anyone know of a tutorial, or any other way, of computing the ADX (average directional movement index) using pandas?
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
import datetime as dt
import numpy as nm
start=dt.datetime.today()-dt.timedelta(59)
end=dt.datetime.today()
df=pd.DataFrame(yf.download("MSFT", start=start, end=end))
The average directional index, or ADX, is the primary technical indicator among the five indicators that make up the technical trading system developed by J. Welles Wilder, Jr., and it is calculated using the other indicators in that system. The ADX is primarily used as a measure of momentum, or trend strength, but the full ADX system is also used as a directional indicator.
Directional movement is calculated by comparing the difference between two consecutive lows with the difference between their respective highs.
For the Excel calculation of ADX, this is a really good video:
https://www.youtube.com/watch?v=LKDJQLrXedg&t=387s
I was playing with this a little bit and found something that can help you with the issue:
import numpy as np
import pandas as pd

def ADX(data: pd.DataFrame, period: int):
    """
    Computes the ADX indicator.
    """
    df = data.copy()
    alpha = 1/period

    # TR
    df['H-L'] = df['High'] - df['Low']
    df['H-C'] = np.abs(df['High'] - df['Close'].shift(1))
    df['L-C'] = np.abs(df['Low'] - df['Close'].shift(1))
    df['TR'] = df[['H-L', 'H-C', 'L-C']].max(axis=1)
    del df['H-L'], df['H-C'], df['L-C']

    # ATR
    df['ATR'] = df['TR'].ewm(alpha=alpha, adjust=False).mean()

    # +-DX
    df['H-pH'] = df['High'] - df['High'].shift(1)
    df['pL-L'] = df['Low'].shift(1) - df['Low']
    df['+DX'] = np.where(
        (df['H-pH'] > df['pL-L']) & (df['H-pH'] > 0),
        df['H-pH'],
        0.0
    )
    df['-DX'] = np.where(
        (df['H-pH'] < df['pL-L']) & (df['pL-L'] > 0),
        df['pL-L'],
        0.0
    )
    del df['H-pH'], df['pL-L']

    # +- DMI
    df['S+DM'] = df['+DX'].ewm(alpha=alpha, adjust=False).mean()
    df['S-DM'] = df['-DX'].ewm(alpha=alpha, adjust=False).mean()
    df['+DMI'] = (df['S+DM']/df['ATR'])*100
    df['-DMI'] = (df['S-DM']/df['ATR'])*100
    del df['S+DM'], df['S-DM']

    # ADX
    df['DX'] = (np.abs(df['+DMI'] - df['-DMI'])/(df['+DMI'] + df['-DMI']))*100
    df['ADX'] = df['DX'].ewm(alpha=alpha, adjust=False).mean()
    del df['DX'], df['ATR'], df['TR'], df['-DX'], df['+DX'], df['+DMI'], df['-DMI']

    return df
At the beginning the values aren't correct (as always with the EWM approach), but after enough bars they converge to the correct value.
Math was taken from here.
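A quick usage sketch with the MSFT dataframe from the question (assuming the standard yfinance High/Low/Close column names):
df = ADX(df, period=14)
print(df['ADX'].tail())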
def ADX(df):

    def getCDM(df):
        dmpos = df["High"][-1] - df["High"][-2]
        dmneg = df["Low"][-2] - df["Low"][-1]
        if dmpos > dmneg:
            return dmpos
        else:
            return dmneg

    def getDMnTR(df):
        DMpos = []
        DMneg = []
        TRarr = []
        n = round(len(df)/14)
        idx = n
        while n <= (len(df)):
            dmpos = df["High"][n-1] - df["High"][n-2]
            dmneg = df["Low"][n-2] - df["Low"][n-1]
            DMpos.append(dmpos)
            DMneg.append(dmneg)
            a1 = df["High"][n-1] - df["High"][n-2]
            a2 = df["High"][n-1] - df["Close"][n-2]
            a3 = df["Low"][n-1] - df["Close"][n-2]
            TRarr.append(max(a1, a2, a3))
            n = idx + n
        return DMpos, DMneg, TRarr

    def getDI(df):
        DMpos, DMneg, TR = getDMnTR(df)
        CDM = getCDM(df)
        POSsmooth = (sum(DMpos) - sum(DMpos)/len(DMpos) + CDM)
        NEGsmooth = (sum(DMneg) - sum(DMneg)/len(DMneg) + CDM)
        DIpos = (POSsmooth / (sum(TR)/len(TR))) * 100
        DIneg = (NEGsmooth / (sum(TR)/len(TR))) * 100
        return DIpos, DIneg

    def getADX(df):
        DIpos, DIneg = getDI(df)
        dx = (abs(DIpos - DIneg) / abs(DIpos + DIneg)) * 100
        ADX = dx/14
        return ADX

    return getADX(df)

print(ADX(df))
This gives you exactly the same numbers as TradingView and Thinkorswim.
import numpy as np

def ema(arr, periods=14, weight=1, init=None):
    leading_na = np.where(~np.isnan(arr))[0][0]
    arr = arr[leading_na:]
    alpha = weight / (periods + (weight-1))
    alpha_rev = 1 - alpha
    n = arr.shape[0]
    pows = alpha_rev**(np.arange(n+1))
    out1 = np.array([])
    if 0 in pows:
        out1 = ema(arr[:int(len(arr)/2)], periods)
        arr = arr[int(len(arr)/2) - 1:]
        init = out1[-1]
        n = arr.shape[0]
        pows = alpha_rev**(np.arange(n+1))
    scale_arr = 1/pows[:-1]
    if init:
        offset = init * pows[1:]
    else:
        offset = arr[0]*pows[1:]
    pw0 = alpha*alpha_rev**(n-1)
    mult = arr*pw0*scale_arr
    cumsums = mult.cumsum()
    out = offset + cumsums*scale_arr[::-1]
    out = out[1:] if len(out1) > 0 else out
    out = np.concatenate([out1, out])
    out[:periods] = np.nan
    out = np.concatenate(([np.nan]*leading_na, out))
    return out

def atr(highs, lows, closes, periods=14, ema_weight=1):
    hi = np.array(highs)
    lo = np.array(lows)
    c = np.array(closes)
    tr = np.vstack([np.abs(hi[1:]-c[:-1]),
                    np.abs(lo[1:]-c[:-1]),
                    (hi-lo)[1:]]).max(axis=0)
    atr = ema(tr, periods=periods, weight=ema_weight)
    atr = np.concatenate([[np.nan], atr])
    return atr

def adx(highs, lows, closes, periods=14):
    highs = np.array(highs)
    lows = np.array(lows)
    closes = np.array(closes)
    up = highs[1:] - highs[:-1]
    down = lows[:-1] - lows[1:]
    up_idx = up > down
    down_idx = down > up
    updm = np.zeros(len(up))
    updm[up_idx] = up[up_idx]
    updm[updm < 0] = 0
    downdm = np.zeros(len(down))
    downdm[down_idx] = down[down_idx]
    downdm[downdm < 0] = 0
    _atr = atr(highs, lows, closes, periods)[1:]
    updi = 100 * ema(updm, periods) / _atr
    downdi = 100 * ema(downdm, periods) / _atr
    zeros = (updi + downdi == 0)
    downdi[zeros] = .0000001
    adx = 100 * np.abs(updi - downdi) / (updi + downdi)
    adx = ema(np.concatenate([[np.nan], adx]), periods)
    return adx
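Usage with the dataframe from the question would look something like this (again assuming the yfinance High/Low/Close columns):
adx_values = adx(df['High'], df['Low'], df['Close'], periods=14)
print(adx_values[-5:])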
I have made the following custom environment:
class Market(gym.Env):
    """This env is for training a BUYING vwap beating algo, with
    OpenAI gym reinforcement learning algorithms"""
    metadata = {'render.modes': ['human']}

    def __init__(self, list_of_df):
        super(Market, self).__init__()
        self.list_of_df = list_of_df
        self.current_day = list_of_df[0]
        self.reward_range = (-2147483647, 2147483647)
        # self.A_Vol = 0
        self.current_step = 0
        self.last_ind_in_day = len(list_of_df[0]) - 1
        # self.trade_size = 10
        self.A_VWAP = 0
        self.A_rolling_vol = 0
        self.A_rolling_price = 0
        self.A_vol_left = 1000
        self.reward = 0
        self.done = False
        # To keep track of the AGENTS VWAP:
        self.cum_VbyP = 0
        self.cum_vol_traded = 0
        self.purchase_vol = 80
        self.action_space = spaces.Box(low=np.array([0, 0]), high=np.array([3, 1]), dtype=np.float16)
        # Prices contains the OHLC for the 5 min interval
        # Miliseconds from midnight
        # Rolling VWAP for this time period
        # The agents Rolling VWAP, A_VWAP
        # The Vol of securities left to still buy, A_trgt_vol
        # The Vol traded this time step in the market
        self.observation_space = spaces.Box(
            low=-2147483647, high=2147483647, shape=(1, len(list_of_df[1].iloc[2])), dtype=np.float16)

    def _take_action(self, a):
        # Only buy if there are still shares to be bought today,
        if (self.A_vol_left > 0):
            # Purchase a * volume of a trade
            vol = self.purchase_vol * a[0]
            print(vol)
            # But if there arent enough shares still to buy
            if (vol > self.A_vol_left):
                vol = self.A_vol_left
            self.A_vol_left = self.A_vol_left - vol
            # Increase the volume of shares traded:
            self.cum_vol_traded = self.cum_vol_traded + vol
            if (vol > 0):
                # Sample a random price between high and low for this interval:
                price = round(random.uniform(self.current_day['Low'].iloc[self.current_step],
                                             self.current_day['High'].iloc[self.current_step]))
                # Update cumulative price multiplied by volume:
                self.cum_VbyP = self.cum_VbyP + (vol * price)
                # Update the Agents VWAP, A_VWAP
                self.A_VWAP = self.cum_VbyP / self.cum_vol_traded

    def _next_observation(self):
        frame = np.array([self.current_day.iloc[self.current_step]])
        frame[:, -1] = self.A_VWAP
        frame[:, -2] = self.A_vol_left
        return frame

    def step(self, action):
        # Execute one time step within the environment
        print(action)
        self._take_action(action)
        self.current_step += 1
        reward = 0  # always return zero until the last day
        if (self.current_step == self.last_ind_in_day):
            if (self.A_vol_left < 1):
                reward = self.current_day['VWAP'].iloc[self.current_step] - self.A_VWAP
            else:
                reward = -999999
            self.done = True
        obs = self._next_observation()
        return obs, reward, self.done, {}

    def reset(self):
        # Reset the state of the environment to an initial random day
        ind = random.randrange(0, len(self.list_of_df))
        self.current_day = self.list_of_df[ind]
        # Set the current step to a random point within the data frame
        self.current_step = 1
        # self.last_ind_in_day = len(self.list_of_df[0]) - 1
        self.A_VWAP = 0
        self.A_rolling_vol = 0
        self.A_rolling_price = 0
        self.A_vol_left = 1000
        self.reward = 0
        self.done = False
        self.last_ind_in_day = len(self.list_of_df[ind]) - 1
        # To keep track of the AGENTS VWAP:
        self.cum_VbyP = 0
        self.cum_vol_traded = 0
        return self._next_observation()

# ====================== End of MARKET class =======================
When I use a PPO2 agent to step through this environment, it always chooses [nan,nan] as the action.
This is the code used to train the model:
train_env = DummyVecEnv([lambda: Market(train_df)])
test_env = DummyVecEnv([lambda: Market(test_df)])
model = PPO2('MlpLstmPolicy', train_env, nminibatches=1, verbose=0)
n_stepss = 2000
for i in range(1000):
    model.learn(n_stepss)
I have added a print in the environment's _take_action and step methods to show which actions are being taken, and they are always:
[nan nan]
This is my github with the full ipython notebook that I run from google colab:
https://github.com/maaxnaax/rl_ipython/blob/master/Copy_of_VWAP_Env.ipynb
I had a similar problem to yours; I just deleted the train_df and test_df parts because I didn't actually need them, and that fixed it. I suspect your problem may also have something to do with the dataset.
There is also a blog post, https://towardsdatascience.com/creating-a-custom-openai-gym-environment-for-stock-trading-be532be3910e, in which the author has a similar problem setup to yours.
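If it is the dataset, a quick sanity check of the per-day dataframes for NaN or infinite values (here assuming train_df is the list of dataframes passed into Market) may confirm it, since non-finite observations commonly propagate into NaN actions:
import numpy as np

for i, day in enumerate(train_df):
    values = day.select_dtypes(include=[np.number]).to_numpy(dtype=np.float64)
    if not np.isfinite(values).all():
        print(f"day {i} contains NaN/inf values")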