I have this Python code, but it has already been running for 24 hours and still hasn't printed a result. I don't know how long it will take. Can someone help me optimize this code?
The code searches for the best-performing parameters for trading RSI divergence over a certain period. It first defines some parameter ranges for the RSI, then goes through every possible combination to find the one that gives the best performance.
I'm not really an expert, so I don't know how to change the code myself, but I'm happy to learn. Thank you!
import pandas as pd
import numpy as np
import ta
def load_data(file_path, start_date, end_date):
    """
    Loads data for the specified symbol and date range from a CSV file.
    """
    df = pd.read_csv(file_path)
    if 'Date' not in df.columns:
        df['Date'] = pd.to_datetime(df.index)
    df['Date'] = pd.to_datetime(df['Date'])
    df = df.set_index('Date')
    df = df[(df.index >= start_date) & (df.index <= end_date)]
    return df
def calc_rsi(df, n):
    """
    Calculates the relative strength index (RSI) for the given dataframe and window size.
    """
    delta = df["Close"].diff()
    gain = delta.where(delta > 0, 0)
    loss = abs(delta.where(delta < 0, 0))
    avg_gain = gain.rolling(window=n).mean()
    avg_loss = loss.rolling(window=n).mean()
    rs = avg_gain / avg_loss
    rsi = 100 - (100 / (1 + rs))
    return rsi
def calc_pivot_point(df, pivot_point_type, pivot_point_n):
    """
    Calculates the pivot point for the given dataframe and pivot point type.
    """
    if pivot_point_type == "Close":
        pivot_point = df["Close"].rolling(window=pivot_point_n).mean()
    elif pivot_point_type == "High/Low":
        pivot_point = (df["High"].rolling(window=pivot_point_n).mean() + df["Low"].rolling(window=pivot_point_n).mean()) / 2
    else:
        raise ValueError("Invalid pivot point type")
    return pivot_point
def calc_divergence(df, rsi, pivot_point, divergence_type, max_pivot_point, max_bars_to_check):
    """
    Calculates the divergence for the given dataframe and parameters.
    """
    if divergence_type == "Regular":
        pivot_point_delta = pivot_point.diff()
        pivot_point_delta_sign = pivot_point_delta.where(pivot_point_delta > 0, -1)
        pivot_point_delta_sign[pivot_point_delta_sign > 0] = 1
        rsi_delta = rsi.diff()
        rsi_delta_sign = rsi_delta.where(rsi_delta > 0, -1)
        rsi_delta_sign[rsi_delta_sign > 0] = 1
        divergence = pivot_point_delta_sign * rsi_delta_sign
        divergence[divergence < 0] = -1
        divergence = divergence.rolling(window=max_pivot_point).sum()
        divergence = divergence.rolling(window=max_bars_to_check).sum()
        divergence = divergence.where(divergence > 0, 0)
        divergence[divergence < 0] = -1
    else:
        raise ValueError("Invalid divergence type")
    return divergence
def backtest(df, rsi_period, pivot_point_type, pivot_point_n, divergence_type, max_pivot_point, max_bars_to_check, trailing_stop, starting_capital):
    """
    Backtests the strategy for the given dataframe and parameters.
    """
    rsi = calc_rsi(df, rsi_period)
    pivot_point = calc_pivot_point(df, pivot_point_type, pivot_point_n)
    divergence = calc_divergence(df, rsi, pivot_point, divergence_type, max_pivot_point, max_bars_to_check)
    positions = pd.DataFrame(index=df.index, columns=["Position", "Stop Loss"])
    positions["Position"] = 0.0
    positions["Stop Loss"] = 0.0
    capital = starting_capital
    for i, row in enumerate(df.iterrows()):
        date = row[0]
        close = row[1]["Close"]
        rsi_val = rsi.loc[date]
        pivot_val = pivot_point.loc[date]
        divergence_val = divergence.loc[date]
        if divergence_val > 0 and positions.loc[date]["Position"] == 0:
            positions.at[date, "Position"] = capital / close
            positions.at[date, "Stop Loss"] = close * (1 - trailing_stop)
        elif divergence_val < 0 and positions.loc[date]["Position"] > 0:
            capital = positions.loc[date]["Position"] * close
            positions.at[date, "Position"] = 0.0
            positions.at[date, "Stop Loss"] = 0.0
        elif close < positions.loc[date]["Stop Loss"] and positions.loc[date]["Position"] > 0:
            capital = positions.loc[date]["Position"] * close
            positions.at[date, "Position"] = 0.0
            positions.at[date, "Stop Loss"] = 0.0
    return capital
def find_best_iteration(df, start_rsi_period, end_rsi_period, pivot_point_types, start_pivot_point_n, end_pivot_point_n, divergence_types, start_max_pivot_point, end_max_pivot_point, start_max_bars_to_check, end_max_bars_to_check, start_trailing_stop, end_trailing_stop, starting_capital):
    """
    Finds the best iteration for the given parameters.
    """
    best_result = 0.0
    best_params = None
    for rsi_period in range(start_rsi_period, end_rsi_period + 1):
        for pivot_point_type in pivot_point_types:
            for pivot_point_n in range(start_pivot_point_n, end_pivot_point_n + 1):
                for divergence_type in divergence_types:
                    for max_pivot_point in range(start_max_pivot_point, end_max_pivot_point + 1):
                        for max_bars_to_check in range(start_max_bars_to_check, end_max_bars_to_check + 1):
                            for trailing_stop in np.arange(start_trailing_stop, end_trailing_stop + 0.01, 0.01):
                                result = backtest(df, rsi_period, pivot_point_type, pivot_point_n, divergence_type, max_pivot_point, max_bars_to_check, trailing_stop, starting_capital)
                                if result > best_result:
                                    best_result = result
                                    best_params = (rsi_period, pivot_point_type, pivot_point_n, divergence_type, max_pivot_point, max_bars_to_check, trailing_stop)
    return best_result, best_params
# Define the parameters
file_path = 'C:\\Users\\The Death\\Downloads\\Binance_BTCUSDT_spot.csv'
start_date = "2020-03-16"
end_date = "2021-04-12"
df = load_data(file_path, start_date, end_date)
def load_data(start_date, end_date):
    # Your code to load the data for the specified date range
    # ...
    return df
# Define the parameters for the backtesting
start_rsi_period = 1
end_rsi_period = 30
pivot_point_types = ["Close", "High/Low"]
start_pivot_point_n = 1
end_pivot_point_n = 50
divergence_types = ["Regular"]
start_max_pivot_point = 1
end_max_pivot_point = 20
start_max_bars_to_check = 30
end_max_bars_to_check = 200
start_trailing_stop = 0.01
end_trailing_stop = 0.5
starting_capital = 10000
# Run the backtesting
df = load_data(start_date, end_date)
best_result, best_params = find_best_iteration(df, start_rsi_period, end_rsi_period, pivot_point_types, start_pivot_point_n, end_pivot_point_n, divergence_types, start_max_pivot_point, end_max_pivot_point, start_max_bars_to_check, end_max_bars_to_check, start_trailing_stop, end_trailing_stop, starting_capital)
# Print the results
print("Best result: ", best_result)
print("Best parameters: ", best_params)
I have two recommendations after scrolling through your code:
Reduce the number of nested for loops. Each additional layer multiplies the running time: a single loop is O(n), and k nested loops are on the order of n^k. Your find_best_iteration() nests seven loops, and with the parameter ranges you define that is about 30 × 2 × 50 × 1 × 20 × 171 × 50 ≈ 513 million calls to backtest(), which is why it is taking so long.
Save and process your data in numpy arrays instead of pandas DataFrames. A DataFrame is a class that carries a lot of overhead you are not using, and element-wise reads and writes on it are much slower than on a plain numpy array (a minimal sketch of this idea follows below).
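On that second point, here is a minimal sketch (the function name is illustrative and it is not a drop-in replacement) of what the buy/sell loop in backtest() could look like if the per-bar state were kept in plain scalars and numpy arrays rather than in a positions DataFrame; the entry and exit rules are the ones from the question:
def backtest_core(df, divergence, trailing_stop, starting_capital):
    # Pull the columns once as numpy arrays; indexing these is far cheaper
    # than positions.loc[...] / positions.at[...] on every bar.
    close = df["Close"].to_numpy()
    div = divergence.to_numpy()
    capital = starting_capital
    position = 0.0   # units currently held
    stop_loss = 0.0  # stop price for the open position
    for i in range(len(close)):
        if div[i] > 0 and position == 0:
            # enter on positive divergence when flat
            position = capital / close[i]
            stop_loss = close[i] * (1 - trailing_stop)
        elif position > 0 and (div[i] < 0 or close[i] < stop_loss):
            # exit on negative divergence or when the stop is hit
            capital = position * close[i]
            position = 0.0
            stop_loss = 0.0
    return capital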
You can try the following methods to improve the performance:
The backtest() function is called once per parameter combination inside find_best_iteration(), under all of those nested for loops, and each call repeatedly updates the positions variable. Those updates are slow precisely because positions is a DataFrame; consider keeping that per-bar state in a numpy array (or plain scalars), which is much faster to update, as in the sketch above.
You can also use Python's multiprocessing module to parallelize the work: each backtest() call computes its own divergence independently, so the calls can run in parallel across parameter combinations (a sketch follows below).
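A minimal sketch of that parallel search, assuming the backtest() function and the parameter-range variables defined in the question are available at module level so the worker processes can see them (e.g. with the default fork start method on Linux); the helper name run_one is illustrative:
import itertools
from multiprocessing import Pool

import numpy as np

def run_one(params):
    # Run backtest() for one parameter combination and return (result, params).
    (rsi_period, pivot_point_type, pivot_point_n, divergence_type,
     max_pivot_point, max_bars_to_check, trailing_stop) = params
    result = backtest(df, rsi_period, pivot_point_type, pivot_point_n,
                      divergence_type, max_pivot_point, max_bars_to_check,
                      trailing_stop, starting_capital)
    return result, params

if __name__ == "__main__":
    # The same combinations the nested loops enumerate, built lazily.
    param_grid = itertools.product(
        range(start_rsi_period, end_rsi_period + 1),
        pivot_point_types,
        range(start_pivot_point_n, end_pivot_point_n + 1),
        divergence_types,
        range(start_max_pivot_point, end_max_pivot_point + 1),
        range(start_max_bars_to_check, end_max_bars_to_check + 1),
        np.arange(start_trailing_stop, end_trailing_stop + 0.01, 0.01),
    )
    with Pool() as pool:
        best_result, best_params = max(
            pool.imap_unordered(run_one, param_grid, chunksize=1000),
            key=lambda rp: rp[0],
        )
    print(best_result, best_params)
Keep in mind that parallelism only divides the wall-clock time by the number of cores; with hundreds of millions of combinations you will probably also want coarser parameter steps or a randomized search.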
Hope this helps!
I have spent days trying to code (and searching for) a Python function to compute RSI that matches TradingView's results, but without success (I'm new to Python).
The closest I have come is the function below, but the result is still different (and because an exponential average is used, sometimes the result is quite close and sometimes it is way off):
def rsi_tradingview(ohlc: pd.DataFrame, period: int = 14, round_rsi: bool = True):
    delta = ohlc["close"].diff()
    up = delta.copy()
    up[up < 0] = 0
    up = pd.Series.ewm(up, alpha=1/period).mean()
    down = delta.copy()
    down[down > 0] = 0
    down *= -1
    down = pd.Series.ewm(down, alpha=1/period).mean()
    rsi = np.where(up == 0, 0, np.where(down == 0, 100, 100 - (100 / (1 + up / down))))
    return np.round(rsi, 2) if round_rsi else rsi
My code looks like this:
pairs = ["BTCUSDT", "PONDUSDT"]

def get_historical_candles():
    record = client.get_historical_klines(pair, Client.KLINE_INTERVAL_5MINUTE, "3 hour ago UTC")
    myList = []
    try:
        for item in record:
            n_item = []
            int_ts = int(item[0] / 1000)
            n_item.append(float(item[4]))  # close
            myList.append(n_item)
    except Exception as error:
        debug_logger.debug(error)
    new_ohlc = pd.DataFrame(myList, columns=['close'])
    return new_ohlc

def rsi_tradingview(ohlc: pd.DataFrame, period: int = 14, round_rsi: bool = False):
    delta = ohlc.diff()
    up = delta.copy()
    up[up < 0] = 0
    up = pd.Series.ewm(up, alpha=1/period).mean()
    down = delta.copy()
    down[down > 0] = 0
    down *= -1
    down = pd.Series.ewm(down, alpha=1/period).mean()
    rsi = np.where(up == 0, 0, np.where(down == 0, 100, 100 - (100 / (1 + up / down))))
    return np.round(rsi, 2) if round_rsi else rsi

for pair in pairs:
    all_candles = get_historical_candles()
    test_rsi = rsi_tradingview(all_candles, 14, False)
    test_rsi_final = test_rsi[-1]
    print(test_rsi_final)
I compare the results with tradingview_ta this way, which gives correct results (I can't just use this to get the RSI, because I also need the RSI series to calculate the StochRSI):
for pair in pairs:
    test = TA_Handler(
        symbol=pair,
        screener="CRYPTO",
        exchange="BINANCE",
        interval=Interval.INTERVAL_5_MINUTES
    )
    print(test.get_analysis().indicators["RSI"])
In case it helps, here is the TradingView (Pine Script) code for the RSI and for how RMA is calculated:
# RSI
study(title="Relative Strength Index", shorttitle="RSI", format=format.price, precision=2, resolution="")
len = input(14, minval=1, title="Length")
src = input(close, "Source", type = input.source)
up = rma(max(change(src), 0), len)
down = rma(-min(change(src), 0), len)
rsi = down == 0 ? 100 : up == 0 ? 0 : 100 - (100 / (1 + up / down))
plot(rsi, "RSI", color=#7E57C2)
band1 = hline(70, "Upper Band", color=#787B86)
bandm = hline(50, "Middle Band", color=color.new(#787B86, 50))
band0 = hline(30, "Lower Band", color=#787B86)
fill(band1, band0, color=color.rgb(126, 87, 194, 90), title="Background")
# RMA
plot(rma(close, 15))
//the same on pine
pine_rma(src, length) =>
    alpha = 1/length
    sum = 0.0
    sum := na(sum[1]) ? sma(src, length) : alpha * src + (1 - alpha) * nz(sum[1])
plot(pine_rma(close, 15))
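For reference, a minimal Python sketch of the same RMA recursion (Wilder's smoothing) as pine_rma above; the function name is illustrative, and pandas' ewm(alpha=1/length, adjust=False) implements the same recursion once it has run past the initial seed:
import numpy as np
import pandas as pd

def rma(src: pd.Series, length: int) -> pd.Series:
    # Wilder's RMA: seed with an SMA, then sum := alpha*src + (1-alpha)*sum[1]
    alpha = 1 / length
    values = src.to_numpy(dtype=float)
    out = np.full(len(values), np.nan)
    out[length - 1] = values[:length].mean()  # SMA seed, as in pine_rma when sum[1] is na
    for i in range(length, len(values)):
        out[i] = alpha * values[i] + (1 - alpha) * out[i - 1]
    return pd.Series(out, index=src.index)

# Roughly equivalent pandas one-liner (differs only in how the series is seeded):
# src.ewm(alpha=1/length, adjust=False).mean()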
Please help me find what is wrong. :(
Thank you in advance, and thanks for reading!
I have the following R code that I wanted to translate into Python, but I have run into a snag. When the code runs, each cancel buy or cancel sell order consumes one of the available buy or sell orders (nb, ns), and after many generated events the book runs out of orders. It seems the event-generation function I implemented is not producing enough new orders, so the order book shrinks faster than it grows. That is why my code eventually raises a KeyError: it ends up at positions that are not in my book when cancelling buy or sell orders. Below is the R code, followed by the Python code.
R code.
#Book setup
L <- 30 #Set number of price levels to be included in iterations
# Generate initial book
LL <- 1000 #Total number of levels in buy and sell books
# Initialize book with asymptotic depth of 5 shares
initializeBook5 <- function()
{
Price <<- -LL:LL
# Book shape is set to equal long-term average from simulation
buySize <<- c(rep(5,LL-8),5,4,4,3,3,2,2,1,rep(0,LL+1))
sellSize <<- c(rep(0,LL),0,1,2,2,3,3,4,4,5,rep(5,LL-8))
book <<- data.frame(Price, buySize, sellSize )
if(logging==T){eventLog <<- as.data.frame(matrix(0,nrow=numEvents,ncol=2))
colnames(eventLog)<<-c("Type","Price")
count <<- 0
eventType <<- c("LB","LS","CB","CS","MB","MS")
eventDescr <<- NA}
}
#Various utility functions
bestOffer <- function(){min(book$Price[book$sellSize>0])}
bestBid <- function(){max(book$Price[book$buySize>0])}
spread <- function(){bestOffer()-bestBid()}
mid <- function(){(bestOffer()+bestBid())/2}
#Functions to find mid-market
bidPosn<-function()length(book$buySize[book$Price<=bestBid()])
askPosn<-function()length(book$sellSize[book$Price<=bestOffer()])
midPosn<-function(){floor((bidPosn()+askPosn())/2)}
#Display center of book
go <- function(){book[(midPosn()-20):(midPosn()+20),]}
#Display book shape
bookShape<-function(band){c(book$buySize[midPosn()+(-band:0)],book$sellSize[midPosn()+1:band])}
bookPlot<-function(band){
plot((-band:band),bookShape(band),
col="red",type="l",xlab="Price",ylab="Quantity")
}
#Choose from L whole numbers in (1,...,L) with uniform probability
pick <- function(m){sample(1:m,1)}
# Switch logging on
logging <- T
#Buy limit order
limitBuyOrder <- function(price=NA){
if (is.na(price))
{prx <<- (bestOffer()-pick(L))}
else prx <<-price
if(logging==T){eventLog[count,]<<- c("LB",prx)}
book$buySize[book$Price==prx]<<-book$buySize[book$Price==prx]+1}
#Sell limit order
limitSellOrder <- function(price=NA){
if (is.na(price))
{prx <<- (bestBid()+pick(L))}
else prx <<-price
if(logging==T){eventLog[count,] <<- c("LS",prx)}
book$sellSize[book$Price==prx]<<-book$sellSize[book$Price==prx]+1}
#Cancel buy order
cancelBuyOrder<-function(price=NA){
q<-pick(nb)
tmp <- cumsum(rev(book$buySize)) #Cumulative buy size from 0
posn <- length(tmp[tmp>=q]) #gives position in list where cumulative size >q
prx <<- book$Price[posn]
if (!is.na(price)) {prx <<-price}
if(logging==T){eventLog[count,]<<- c("CB",prx)}
book$buySize[posn]<<-book$buySize[posn]-1}
#Cancel sell order
cancelSellOrder<-function(price=NA){
q<-pick(ns)
tmp <- cumsum(book$sellSize) #Cumulative sell size from 0
posn <- length(tmp[tmp<q])+1
prx <<- book$Price[posn]
if (!is.na(price)) {prx <<-price}
if(logging==T){eventLog[count,]<<- c("CS",prx)}
book$sellSize[posn]<<-book$sellSize[posn]-1}
#Market buy order
marketBuyOrder <- function(){
prx <<- bestOffer()
if(logging==T){eventLog[count,]<<- c("MB",prx)}
book$sellSize[book$Price==prx]<<-book$sellSize[book$Price==prx]-1}
#Market sell order
marketSellOrder <- function(){
prx <<- bestBid()
if(logging==T){eventLog[count,]<<- c("MS",prx)}
book$buySize[book$Price==prx]<<-book$buySize[book$Price==prx]-1}
#Generate an event and update the buy and sell books
#Note that limit orders may be placed inside the spread
generateEvent <- function()
{
nb <<- sum(book$buySize[book$Price>=(bestOffer()-L)]); # Number of cancelable buy orders
ns <<- sum(book$sellSize[book$Price<=(bestBid()+L)]); # Number of cancelable sell orders
eventRate <- nb*delta+ns*delta + mu +2*L*alpha;
probEvent <- c(L*alpha,L*alpha,nb*delta,ns*delta,mu/2,mu/2)/eventRate;
m <- sample(1:6, 1, replace = TRUE, probEvent); #Choose event type
switch(m,
limitBuyOrder(),
limitSellOrder(),
cancelBuyOrder(),
cancelSellOrder(),
marketBuyOrder(),
marketSellOrder()
);
}
logging <- F
lambda <- 1
mus <- c(10,8,10,10)
nus <- c(1/5,1/5,1/6,1/8)
avgBookShapes<-as.data.frame(matrix(0,nrow=41,ncol=4))
for(i in 1:4){
mu<-mus[i]
nu<-nus[i]
initializeBook5()
numEvents <- 100000 # Average over 100,000 events
avgBookShape <- bookShape(20)/numEvents
for(count in 2:numEvents){
generateEvent()
avgBookShape <- avgBookShape+bookShape(20)/numEvents
}
avgBookShapes[,i]<-avgBookShape
}
Python code
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import random
import math
class Zibook():
    def __init__(self, ll, l, alpha, mu, delta, num_events, logging=False):
        self.ll = ll  # total number of levels in buy and sell
        self.l = l  # set number of price levels to be included in iterations
        self.alpha = alpha
        self.mu = mu
        self.delta = delta
        self.num_events = num_events
        self.logging = logging
        price = np.array(list(range(-self.ll, self.ll + 1, 1)))
        buy_size = np.array([5]*(self.ll-8) + [5,4,4,3,3,2,2,1] + [0]*(self.ll+1))
        sell_size = np.array([0]*(self.ll) + [0,1,2,2,3,3,4,4,5] + [5]*(self.ll-8))
        book = pd.DataFrame(index=price, columns=['Price','Buy Size','Sell Size','Type'])
        book['Price'] = price
        book['Buy Size'] = buy_size
        book['Sell Size'] = sell_size
        book = book.reset_index(drop=True)
        self.book = book
        event_type = ['LB','LS','CB','CS','MB','MS']
        event_descr = np.nan
        x = list(range(0, self.num_events, 1))
        event_log = pd.DataFrame(index=x, columns=['Type','Price'])
        self.event_log = event_log
        nb = sum(self.book.loc[self.book.Price >= (self.best_offer()-self.l), 'Buy Size'])  # number of cancellable buy orders
        ns = sum(self.book.loc[self.book.Price <= (self.best_bid()+self.l), 'Sell Size'])  # number of cancellable sell orders
        self.nb = nb
        self.ns = ns
    def best_offer(self):
        df = self.book
        a = df.loc[df['Sell Size'] > 0, 'Price'].min()
        return a

    def best_bid(self):
        df = self.book
        b = df.loc[df['Buy Size'] > 0, 'Price'].max()
        return b

    def spread(self):
        spread = (self.best_offer() - self.best_bid())/2
        return spread

    def mid(self):
        mid = (self.best_offer() + self.best_bid())/2
        return mid

    def bidposn(self):
        df = self.book
        a = len(df.loc[df.Price <= self.best_bid(), 'Buy Size']) - 1
        return a

    def askposn(self):
        df = self.book
        a = len(df[df['Price'] <= self.best_offer()]['Sell Size']) - 1
        return a

    def midposn(self):
        df = self.book
        a = ((self.bidposn() + self.askposn())//2)
        return a

    def centerbook(self):
        df = self.book
        mid = self.midposn()
        return df[mid-20:mid+21]

    def bookshape(self, band):
        df = self.book
        mid = self.midposn()
        x = np.arange(-band, 0)
        y = [df.loc[(mid+el), 'Buy Size'] for el in x]
        x1 = np.arange(0, band+1)
        z = [df.loc[(mid+el), 'Sell Size'] for el in x1]
        seq3 = np.concatenate((y, z), axis=0)
        return seq3

    def bookplot(self, band):
        x = list(range(-band, band+1, 1))
        seq3 = self.bookshape(band)
        plt.plot(x, seq3, color='red')
        plt.xlabel('Price')
        plt.ylabel('Quantity')
        return plt.show()

    def pick(self, l):
        a = np.random.choice(l, 1, replace=True, p=[1/l]*l)
        return a[0]
    def limitbuyorder(self, price=None):
        if price == None:
            price = (self.best_offer() - self.pick(self.l))
        else:
            price = price
        df = self.book
        if self.logging == True:
            count = 0
            eventlog = self.event_log
            eventlog.loc[count, 'Price'] = price
            eventlog.loc[count, 'Type'] = 'LB'
        df.loc[df.Price == price, 'Buy Size'] += 1

    def limitsellorder(self, price=None):
        if price == None:
            price = (self.best_bid() + self.pick(self.l))
        else:
            price = price
        df = self.book
        if self.logging == True:
            count = 0
            eventlog = self.event_log
            eventlog.loc[count, 'Price'] = price
            eventlog.loc[count, 'Type'] = 'LS'
        df.loc[df.Price == price, 'Sell Size'] += 1
    def cancelbuyorder(self, price=None):
        df = self.book
        if price == None:
            q = self.pick(self.nb)
            tmp = np.array(df['Buy Size'].to_list()[::-1]).cumsum()
            posn = len(tmp[tmp >= q]) - 1
            price = df.Price[posn]
            df.loc[posn, 'Buy Size'] -= 1
        else:
            price = price
            df.loc[df.Price == price, 'Buy Size'] -= 1
        if self.logging == True:
            count = 0
            eventlog = self.event_log
            eventlog.loc[count, 'Price'] = price
            eventlog.loc[count, 'Type'] = 'CB'

    def cancelsellorder(self, price=None):
        #global ns
        df = self.book
        if price == None:
            q = self.pick(self.ns)
            tmp = np.array(df['Sell Size'].to_list()).cumsum()
            posn = len(tmp[tmp < q])
            price = df.Price[posn]
            df.loc[posn, 'Sell Size'] -= 1
        else:
            price = price
            df.loc[df.Price == price, 'Sell Size'] -= 1
        if self.logging == True:
            count = 0
            eventlog = self.event_log
            eventlog.loc[count, 'Price'] = price
            eventlog.loc[count, 'Type'] = 'CS'
    def marketbuyorder(self, price=None):
        df = self.book
        price = self.best_offer()
        if self.logging == True:
            count = 0
            eventlog = self.event_log
            eventlog.loc[count, 'Price'] = price
            eventlog.loc[count, 'Type'] = 'MB'
        df.loc[df.Price == price, 'Sell Size'] -= 1

    def marketsellorder(self, price=None):
        df = self.book
        price = self.best_bid()
        if self.logging == True:
            count = 0
            eventlog = self.event_log
            eventlog.loc[count, 'Price'] = price
            eventlog.loc[count, 'Type'] = 'MS'
        df.loc[df.Price == price, 'Buy Size'] -= 1

    def generateevent(self):
        df = self.book
        nb = sum(df.loc[df.Price >= (self.best_offer()-self.l), 'Buy Size'])  # number of cancellable buy orders
        ns = sum(df.loc[df.Price <= (self.best_bid()+self.l), 'Sell Size'])  # number of cancellable sell orders
        eventRate = nb*self.delta + ns*self.delta + self.mu + 2*self.l*self.alpha
        probEvent = (self.l*self.alpha + self.l*self.alpha + nb*self.delta + ns*self.delta + self.mu*0.5 + self.mu*0.5)/eventRate
        a = np.random.choice(6, 1, replace=True, p=[probEvent/6]*6)
        idx = a[0]
        z = [self.limitbuyorder(), self.limitsellorder(), self.cancelbuyorder(), self.cancelsellorder(), self.marketbuyorder(), self.marketsellorder()]
        return z[idx]
alpha = 1
mu = 10
delta = 1/5
num_events = 100000

'''
a = Zibook(1000,30,alpha,mu,delta,num_events,logging=False)
a.limitbuyorder(price =a.best_bid())
a.limitbuyorder(price =a.best_bid())
a.bookplot(20)
#print(a.generateevent())
#print(a.cancelbuyorder(price=None))
'''

lalpha = [1,1,1,1]
lmu = [10,8,10,10]
ldelta = [1/5,1/5,1/6,1/8]
length = len(lalpha)
Avgbookshapes = np.array([])
num_events = 100000

for i in range(0, length, 1):
    alpha = lalpha[i]
    mu = lmu[i]
    delta = ldelta[i]
    a = Zibook(1000, 30, alpha, mu, delta, num_events, logging=False)
    for i1 in range(0, 100, 1):
        a.generateevent()
        #print(i1)
    avgbookshape = a.bookshape(20)/num_events
    #print(avgbookshape.shape)
    for i3 in range(2, num_events+1, 1):
        a.generateevent()
        #print(i3)
        avgbookshape2 = a.bookshape(20)/num_events
        #print(avgbookshape2.shape)
        avgbookshape += avgbookshape2
    Avgbookshapes = np.append(Avgbookshapes, avgbookshape)

np.save('my_array', Avgbookshapes)
Any help will be greatly appreciated. (For background, this algorithm is a zero-intelligence simulation of market microstructure: https://github.com/zecophy/MTH9879-Market-Microstructure-Models/blob/master/HW1/9879HW1_Chenyu_Zhao_graded.ipynb)
I just realized I didn't have the probabilities written out correctly in the generateevent function, but the book still decays to zero orders. Below is the modified Python code; does anyone know why the orders decay instead of growing as they do in the R code?
def generateevent(self):
    df = self.book
    nb = sum(df.loc[df.Price >= (self.best_offer()-self.l), 'Buy Size'])  # number of cancellable buy orders
    ns = sum(df.loc[df.Price <= (self.best_bid()+self.l), 'Sell Size'])  # number of cancellable sell orders
    eventRate = nb*self.delta + ns*self.delta + self.mu + 2*self.l*self.alpha
    probEvent = np.array([self.l*self.alpha, self.l*self.alpha, nb*self.delta, ns*self.delta, self.mu*0.5, self.mu*0.5])/eventRate
    #print(probEvent)
    a = np.random.choice(6, 1, replace=True, p=probEvent)
    idx = a[0]
    z = [self.limitbuyorder(), self.limitsellorder(), self.cancelbuyorder(), self.cancelsellorder(), self.marketbuyorder(), self.marketsellorder()]
    return z[idx]
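As a side note on the translation itself: R's switch(m, limitBuyOrder(), ...) evaluates only the branch that is selected, whereas a Python list of call results like z = [self.limitbuyorder(), ...] invokes every handler while the list is being built. A minimal sketch of the lazier pattern, written as a standalone function over a Zibook-like object (the function name is illustrative):
import numpy as np

def generate_event_sketch(book, prob_event):
    # Store the bound methods themselves (no parentheses), then call only
    # the one selected by the probability-weighted draw, mirroring R's switch().
    handlers = [book.limitbuyorder, book.limitsellorder, book.cancelbuyorder,
                book.cancelsellorder, book.marketbuyorder, book.marketsellorder]
    idx = np.random.choice(6, p=prob_event)
    return handlers[idx]()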
I have a dataframe of OHLCV data. Does anyone know of a tutorial, or any other way, of computing the ADX (average directional movement index) using pandas?
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
import datetime as dt
import numpy as nm
start=dt.datetime.today()-dt.timedelta(59)
end=dt.datetime.today()
df=pd.DataFrame(yf.download("MSFT", start=start, end=end))
The average directional index, or ADX, is the primary technical indicator among the five indicators that make up the technical trading system developed by J. Welles Wilder, Jr., and it is calculated using the other indicators in that system. The ADX is primarily used as a measure of momentum, or trend strength, but the full ADX system is also used as a directional indicator.
Directional movement is calculated by comparing the difference between two consecutive lows with the difference between their respective highs.
For the Excel calculation of ADX, this is a really good video:
https://www.youtube.com/watch?v=LKDJQLrXedg&t=387s
I was playing with this a little bit and found something that can help you with the issue:
import numpy as np
import pandas as pd

def ADX(data: pd.DataFrame, period: int):
    """
    Computes the ADX indicator.
    """
    df = data.copy()
    alpha = 1/period

    # TR
    df['H-L'] = df['High'] - df['Low']
    df['H-C'] = np.abs(df['High'] - df['Close'].shift(1))
    df['L-C'] = np.abs(df['Low'] - df['Close'].shift(1))
    df['TR'] = df[['H-L', 'H-C', 'L-C']].max(axis=1)
    del df['H-L'], df['H-C'], df['L-C']

    # ATR
    df['ATR'] = df['TR'].ewm(alpha=alpha, adjust=False).mean()

    # +-DX
    df['H-pH'] = df['High'] - df['High'].shift(1)
    df['pL-L'] = df['Low'].shift(1) - df['Low']
    df['+DX'] = np.where(
        (df['H-pH'] > df['pL-L']) & (df['H-pH'] > 0),
        df['H-pH'],
        0.0
    )
    df['-DX'] = np.where(
        (df['H-pH'] < df['pL-L']) & (df['pL-L'] > 0),
        df['pL-L'],
        0.0
    )
    del df['H-pH'], df['pL-L']

    # +- DMI
    df['S+DM'] = df['+DX'].ewm(alpha=alpha, adjust=False).mean()
    df['S-DM'] = df['-DX'].ewm(alpha=alpha, adjust=False).mean()
    df['+DMI'] = (df['S+DM']/df['ATR'])*100
    df['-DMI'] = (df['S-DM']/df['ATR'])*100
    del df['S+DM'], df['S-DM']

    # ADX
    df['DX'] = (np.abs(df['+DMI'] - df['-DMI'])/(df['+DMI'] + df['-DMI']))*100
    df['ADX'] = df['DX'].ewm(alpha=alpha, adjust=False).mean()
    del df['DX'], df['ATR'], df['TR'], df['-DX'], df['+DX'], df['+DMI'], df['-DMI']

    return df
At the beginning the values aren't correct (as always with the EWM approach), but after enough bars they converge to the correct value.
Math was taken from here.
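A quick usage sketch with the MSFT dataframe from the question (assuming the standard yfinance High/Low/Close column names):
df = ADX(df, period=14)
print(df['ADX'].tail())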
def ADX(df):

    def getCDM(df):
        dmpos = df["High"][-1] - df["High"][-2]
        dmneg = df["Low"][-2] - df["Low"][-1]
        if dmpos > dmneg:
            return dmpos
        else:
            return dmneg

    def getDMnTR(df):
        DMpos = []
        DMneg = []
        TRarr = []
        n = round(len(df)/14)
        idx = n
        while n <= (len(df)):
            dmpos = df["High"][n-1] - df["High"][n-2]
            dmneg = df["Low"][n-2] - df["Low"][n-1]
            DMpos.append(dmpos)
            DMneg.append(dmneg)
            a1 = df["High"][n-1] - df["High"][n-2]
            a2 = df["High"][n-1] - df["Close"][n-2]
            a3 = df["Low"][n-1] - df["Close"][n-2]
            TRarr.append(max(a1, a2, a3))
            n = idx + n
        return DMpos, DMneg, TRarr

    def getDI(df):
        DMpos, DMneg, TR = getDMnTR(df)
        CDM = getCDM(df)
        POSsmooth = (sum(DMpos) - sum(DMpos)/len(DMpos) + CDM)
        NEGsmooth = (sum(DMneg) - sum(DMneg)/len(DMneg) + CDM)
        DIpos = (POSsmooth / (sum(TR)/len(TR))) * 100
        DIneg = (NEGsmooth / (sum(TR)/len(TR))) * 100
        return DIpos, DIneg

    def getADX(df):
        DIpos, DIneg = getDI(df)
        dx = (abs(DIpos - DIneg) / abs(DIpos + DIneg)) * 100
        ADX = dx/14
        return ADX

    return getADX(df)

print(ADX(df))
This gives you exactly the same numbers as TradingView and Thinkorswim.
import numpy as np

def ema(arr, periods=14, weight=1, init=None):
    leading_na = np.where(~np.isnan(arr))[0][0]
    arr = arr[leading_na:]
    alpha = weight / (periods + (weight-1))
    alpha_rev = 1 - alpha
    n = arr.shape[0]
    pows = alpha_rev**(np.arange(n+1))
    out1 = np.array([])
    if 0 in pows:
        out1 = ema(arr[:int(len(arr)/2)], periods)
        arr = arr[int(len(arr)/2) - 1:]
        init = out1[-1]
        n = arr.shape[0]
        pows = alpha_rev**(np.arange(n+1))
    scale_arr = 1/pows[:-1]
    if init:
        offset = init * pows[1:]
    else:
        offset = arr[0]*pows[1:]
    pw0 = alpha*alpha_rev**(n-1)
    mult = arr*pw0*scale_arr
    cumsums = mult.cumsum()
    out = offset + cumsums*scale_arr[::-1]
    out = out[1:] if len(out1) > 0 else out
    out = np.concatenate([out1, out])
    out[:periods] = np.nan
    out = np.concatenate(([np.nan]*leading_na, out))
    return out

def atr(highs, lows, closes, periods=14, ema_weight=1):
    hi = np.array(highs)
    lo = np.array(lows)
    c = np.array(closes)
    tr = np.vstack([np.abs(hi[1:]-c[:-1]),
                    np.abs(lo[1:]-c[:-1]),
                    (hi-lo)[1:]]).max(axis=0)
    atr = ema(tr, periods=periods, weight=ema_weight)
    atr = np.concatenate([[np.nan], atr])
    return atr

def adx(highs, lows, closes, periods=14):
    highs = np.array(highs)
    lows = np.array(lows)
    closes = np.array(closes)
    up = highs[1:] - highs[:-1]
    down = lows[:-1] - lows[1:]
    up_idx = up > down
    down_idx = down > up
    updm = np.zeros(len(up))
    updm[up_idx] = up[up_idx]
    updm[updm < 0] = 0
    downdm = np.zeros(len(down))
    downdm[down_idx] = down[down_idx]
    downdm[downdm < 0] = 0
    _atr = atr(highs, lows, closes, periods)[1:]
    updi = 100 * ema(updm, periods) / _atr
    downdi = 100 * ema(downdm, periods) / _atr
    zeros = (updi + downdi == 0)
    downdi[zeros] = .0000001
    adx = 100 * np.abs(updi - downdi) / (updi + downdi)
    adx = ema(np.concatenate([[np.nan], adx]), periods)
    return adx
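Usage with the dataframe from the question would look something like this (again assuming the yfinance High/Low/Close columns):
adx_values = adx(df['High'], df['Low'], df['Close'], periods=14)
print(adx_values[-5:])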
I have made the following custom environment:
class Market(gym.Env):
    """This env is for training a BUYING vwap beating algo, with
    OpenAI gym reinforcement learning algorithms"""
    metadata = {'render.modes': ['human']}

    def __init__(self, list_of_df):
        super(Market, self).__init__()
        self.list_of_df = list_of_df
        self.current_day = list_of_df[0]
        self.reward_range = (-2147483647, 2147483647)
        # self.A_Vol = 0
        self.current_step = 0
        self.last_ind_in_day = len(list_of_df[0]) - 1
        # self.trade_size = 10
        self.A_VWAP = 0
        self.A_rolling_vol = 0
        self.A_rolling_price = 0
        self.A_vol_left = 1000
        self.reward = 0
        self.done = False
        # To keep track of the AGENTS VWAP:
        self.cum_VbyP = 0
        self.cum_vol_traded = 0
        self.purchase_vol = 80
        self.action_space = spaces.Box(low=np.array([0, 0]), high=np.array([3, 1]), dtype=np.float16)
        # Prices contains the OHLC for the 5 min interval
        # Miliseconds from midnight
        # Rolling VWAP for this time period
        # The agents Rolling VWAP, A_VWAP
        # The Vol of securities left to still buy, A_trgt_vol
        # The Vol traded this time step in the market
        self.observation_space = spaces.Box(
            low=-2147483647, high=2147483647, shape=(1, len(list_of_df[1].iloc[2])), dtype=np.float16)

    def _take_action(self, a):
        # Only buy if there are still shares to be bought today,
        if (self.A_vol_left > 0):
            # Purchase a * volume of a trade
            vol = self.purchase_vol * a[0]
            print(vol)
            # But if there arent enough shares still to buy
            if (vol > self.A_vol_left):
                vol = self.A_vol_left
            self.A_vol_left = self.A_vol_left - vol
            # Increase the volume of shares traded:
            self.cum_vol_traded = self.cum_vol_traded + vol
            if (vol > 0):
                # Sample a random price between high and low for this interval:
                price = round(random.uniform(self.current_day['Low'].iloc[self.current_step],
                                             self.current_day['High'].iloc[self.current_step]))
                # Update cumulative price multiplied by volume:
                self.cum_VbyP = self.cum_VbyP + (vol * price)
                # Update the Agents VWAP, A_VWAP
                self.A_VWAP = self.cum_VbyP / self.cum_vol_traded

    def _next_observation(self):
        frame = np.array([self.current_day.iloc[self.current_step]])
        frame[:, -1] = self.A_VWAP
        frame[:, -2] = self.A_vol_left
        return frame

    def step(self, action):
        # Execute one time step within the environment
        print(action)
        self._take_action(action)
        self.current_step += 1
        reward = 0  # always return zero until the last day
        if (self.current_step == self.last_ind_in_day):
            if (self.A_vol_left < 1):
                reward = self.current_day['VWAP'].iloc[self.current_step] - self.A_VWAP
            else:
                reward = -999999
            self.done = True
        obs = self._next_observation()
        return obs, reward, self.done, {}

    def reset(self):
        # Reset the state of the environment to an initial random day
        ind = random.randrange(0, len(self.list_of_df))
        self.current_day = self.list_of_df[ind]
        # Set the current step to a random point within the data frame
        self.current_step = 1
        # self.last_ind_in_day = len(self.list_of_df[0]) - 1
        self.A_VWAP = 0
        self.A_rolling_vol = 0
        self.A_rolling_price = 0
        self.A_vol_left = 1000
        self.reward = 0
        self.done = False
        self.last_ind_in_day = len(self.list_of_df[ind]) - 1
        # To keep track of the AGENTS VWAP:
        self.cum_VbyP = 0
        self.cum_vol_traded = 0
        return self._next_observation()

# ====================== End of MARKET class =======================
When I use a PPO2 agent to step through this environment, it always chooses [nan,nan] as the action.
This is the code used to train the model:
train_env = DummyVecEnv([lambda: Market(train_df)])
test_env = DummyVecEnv([lambda: Market(test_df)])
model = PPO2('MlpLstmPolicy', train_env, nminibatches=1, verbose=0)
n_stepss = 2000
for i in range(1000):
    model.learn(n_stepss)
I have added a print in the environment's _take_action and step methods to show which actions are being taken, and they are always:
[nan nan]
This is my github with the full ipython notebook that I run from google colab:
https://github.com/maaxnaax/rl_ipython/blob/master/Copy_of_VWAP_Env.ipynb
I had a similar problem to yours; I just deleted the train_df and test_df parts because I didn't actually need them, and that fixed it. I suspect your problem may also have something to do with the dataset.
There is also a blog post, https://towardsdatascience.com/creating-a-custom-openai-gym-environment-for-stock-trading-be532be3910e, in which the author has a similar problem setup to yours.
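If it is the dataset, a quick sanity check of the per-day dataframes for NaN or infinite values (here assuming train_df is the list of dataframes passed into Market) may confirm it, since non-finite observations commonly propagate into NaN actions:
import numpy as np

for i, day in enumerate(train_df):
    values = day.select_dtypes(include=[np.number]).to_numpy(dtype=np.float64)
    if not np.isfinite(values).all():
        print(f"day {i} contains NaN/inf values")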