RSI results different than in TradingView (coded in Python) - python

I have spent days trying to write (and searching for) a Python function that computes an RSI matching TradingView's values, but without success (I'm new to Python).
The closest I get is the function below, but the results are still off (and because an exponential average is used, the result is sometimes quite close and sometimes very different):
import pandas as pd
import numpy as np

def rsi_tradingview(ohlc: pd.DataFrame, period: int = 14, round_rsi: bool = True):
    delta = ohlc["close"].diff()

    up = delta.copy()
    up[up < 0] = 0
    up = pd.Series.ewm(up, alpha=1/period).mean()

    down = delta.copy()
    down[down > 0] = 0
    down *= -1
    down = pd.Series.ewm(down, alpha=1/period).mean()

    rsi = np.where(up == 0, 0, np.where(down == 0, 100, 100 - (100 / (1 + up / down))))
    return np.round(rsi, 2) if round_rsi else rsi
My code looks like this:
pairs = ["BTCUSDT", "PONDUSDT"]
def get_historical_candles():
record = client.get_historical_klines(pair, Client.KLINE_INTERVAL_5MINUTE, "3 hour ago UTC")
myList = []
try:
for item in record:
n_item = []
int_ts = int(item[0] / 1000)
n_item.append(float(item[4])) # close
myList.append(n_item)
except Exception as error:
debug_logger.debug(error)
new_ohlc = pd.DataFrame(myList, columns=['close'])
return new_ohlc
def rsi_tradingview(ohlc: all_candles, period: int = 14, round_rsi: bool = False):
delta = all_candles.diff()
up = delta.copy()
up[up < 0] = 0
up = pd.Series.ewm(up, alpha=1/period).mean()
down = delta.copy()
down[down > 0] = 0
down *= -1
down = pd.Series.ewm(down, alpha=1/period).mean()
rsi = np.where(up == 0, 0, np.where(down == 0, 100, 100 - (100 / (1 + up / down))))
return np.round(rsi, 2) if round_rsi else rsi
for pair in pairs:
all_candles = get_historical_candles()
test_rsi = rsi_tradingview(all_candles, 14, False)
test_rsi_final = test_rsi[-1]
print(test_rsi_final)
I compare the results with tradingview_ta this way, which gives the correct values (I can't simply use tradingview_ta for the RSI because I also need the RSI series to calculate StochRSI):
for pair in pairs:
    test = TA_Handler(
        symbol=pair,
        screener="CRYPTO",
        exchange="BINANCE",
        interval=Interval.INTERVAL_5_MINUTES
    )
    print(test.get_analysis().indicators["RSI"])
In case it helps, here is the TradingView (Pine Script) code for RSI, and how RMA is calculated:
# RSI
study(title="Relative Strength Index", shorttitle="RSI", format=format.price, precision=2, resolution="")
len = input(14, minval=1, title="Length")
src = input(close, "Source", type = input.source)
up = rma(max(change(src), 0), len)
down = rma(-min(change(src), 0), len)
rsi = down == 0 ? 100 : up == 0 ? 0 : 100 - (100 / (1 + up / down))
plot(rsi, "RSI", color=#7E57C2)
band1 = hline(70, "Upper Band", color=#787B86)
bandm = hline(50, "Middle Band", color=color.new(#787B86, 50))
band0 = hline(30, "Lower Band", color=#787B86)
fill(band1, band0, color=color.rgb(126, 87, 194, 90), title="Background")
# RMA
plot(rma(close, 15))
//the same on pine
pine_rma(src, length) =>
    alpha = 1/length
    sum = 0.0
    sum := na(sum[1]) ? sma(src, length) : alpha * src + (1 - alpha) * nz(sum[1])
plot(pine_rma(close, 15))
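For what it's worth, here is my rough Python port of the pine_rma()/RSI logic above (the pine_rma and tv_rsi names are mine, the SMA seeding is my reading of the Pine source, and the per-element loop is slow but easy to compare against the Pine code):

import numpy as np
import pandas as pd

def pine_rma(src: pd.Series, length: int) -> pd.Series:
    # Rough port of pine_rma(): seed with an SMA over the first `length` valid
    # bars, then apply sum := alpha*src + (1-alpha)*sum[1] with alpha = 1/length.
    alpha = 1 / length
    out = pd.Series(np.nan, index=src.index, dtype=float)
    prev = np.nan
    for i in range(len(src)):
        if np.isnan(prev):
            window = src.iloc[max(0, i - length + 1): i + 1]
            if len(window) == length and not window.isna().any():
                prev = window.mean()  # na(sum[1]) -> sma(src, length)
        else:
            prev = alpha * src.iloc[i] + (1 - alpha) * prev
        out.iloc[i] = prev
    return out

def tv_rsi(close: pd.Series, length: int = 14) -> pd.Series:
    change = close.diff()
    up = pine_rma(change.clip(lower=0), length)
    down = pine_rma((-change).clip(lower=0), length)
    rsi = np.where(down == 0, 100, np.where(up == 0, 0, 100 - 100 / (1 + up / down)))
    return pd.Series(rsi, index=close.index)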
Please help me find what is wrong. :(
Thank you in advance, and thanks for reading!

Related

How do I optimize code when the execution time is too slow?

I have this Python code, but it has already been running for 24 hours and hasn't printed a result yet, and I don't know how long it will take.
Can someone help me optimize this code?
The code searches for the best-performing parameters for trading RSI divergence over a certain period.
It first defines some parameter ranges for the RSI, then goes through every possible combination to find the one with the best performance.
I'm not really an expert, so I don't really know how to change the code.
Happy to learn.
Thank you guys.
import pandas as pd
import numpy as np
import ta

def load_data(file_path, start_date, end_date):
    """
    Loads data for the specified symbol and date range from a CSV file
    """
    df = pd.read_csv(file_path)
    if 'Date' not in df.columns:
        df['Date'] = pd.to_datetime(df.index)
    df['Date'] = pd.to_datetime(df['Date'])
    df = df.set_index('Date')
    df = df[(df.index >= start_date) & (df.index <= end_date)]
    return df
def calc_rsi(df, n):
    """
    Calculates the relative strength index (RSI) for the given dataframe and window size
    """
    delta = df["Close"].diff()
    gain = delta.where(delta > 0, 0)
    loss = abs(delta.where(delta < 0, 0))
    avg_gain = gain.rolling(window=n).mean()
    avg_loss = loss.rolling(window=n).mean()
    rs = avg_gain / avg_loss
    rsi = 100 - (100 / (1 + rs))
    return rsi
def calc_pivot_point(df, pivot_point_type, pivot_point_n):
    """
    Calculates the pivot point for the given dataframe and pivot point type
    """
    if pivot_point_type == "Close":
        pivot_point = df["Close"].rolling(window=pivot_point_n).mean()
    elif pivot_point_type == "High/Low":
        pivot_point = (df["High"].rolling(window=pivot_point_n).mean() + df["Low"].rolling(window=pivot_point_n).mean()) / 2
    else:
        raise ValueError("Invalid pivot point type")
    return pivot_point
def calc_divergence(df, rsi, pivot_point, divergence_type, max_pivot_point, max_bars_to_check):
    """
    Calculates the divergence for the given dataframe and parameters
    """
    if divergence_type == "Regular":
        pivot_point_delta = pivot_point.diff()
        pivot_point_delta_sign = pivot_point_delta.where(pivot_point_delta > 0, -1)
        pivot_point_delta_sign[pivot_point_delta_sign > 0] = 1
        rsi_delta = rsi.diff()
        rsi_delta_sign = rsi_delta.where(rsi_delta > 0, -1)
        rsi_delta_sign[rsi_delta_sign > 0] = 1
        divergence = pivot_point_delta_sign * rsi_delta_sign
        divergence[divergence < 0] = -1
        divergence = divergence.rolling(window=max_pivot_point).sum()
        divergence = divergence.rolling(window=max_bars_to_check).sum()
        divergence = divergence.where(divergence > 0, 0)
        divergence[divergence < 0] = -1
    else:
        raise ValueError("Invalid divergence type")
    return divergence
def backtest(df, rsi_period, pivot_point_type, pivot_point_n, divergence_type, max_pivot_point, max_bars_to_check, trailing_stop, starting_capital):
    """
    Backtests the strategy for the given dataframe and parameters
    """
    rsi = calc_rsi(df, rsi_period)
    pivot_point = calc_pivot_point(df, pivot_point_type, pivot_point_n)
    divergence = calc_divergence(df, rsi, pivot_point, divergence_type, max_pivot_point, max_bars_to_check)
    positions = pd.DataFrame(index=df.index, columns=["Position", "Stop Loss"])
    positions["Position"] = 0.0
    positions["Stop Loss"] = 0.0
    capital = starting_capital
    for i, row in enumerate(df.iterrows()):
        date = row[0]
        close = row[1]["Close"]
        rsi_val = rsi.loc[date]
        pivot_val = pivot_point.loc[date]
        divergence_val = divergence.loc[date]
        if divergence_val > 0 and positions.loc[date]["Position"] == 0:
            positions.at[date, "Position"] = capital / close
            positions.at[date, "Stop Loss"] = close * (1 - trailing_stop)
        elif divergence_val < 0 and positions.loc[date]["Position"] > 0:
            capital = positions.loc[date]["Position"] * close
            positions.at[date, "Position"] = 0.0
            positions.at[date, "Stop Loss"] = 0.0
        elif close < positions.loc[date]["Stop Loss"] and positions.loc[date]["Position"] > 0:
            capital = positions.loc[date]["Position"] * close
            positions.at[date, "Position"] = 0.0
            positions.at[date, "Stop Loss"] = 0.0
    return capital
def find_best_iteration(df, start_rsi_period, end_rsi_period, pivot_point_types, start_pivot_point_n, end_pivot_point_n, divergence_types, start_max_pivot_point, end_max_pivot_point, start_max_bars_to_check, end_max_bars_to_check, start_trailing_stop, end_trailing_stop, starting_capital):
    """
    Finds the best iteration for the given parameters
    """
    best_result = 0.0
    best_params = None
    for rsi_period in range(start_rsi_period, end_rsi_period + 1):
        for pivot_point_type in pivot_point_types:
            for pivot_point_n in range(start_pivot_point_n, end_pivot_point_n + 1):
                for divergence_type in divergence_types:
                    for max_pivot_point in range(start_max_pivot_point, end_max_pivot_point + 1):
                        for max_bars_to_check in range(start_max_bars_to_check, end_max_bars_to_check + 1):
                            for trailing_stop in np.arange(start_trailing_stop, end_trailing_stop + 0.01, 0.01):
                                result = backtest(df, rsi_period, pivot_point_type, pivot_point_n, divergence_type, max_pivot_point, max_bars_to_check, trailing_stop, starting_capital)
                                if result > best_result:
                                    best_result = result
                                    best_params = (rsi_period, pivot_point_type, pivot_point_n, divergence_type, max_pivot_point, max_bars_to_check, trailing_stop)
    return best_result, best_params
# Define the parameters
file_path = 'C:\\Users\\The Death\\Downloads\\Binance_BTCUSDT_spot.csv'
start_date = "2020-03-16"
end_date = "2021-04-12"
df = load_data(file_path, start_date, end_date)

def load_data(start_date, end_date):
    # Your code to load the data for the specified date range
    # ...
    return df

# Define the parameters for the backtesting
start_rsi_period = 1
end_rsi_period = 30
pivot_point_types = ["Close", "High/Low"]
start_pivot_point_n = 1
end_pivot_point_n = 50
divergence_types = ["Regular"]
start_max_pivot_point = 1
end_max_pivot_point = 20
start_max_bars_to_check = 30
end_max_bars_to_check = 200
start_trailing_stop = 0.01
end_trailing_stop = 0.5
starting_capital = 10000

# Run the backtesting
df = load_data(start_date, end_date)
best_result, best_params = find_best_iteration(df, start_rsi_period, end_rsi_period, pivot_point_types, start_pivot_point_n, end_pivot_point_n, divergence_types, start_max_pivot_point, end_max_pivot_point, start_max_bars_to_check, end_max_bars_to_check, start_trailing_stop, end_trailing_stop, starting_capital)

# Print the results
print("Best result: ", best_result)
print("Best parameters: ", best_params)
I have two recommendations after scrolling through your code:
Reduce the use of nested for loops. Each additional layer of looping raises the time complexity by another power of n; find_best_iteration() has about 7 nested for loops, which is extremely costly.
Save and process your data in numpy arrays instead of pandas DataFrames. A DataFrame carries many attributes you don't use here, and it is also slower than a plain numpy array.
You can try the following methods to improve the performance:
The backtest() function is called many times inside find_best_iteration() under all those for loops, so the positions variable inside backtest() is updated very frequently, which is slow while positions is a DataFrame. Consider using a numpy array for positions, which is better suited to frequent updates.
You can try using the multiprocessing module in Python to parallelize the calculation of the divergence variable.
Hope this helps!
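For the multiprocessing idea, here is a rough sketch (untested; run_one is a helper name I made up) of spreading the parameter combinations over a process pool, assuming df, backtest() and the parameter ranges are defined at module level as in your script:

import itertools
from multiprocessing import Pool

def run_one(params):
    # One parameter combination -> one backtest run
    (rsi_period, pivot_point_type, pivot_point_n, divergence_type,
     max_pivot_point, max_bars_to_check, trailing_stop) = params
    result = backtest(df, rsi_period, pivot_point_type, pivot_point_n,
                      divergence_type, max_pivot_point, max_bars_to_check,
                      trailing_stop, starting_capital)
    return result, params

if __name__ == "__main__":
    combos = itertools.product(
        range(start_rsi_period, end_rsi_period + 1),
        pivot_point_types,
        range(start_pivot_point_n, end_pivot_point_n + 1),
        divergence_types,
        range(start_max_pivot_point, end_max_pivot_point + 1),
        range(start_max_bars_to_check, end_max_bars_to_check + 1),
        np.arange(start_trailing_stop, end_trailing_stop + 0.01, 0.01),
    )
    with Pool() as pool:
        # stream results back as workers finish and keep the best one
        best_result, best_params = max(
            pool.imap_unordered(run_one, combos, chunksize=100),
            key=lambda rp: rp[0],
        )
    print(best_result, best_params)

Note that with the ranges above the grid has on the order of hundreds of millions of combinations, so parallelism alone will not make the full search feasible; shrinking the ranges (or vectorizing backtest()) matters even more.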

Need help improving backtest class and find optimal combinations

I'm trying to create a backtest class. The code I have for the class at the moment is this:
class Backtest:
    def __init__(self, df, signals, initial_capital, max_position_size, stop_loss, conditions):
        self.df = df
        self.signals = signals[signals.index.isin(self.df.index)]
        self.initial_capital = initial_capital
        self.positions = pd.DataFrame(index=signals.index).fillna(0.0)
        self.max_position_size = max_position_size
        self.stop_loss = stop_loss
        self.conditions = conditions
        self.stop_loss_levels = None
        self.portfolio_value = None
        self.portfolio_returns = None
        self.portfolio_risk = None
        self.results = None

    # this section applies conditions for buying, selling, closes
    def generate_trades(self):
        self.positions['buy'] = np.where(self.conditions['buy'], 1000 / self.df['close'], 0)
        self.positions['sell'] = np.where(self.conditions['sell'], -1000 / self.df['close'], 0)
        self.positions = self.positions.where(self.conditions['close'], 0)
        """
        conditions = {
            'buy': (self.signals['signal_rsi_oversold'] == 1) & (self.signals['signal_stochastic_oversold'] == 1),
            'sell': (self.signals['signal_rsi_overbought'] == 1) & (self.signals['signal_stochastic_overbought'] == 1),
            'close': self.df['close'] > self.stop_loss_levels
        }"""

    def calculate_exposure_time(self):
        self.positions['open_time'] = np.where(self.positions['buy'] > 0, self.df['date'], np.nan)
        self.positions['close_time'] = np.where(self.positions['sell'] > 0, self.df['date'], np.nan)
        self.positions['exposure_time'] = self.positions['close_time'] - self.positions['open_time']
    def backtest(self):
        self.generate_trades()
        # self.positions['buy'] = np.where(self.signals[buys].sum(axis=1) > 0, 1000 / self.df['close'], 0)
        # self.positions['sell'] = np.where(self.signals[sells].sum(axis=1) > 0, -1000 / self.df['close'], 0)
        portfolio = self.positions.multiply(self.df['close'], axis=0)
        pos_diff = self.positions.diff()
        portfolio['holdings'] = (self.positions.multiply(self.df['close'], axis=0)).sum(axis=1)
        portfolio['cash'] = self.initial_capital - (pos_diff.multiply(self.df['close'], axis=0)).sum(axis=1).cumsum()
        portfolio['total'] = portfolio['cash'] + portfolio['holdings']
        portfolio['returns'] = portfolio['total'].pct_change()
        self.results = portfolio

        # Calculate the average exposure time
        average_exposure_time = self.positions['exposure_time'].mean()
        # Calculate the Sharpe ratio
        sharpe_ratio = np.sqrt(252) * (self.results['returns'].mean() / self.results['returns'].std())
        # Calculate the maximum drawdown
        max_dd = (self.results['total'].cummax() - self.results['total']).max()
        # Calculate the maximum drawdown duration
        max_dd_duration = (self.results['total'].cummax() - self.results['total']).argmax()
        # Calculate the profit factor
        profit_factor = (self.results[self.results['returns'] > 0]['returns'].sum() / abs(self.results[self.results['returns'] < 0]['returns'].sum()))
        # Calculate the average loss and average profit
        trades = self.results[self.results['returns'] != 0]['returns']
        average_loss = trades[trades < 0].mean()
        average_profit = trades[trades > 0].mean()
        # Calculate the Calmar ratio
        calmar_ratio = self.results['returns'].mean() / max_dd
        # Calculate the number of trades taken
        num_trades = trades.count()
        # Calculate the number of winning trades and losing trades
        num_winning_trades = 0
        num_losing_trades = 0
        for i, row in self.results.iterrows():
            if row['returns'] > 0:
                num_winning_trades += 1
            elif row['returns'] < 0:
                num_losing_trades += 1
        # Calculate the win/loss ratio
        win_loss_ratio = num_winning_trades / num_losing_trades
        self.statistics = {
            'calmar_ratio': calmar_ratio,
            'sharpe_ratio': sharpe_ratio,
            'max_drawdown': max_dd,
            'max_drawdown_duration': max_dd_duration,
            'profit_factor': profit_factor,
            'average_loss': average_loss,
            'average_profit': average_profit,
            'num_trades': num_trades,
            'num_winning_trades': num_winning_trades,
            'num_losing_trades': num_losing_trades,
            'win_loss_ratio': win_loss_ratio,
            'exposure_time': average_exposure_time
        }
I also have this code that I'm trying to use to analyse results, but I'm not sure how to integrate it:
def backtest_by_trade(self, buys, sells):
    # Initialize a dictionary to store the results for each trade
    self.results_by_trade = {}
    # Iterate over the combinations of buy and sell values
    for buy in buys:
        for sell in sells:
            # Create a copy of the signals dataframe
            signals = self.signals.copy()
            # Update the signals dataframe with the buy and sell values
            signals['buy'] = np.where(signals['signal_rsi_oversold'] == 1, buy, 0)
            signals['sell'] = np.where(signals['signal_rsi_overbought'] == 1, sell, 0)
            # Create a new Backtest object using the updated signals dataframe
            bt = Backtest(self.df, signals, initial_capital=10000)
            # Run the backtest
            bt.backtest()
            # Store the results in the results_by_trade dictionary
            self.results_by_trade[(buy, sell)] = bt.results
    # Calculate the profit or loss for each trade
    self.results_by_trade['profit_loss'] = self.results_by_trade['total_profit'] - self.results_by_trade['total_loss']
    # Calculate the return on investment for each trade
    self.results_by_trade['return_on_investment'] = self.results_by_trade['total_profit'] / self.results_by_trade['total_loss']
    # Calculate the profit factor for each trade
    self.results_by_trade['profit_factor'] = self.results_by_trade['total_profit'] / abs(self.results_by_trade['total_loss'])
    # Calculate the percentage of profitable trades for each combination
    self.results_by_trade['percent_profitable'] = self.results_by_trade['num_winning_trades'] / self.results_by_trade['num_trades']
    # Calculate the average profit per trade for each combination
    self.results_by_trade['avg_profit_per_trade'] = self.results_by_trade['total_profit'] / self.results_by_trade['num_winning_trades']
    # Calculate the average loss per trade for each combination
    self.results_by_trade['avg_loss_per_trade'] = self.results_by_trade['total_loss'] / self.results_by_trade['num_losing_trades']
    # Calculate the maximum consecutive winning trades for each combination
    self.results_by_trade['max_consecutive_winning_trades'] = self.results_by_trade['consecutive_winning_trades'].max()
    # Calculate the maximum consecutive losing trades for each combination
    self.results_by_trade['max_consecutive_losing_trades'] = self.results_by_trade['consecutive_losing_trades'].max()
    # Calculate the average consecutive winning trades for each combination
    self.results_by_trade['avg_consecutive_winning_trades']
In the end, I want to use different combinations of the conditions dictionary and analyse which combination of signals might be best. How can I do this? A sketch of the kind of loop I'm imagining is below.
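(Just a sketch; the block names and the placeholder close condition are hypothetical, and it assumes Backtest.backtest() runs cleanly on my data.)

import itertools
import numpy as np

# Hypothetical named building blocks that can be AND-ed into 'buy'/'sell' conditions
buy_blocks = {
    'rsi_oversold': signals['signal_rsi_oversold'] == 1,
    'stoch_oversold': signals['signal_stochastic_oversold'] == 1,
}
sell_blocks = {
    'rsi_overbought': signals['signal_rsi_overbought'] == 1,
    'stoch_overbought': signals['signal_stochastic_overbought'] == 1,
}

def all_subsets(blocks):
    # every non-empty combination of the named blocks
    keys = list(blocks)
    for r in range(1, len(keys) + 1):
        yield from itertools.combinations(keys, r)

results = {}
for buy_keys in all_subsets(buy_blocks):
    for sell_keys in all_subsets(sell_blocks):
        conditions = {
            'buy': np.logical_and.reduce([buy_blocks[k] for k in buy_keys]),
            'sell': np.logical_and.reduce([sell_blocks[k] for k in sell_keys]),
            'close': df['close'] > 0,  # placeholder close condition
        }
        bt = Backtest(df, signals, 10000, max_position_size=1000, stop_loss=0.02, conditions=conditions)
        bt.backtest()
        # final portfolio value as a simple comparison metric
        results[(buy_keys, sell_keys)] = bt.results['total'].iloc[-1]

best = max(results, key=results.get)
print(best, results[best])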

For loop cumulating and selecting data per region and year

I have data from 16 different regions. I would like to find the day on which I see 5% and 95% of the area being green (NDVI).
So far, I have done it manually, but I would like to do it in a for loop for every region, every year.
First, I extract the different regions and years.
Second, I find the points being at the 5% and 95% greening.
Third, I extract the minimum day found at 5 % greening and at 95 % greening.
Fourth, I collect all of these into one dataframe per ecoregion, containing all the years and the difference between the day at 95% greening and the day at 5% greening.
This is done for each region (16) and each year (19). So it is a lot of manual labor and heavy for the computer with a long script, see part of it below:
x = pd.read_csv('D:/data.csv')
x = x[x['means'] > 0]
x = x[x['diff'] > -1.5]
x = x[x['area'] > 9318]
x = x.sort_values(by = 'doy')
AKB = x[x['name'] == 'Region1'].drop_duplicates(subset=['ID', 'Year'], keep = 'first')
AKB['cumulative_area'] = AKB.groupby(['Year'])['area'].cumsum()
AKT = x[x['name'] == 'Region2'].drop_duplicates(subset=['ID', 'Year'], keep = 'first')
AKT['cumulative_area'] = AKT.groupby(['Year'])['area'].cumsum()
#Find 5% and 95 % of the burned area, the respective days and subtract them to see development in fire season
AKB01 = AKB[AKB['Year'] == 2001]
AKB01fifth = AKB01[AKB01['cumulative_area'] > AKB01['area'].sum() * 0.05]
AKB01ninefifth = AKB01[AKB01['cumulative_area'] > AKB01['area'].sum() * 0.95]
AKB01 = AKB01ninefifth.doy.min() - AKB01fifth.doy.min()
AKB02 = AKB[AKB['Year'] == 2002]
AKB02fifth = AKB02[AKB02['cumulative_area'] > AKB02['area'].sum() * 0.05]
AKB02ninefifth = AKB02[AKB02['cumulative_area'] > AKB02['area'].sum() * 0.95]
AKB02 = AKB02ninefifth.doy.min() - AKB02fifth.doy.min()
AKB03 = AKB[AKB['Year'] == 2003]
AKB03fifth = AKB03[AKB03['cumulative_area'] > AKB03['area'].sum() * 0.05]
AKB03ninefifth = AKB03[AKB03['cumulative_area'] > AKB03['area'].sum() * 0.95]
AKB03 = AKB03ninefifth.doy.min() - AKB03fifth.doy.min()
AKB04 = AKB[AKB['Year'] == 2004]
AKB04fifth = AKB04[AKB04['cumulative_area'] > AKB04['area'].sum() * 0.05]
AKB04ninefifth = AKB04[AKB04['cumulative_area'] > AKB04['area'].sum() * 0.95]
AKB04 = AKB04ninefifth.doy.min() - AKB04fifth.doy.min()
...
AKB18 = AKB[AKB['Year'] == 2018]
AKB18fifth = AKB18[AKB18['cumulative_area'] > AKB18['area'].sum() * 0.05]
AKB18ninefifth = AKB18[AKB18['cumulative_area'] > AKB18['area'].sum() * 0.95]
AKB18 = AKB18ninefifth.doy.min() - AKB18fifth.doy.min()
AKB19 = AKB[AKB['Year'] == 2019]
AKB19fifth = AKB19[AKB19['cumulative_area'] > AKB19['area'].sum() * 0.05]
AKB19ninefifth = AKB19[AKB19['cumulative_area'] > AKB19['area'].sum() * 0.95]
AKB19 = AKB19ninefifth.doy.min() - AKB19fifth.doy.min()
AKT01 = AKB[AKB['Year'] == 2001]
AKT01fifth = AKB01[AKB01['cumulative_area'] > AKB01['area'].sum() * 0.05]
AKT01ninefifth = AKB01[AKB01['cumulative_area'] > AKB01['area'].sum() * 0.95]
AKT01 = AKT01ninefifth.doy.min() - AKT01fifth.doy.min()
AKT02 = AKT[AKT['Year'] == 2002]
AKT02fifth = AKT02[AKT02['cumulative_area'] > AKT02['area'].sum() * 0.05]
AKT02ninefifth = AKT02[AKT02['cumulative_area'] > AKT02['area'].sum() * 0.95]
AKT02 = AKT02ninefifth.doy.min() - AKT02fifth.doy.min()
...
AKBign = pd.DataFrame()
AKBign['year'] = np.arange(2001,2020,1)
AKBign['difference'] = [AKB01,AKB02,AKB03,AKB04,AKB05,AKB06,AKB07,AKB08,AKB09,AKB10,AKB11,AKB12,AKB13,AKB14,AKB15,AKB16,AKB17,AKB18,AKB19]
I would like to turn this into a for loop that does the steps above for each region and each year, and collects everything into one large dataframe.
How do I compute that in Python?
I think you want something like this, which will give you a dictionary whose keys are regions and values are dictionaries with keys of the year and values of the difference:
from collections import defaultdict

regions = ['Region1', 'Region2']  # expand as required
years = range(2001, 2020)
result = defaultdict(dict)

for region in regions:
    xr = x[x['name'] == region].drop_duplicates(subset=['ID', 'Year'], keep='first')
    xr['cumulative_area'] = xr.groupby(['Year'])['area'].cumsum()
    for year in years:
        xry = xr[xr['Year'] == year]
        xryfifth = xry[xry['cumulative_area'] > xry['area'].sum() * 0.05]
        xryninefifth = xry[xry['cumulative_area'] > xry['area'].sum() * 0.95]
        result[region][year] = xryninefifth.doy.min() - xryfifth.doy.min()
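If you then want everything in one large dataframe, the nested dictionary converts directly, for example:

import pandas as pd

# pd.DataFrame(result) gives columns = regions, index = years;
# stack() turns that into one row per (year, region) pair
df_result = pd.DataFrame(result)
df_long = df_result.stack().rename('difference').reset_index()
df_long.columns = ['year', 'region', 'difference']
print(df_long.head())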

Why is python being slow? And how can I make it faster?

import numpy as np
import random
import matplotlib.pyplot as plt

# set grid size, M*N (row, col)
M: int = 5
N: int = 5

def moves(pos: tuple, dpos: tuple) -> tuple:
    return (pos[0] + dpos[0], pos[1] + dpos[1])

def check_neighbors(white_pos: tuple, black_pos: tuple) -> bool:
    stationary = white_pos
    up = (white_pos[0], white_pos[1] + 1)
    upper_right = (white_pos[0] + 1, white_pos[1] + 1)
    upper_left = (white_pos[0] - 1, white_pos[1] + 1)
    left = (white_pos[0] - 1, white_pos[1])
    right = (white_pos[0] + 1, white_pos[1])
    lower_left = (white_pos[0] - 1, white_pos[1] - 1)
    down = (white_pos[0], white_pos[1] - 1)
    lower_right = (white_pos[0] + 1, white_pos[1] - 1)
    if (black_pos == stationary) or (black_pos == up) or (black_pos == upper_right) or (black_pos == upper_left) or (black_pos == left) or (black_pos == right) or (black_pos == lower_left) or (black_pos == down) or (black_pos == lower_right):
        return True
    else:
        return False

def run_sim():
    w_x0 = random.sample([i for i in range(0, M)], 1)
    w_y0 = random.sample([j for j in range(0, N)], 1)
    b_x0 = random.sample([i for i in range(0, M)], 1)
    b_y0 = random.sample([j for j in range(0, N)], 1)
    white = [(x, y) for x, y in zip(w_x0, w_y0)]
    black = [(x, y) for x, y in zip(b_x0, b_y0)]
    stop: bool = False
    n: int = 0
    t: int = 0
    while stop != True:
        if check_neighbors(white[n], black[n]) == True:
            stop = True
        else:
            dt_w = random.sample([i for i in range(-1, 2)], 2)
            dt_bl = random.sample([i for i in range(-1, 2)], 2)
            white.append(moves(white[n], dt_w))
            black.append(moves(black[n], dt_bl))
            t = t + 1
            n = n + 1
    return n

t_dist = [run_sim() for i in range(100)]
print(t_dist)
Excuse the terrible formatting.
When it gets to t_dist = [run_sim() for i in range(100)] it takes forever to run (way over 60 seconds). How can I get it to run faster and get my results? Why is it so computationally expensive?
I am using a Jupyter notebook. I also tried running it plainly as a .py file and it is still slow. I tried using the debugger and, after setting a breakpoint at t_dist = , it only shows a few iterations and then stops. If I set it to range(5), it works just fine, but it hangs with bigger numbers (e.g. 100, 10000), which is what I actually want to run the simulation with.
If you only want the distribution, you should define a max_t cap; please adjust its value as needed.
def check_neighbors(a, b):
    return abs(a[0]-b[0]) + abs(a[1]-b[1]) <= 1 or \
        (abs(a[0]-b[0]) == 1 and abs(a[1]-b[1]) == 1)

def moves(a, b):
    return (a[0]+b[0], a[1]+b[1])

def run_sim(M=5, N=5, max_t=10**6):
    w_co = [random.randrange(0, M), random.randrange(0, N)]  # to produce a random integer, you can use randrange
    b_co = [random.randrange(0, M), random.randrange(0, N)]
    t: int = 0
    while t < max_t:
        if check_neighbors(w_co, b_co) is True:  # for bool, `is True` is strict
            break
        else:
            dt_w = [random.randrange(-1, 2) for _ in range(2)]
            dt_b = [random.randrange(-1, 2) for _ in range(2)]
            w_co = moves(w_co, dt_w)
            b_co = moves(b_co, dt_b)
            t += 1
            if (t % (max_t//20) == 0):
                print(f"\t{t}")  # check if the program is processing or not
    return t

t_dist = []
for i in range(100):
    if (i % 10 == 1):
        print(i)  # check the process
    t_dist.append(run_sim(M=5, N=5, max_t=10**6))
print(t_dist)
Additionally, here is a histogram of an example run.
The graph shows that some cases take a very, very long time.
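A minimal sketch of how such a histogram could be drawn from t_dist (the log scale is my own choice, since the tail is long):

import matplotlib.pyplot as plt

plt.hist(t_dist, bins=50)
plt.yscale('log')  # long right tail: a log scale keeps the rare, very long runs visible
plt.xlabel('steps until the walkers meet')
plt.ylabel('count')
plt.title('Distribution of run_sim() stopping times')
plt.show()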

How to find Average directional movement for stocks using Pandas?

I have a dataframe of OHLCV data. Does anyone know of a tutorial or another way of computing the ADX (average directional movement index) using pandas?
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
import datetime as dt
import numpy as nm
start=dt.datetime.today()-dt.timedelta(59)
end=dt.datetime.today()
df=pd.DataFrame(yf.download("MSFT", start=start, end=end))
The average directional index, or ADX, is the primary technical indicator among the five indicators that make up a technical trading system developed by J. Welles Wilder, Jr. and is calculated using the other indicators that make up the trading system. The ADX is primarily used as an indicator of momentum, or trend strength, but the total ADX system is also used as a directional indicator.
Directional movement is calculated by comparing the difference between two consecutive lows with the difference between their respective highs.
For the Excel calculation of ADX, this is a really good video:
https://www.youtube.com/watch?v=LKDJQLrXedg&t=387s
I was playing with this a little bit and found something that can help you with the issue:
def ADX(data: pd.DataFrame, period: int):
    """
    Computes the ADX indicator.
    """
    df = data.copy()
    alpha = 1/period

    # TR
    df['H-L'] = df['High'] - df['Low']
    df['H-C'] = np.abs(df['High'] - df['Close'].shift(1))
    df['L-C'] = np.abs(df['Low'] - df['Close'].shift(1))
    df['TR'] = df[['H-L', 'H-C', 'L-C']].max(axis=1)
    del df['H-L'], df['H-C'], df['L-C']

    # ATR
    df['ATR'] = df['TR'].ewm(alpha=alpha, adjust=False).mean()

    # +-DX
    df['H-pH'] = df['High'] - df['High'].shift(1)
    df['pL-L'] = df['Low'].shift(1) - df['Low']
    df['+DX'] = np.where(
        (df['H-pH'] > df['pL-L']) & (df['H-pH'] > 0),
        df['H-pH'],
        0.0
    )
    df['-DX'] = np.where(
        (df['H-pH'] < df['pL-L']) & (df['pL-L'] > 0),
        df['pL-L'],
        0.0
    )
    del df['H-pH'], df['pL-L']

    # +- DMI
    df['S+DM'] = df['+DX'].ewm(alpha=alpha, adjust=False).mean()
    df['S-DM'] = df['-DX'].ewm(alpha=alpha, adjust=False).mean()
    df['+DMI'] = (df['S+DM']/df['ATR'])*100
    df['-DMI'] = (df['S-DM']/df['ATR'])*100
    del df['S+DM'], df['S-DM']

    # ADX
    df['DX'] = (np.abs(df['+DMI'] - df['-DMI'])/(df['+DMI'] + df['-DMI']))*100
    df['ADX'] = df['DX'].ewm(alpha=alpha, adjust=False).mean()
    del df['DX'], df['ATR'], df['TR'], df['-DX'], df['+DX'], df['+DMI'], df['-DMI']
    return df
At the beginning the values aren't correct (as is always the case with the EWM approach), but after enough bars it converges to the correct value.
Math was taken from here.
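For example, with the yfinance dataframe from the question, a call could look like this (a usage sketch of mine, not part of the original answer):

df_adx = ADX(df, period=14)
print(df_adx['ADX'].tail())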
def ADX(df):
    def getCDM(df):
        dmpos = df["High"][-1] - df["High"][-2]
        dmneg = df["Low"][-2] - df["Low"][-1]
        if dmpos > dmneg:
            return dmpos
        else:
            return dmneg

    def getDMnTR(df):
        DMpos = []
        DMneg = []
        TRarr = []
        n = round(len(df)/14)
        idx = n
        while n <= (len(df)):
            dmpos = df["High"][n-1] - df["High"][n-2]
            dmneg = df["Low"][n-2] - df["Low"][n-1]
            DMpos.append(dmpos)
            DMneg.append(dmneg)
            a1 = df["High"][n-1] - df["High"][n-2]
            a2 = df["High"][n-1] - df["Close"][n-2]
            a3 = df["Low"][n-1] - df["Close"][n-2]
            TRarr.append(max(a1, a2, a3))
            n = idx + n
        return DMpos, DMneg, TRarr

    def getDI(df):
        DMpos, DMneg, TR = getDMnTR(df)
        CDM = getCDM(df)
        POSsmooth = (sum(DMpos) - sum(DMpos)/len(DMpos) + CDM)
        NEGsmooth = (sum(DMneg) - sum(DMneg)/len(DMneg) + CDM)
        DIpos = (POSsmooth / (sum(TR)/len(TR))) * 100
        DIneg = (NEGsmooth / (sum(TR)/len(TR))) * 100
        return DIpos, DIneg

    def getADX(df):
        DIpos, DIneg = getDI(df)
        dx = (abs(DIpos - DIneg) / abs(DIpos + DIneg)) * 100
        ADX = dx/14
        return ADX

    return getADX(df)

print(ADX(df))
This gives you the exact same numbers as TradingView and Thinkorswim.
import numpy as np

def ema(arr, periods=14, weight=1, init=None):
    leading_na = np.where(~np.isnan(arr))[0][0]
    arr = arr[leading_na:]
    alpha = weight / (periods + (weight-1))
    alpha_rev = 1 - alpha
    n = arr.shape[0]
    pows = alpha_rev**(np.arange(n+1))
    out1 = np.array([])
    if 0 in pows:
        out1 = ema(arr[:int(len(arr)/2)], periods)
        arr = arr[int(len(arr)/2) - 1:]
        init = out1[-1]
        n = arr.shape[0]
        pows = alpha_rev**(np.arange(n+1))
    scale_arr = 1/pows[:-1]
    if init:
        offset = init * pows[1:]
    else:
        offset = arr[0]*pows[1:]
    pw0 = alpha*alpha_rev**(n-1)
    mult = arr*pw0*scale_arr
    cumsums = mult.cumsum()
    out = offset + cumsums*scale_arr[::-1]
    out = out[1:] if len(out1) > 0 else out
    out = np.concatenate([out1, out])
    out[:periods] = np.nan
    out = np.concatenate(([np.nan]*leading_na, out))
    return out

def atr(highs, lows, closes, periods=14, ema_weight=1):
    hi = np.array(highs)
    lo = np.array(lows)
    c = np.array(closes)
    tr = np.vstack([np.abs(hi[1:]-c[:-1]),
                    np.abs(lo[1:]-c[:-1]),
                    (hi-lo)[1:]]).max(axis=0)
    atr = ema(tr, periods=periods, weight=ema_weight)
    atr = np.concatenate([[np.nan], atr])
    return atr

def adx(highs, lows, closes, periods=14):
    highs = np.array(highs)
    lows = np.array(lows)
    closes = np.array(closes)
    up = highs[1:] - highs[:-1]
    down = lows[:-1] - lows[1:]
    up_idx = up > down
    down_idx = down > up
    updm = np.zeros(len(up))
    updm[up_idx] = up[up_idx]
    updm[updm < 0] = 0
    downdm = np.zeros(len(down))
    downdm[down_idx] = down[down_idx]
    downdm[downdm < 0] = 0
    _atr = atr(highs, lows, closes, periods)[1:]
    updi = 100 * ema(updm, periods) / _atr
    downdi = 100 * ema(downdm, periods) / _atr
    zeros = (updi + downdi == 0)
    downdi[zeros] = .0000001
    adx = 100 * np.abs(updi - downdi) / (updi + downdi)
    adx = ema(np.concatenate([[np.nan], adx]), periods)
    return adx
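A possible usage sketch with the dataframe from the question (my own example, not part of the original answer; the returned array appears to line up with the input rows):

df['ADX'] = adx(df['High'], df['Low'], df['Close'], periods=14)
print(df[['Close', 'ADX']].tail())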
