How do i optimize code when the execution time is too slow? - python

I have this Python code, but it's already running for 24h and doesn't seem to print the result for now.
I don't know how long it will take.
Can someone help me to optimize this code?
The code is to find the best performance for trading RSI divergence in a certain period.
It first defines some parameters for the RSI.
The code then goes through every possible combination to find the best combination of parameters to have the best performances.
I'm not really an expert.
I don't really know how i can change the code as i'm no expert.
Happy to learn.
Thank you guys.
import pandas as pd
import numpy as np
import ta
def load_data(file_path, start_date, end_date):
"""
Loads data for the specified symbol and date range from a CSV file
"""
df = pd.read_csv(file_path)
if 'Date' not in df.columns:
df['Date'] = pd.to_datetime(df.index)
df['Date'] = pd.to_datetime(df['Date'])
df = df.set_index('Date')
df = df[(df.index >= start_date) & (df.index <= end_date)]
return df
def calc_rsi(df, n):
"""
Calculates the relative strength index (RSI) for the given dataframe and window size
"""
delta = df["Close"].diff()
gain = delta.where(delta > 0, 0)
loss = abs(delta.where(delta < 0, 0))
avg_gain = gain.rolling(window=n).mean()
avg_loss = loss.rolling(window=n).mean()
rs = avg_gain / avg_loss
rsi = 100 - (100 / (1 + rs))
return rsi
def calc_pivot_point(df, pivot_point_type, pivot_point_n):
"""
Calculates the pivot point for the given dataframe and pivot point type
"""
if pivot_point_type == "Close":
pivot_point = df["Close"].rolling(window=pivot_point_n).mean()
elif pivot_point_type == "High/Low":
pivot_point = (df["High"].rolling(window=pivot_point_n).mean() + df["Low"].rolling(window=pivot_point_n).mean()) / 2
else:
raise ValueError("Invalid pivot point type")
return pivot_point
def calc_divergence(df, rsi, pivot_point, divergence_type, max_pivot_point, max_bars_to_check):
"""
Calculates the divergence for the given dataframe and parameters
"""
if divergence_type == "Regular":
pivot_point_delta = pivot_point.diff()
pivot_point_delta_sign = pivot_point_delta.where(pivot_point_delta > 0, -1)
pivot_point_delta_sign[pivot_point_delta_sign > 0] = 1
rsi_delta = rsi.diff()
rsi_delta_sign = rsi_delta.where(rsi_delta > 0, -1)
rsi_delta_sign[rsi_delta_sign > 0] = 1
divergence = pivot_point_delta_sign * rsi_delta_sign
divergence[divergence < 0] = -1
divergence = divergence.rolling(window=max_pivot_point).sum()
divergence = divergence.rolling(window=max_bars_to_check).sum()
divergence = divergence.where(divergence > 0, 0)
divergence[divergence < 0] = -1
else:
raise ValueError("Invalid divergence type")
return divergence
def backtest(df, rsi_period, pivot_point_type, pivot_point_n, divergence_type, max_pivot_point, max_bars_to_check, trailing_stop, starting_capital):
"""
Backtests the strategy for the given dataframe and parameters
"""
rsi = calc_rsi(df, rsi_period)
pivot_point = calc_pivot_point(df, pivot_point_type, pivot_point_n)
divergence = calc_divergence(df, rsi, pivot_point, divergence_type, max_pivot_point, max_bars_to_check)
positions = pd.DataFrame(index=df.index, columns=["Position", "Stop Loss"])
positions["Position"] = 0.0
positions["Stop Loss"] = 0.0
capital = starting_capital
for i, row in enumerate(df.iterrows()):
date = row[0]
close = row[1]["Close"]
rsi_val = rsi.loc[date]
pivot_val = pivot_point.loc[date]
divergence_val = divergence.loc[date]
if divergence_val > 0 and positions.loc[date]["Position"] == 0:
positions.at[date, "Position"] = capital / close
positions.at[date, "Stop Loss"] = close * (1 - trailing_stop)
elif divergence_val < 0 and positions.loc[date]["Position"] > 0:
capital = positions.loc[date]["Position"] * close
positions.at[date, "Position"] = 0.0
positions.at[date, "Stop Loss"] = 0.0
elif close < positions.loc[date]["Stop Loss"] and positions.loc[date]["Position"] > 0:
capital = positions.loc[date]["Position"] * close
positions.at[date, "Position"] = 0.0
positions.at[date, "Stop Loss"] = 0.0
return capital
def find_best_iteration(df, start_rsi_period, end_rsi_period, pivot_point_types, start_pivot_point_n, end_pivot_point_n, divergence_types, start_max_pivot_point, end_max_pivot_point, start_max_bars_to_check, end_max_bars_to_check, start_trailing_stop, end_trailing_stop, starting_capital):
"""
Finds the best iteration for the given parameters
"""
best_result = 0.0
best_params = None
for rsi_period in range(start_rsi_period, end_rsi_period + 1):
for pivot_point_type in pivot_point_types:
for pivot_point_n in range(start_pivot_point_n, end_pivot_point_n + 1):
for divergence_type in divergence_types:
for max_pivot_point in range(start_max_pivot_point, end_max_pivot_point + 1):
for max_bars_to_check in range(start_max_bars_to_check, end_max_bars_to_check + 1):
for trailing_stop in np.arange(start_trailing_stop, end_trailing_stop + 0.01, 0.01):
result = backtest(df, rsi_period, pivot_point_type, pivot_point_n, divergence_type, max_pivot_point, max_bars_to_check, trailing_stop, starting_capital)
if result > best_result:
best_result = result
best_params = (rsi_period, pivot_point_type, pivot_point_n, divergence_type, max_pivot_point, max_bars_to_check, trailing_stop)
return best_result, best_params
# Define the parameters
file_path = 'C:\\Users\\The Death\\Downloads\\Binance_BTCUSDT_spot.csv'
start_date = "2020-03-16"
end_date = "2021-04-12"
df = load_data(file_path, start_date, end_date)
def load_data(start_date, end_date):
# Your code to load the data for the specified date range
# ...
return df
# Define the parameters for the backtesting
start_rsi_period = 1
end_rsi_period = 30
pivot_point_types = ["Close", "High/Low"]
start_pivot_point_n = 1
end_pivot_point_n = 50
divergence_types = ["Regular"]
start_max_pivot_point = 1
end_max_pivot_point = 20
start_max_bars_to_check = 30
end_max_bars_to_check = 200
start_trailing_stop = 0.01
end_trailing_stop = 0.5
starting_capital = 10000
# Run the backtesting
df = load_data(start_date, end_date)
best_result, best_params = find_best_iteration(df, start_rsi_period, end_rsi_period, pivot_point_types, start_pivot_point_n, end_pivot_point_n, divergence_types, start_max_pivot_point, end_max_pivot_point, start_max_bars_to_check, end_max_bars_to_check, start_trailing_stop, end_trailing_stop, starting_capital)
# Print the results
print("Best result: ", best_result)
print("Best parameters: ", best_params)

I have two recommendations after I scroll up your code:
Reduce the usage of for loop. As you increase a layer of for loop (initial is O(n), the time complexity of your code will increase by a power. In your find_best_iteration() there is about 7 layers of for loop, this is extremely cost your time.
Save and process your data in numpy.array() instead of pd.dataframe(). Dataframe is a class that contains too many unused attributes, and its performance is also slower than numpy.array.

You can try the following methods to improve the performance:
The backtest() function is used many times inside the find_best_iteration() function under many for loops, thus the positions variable inside backtest() is being updated frequently which can be show when the positions variable is a Dataframe. You can consider using numpy array for the positions variable that is optimized for updates.
You can try using the multiprocessing module in Python to parallelize the calculation of the divergence variable.
Hope this help!

Related

My Custom Reinforcement Learning Environment is unable to extract values that I want

I am working on a grid optimization model where I am importing data from a data-set and I am training my RL-model on a custom Reinforcement Learning environment. In this model I also want to extract my grid values and pv-values step by step. While training, my custom Reinforcement learning environment does give me values for grid and pv but when I do prediction, it outputs an empty list. I wanted to ask why is it so.
What's the problem?
For Importing the dataset
def get_data(start = '2017-01-01 00:00:00', end = '2017-01-01 23:55:00'):
# import standard load profiles
slp = pd.read_csv('df_p.csv', index_col=0, parse_dates=True)['0'] / 1000
slp = slp.resample('15min').mean() * 3
pv = pd.read_csv('Solar_Data-2011.csv', delimiter=';',
index_col=0, parse_dates=False)["Generation"] * 3
pv.index = slp.index
print("Load values:")
print(slp.values)
print("PV values:")
print(pv.values)
start = pd.to_datetime(start)
end = pd.to_datetime(end)
return slp[start:end], pv[start:end]
This is my custom-environment
class CostEnv(Env):
def __init__(self):
# Actions we can take increase in cost, lowering of cost
self.action_space = Discrete(2)
### Get input data, just choose one day for now
self.load, self.pv = get_data(start = '2017-01-01 00:00:00', end = '2017-01-01 23:55:00')
self.pv_price = 0.10
self.grid_price = 0.40
self.line_max = 15
self.grid_penalty = 100
self.battery_max = 18
self.battery_state = 10
self.pv_values = []
self.grid_values = []
###
# Set episode length
self.episode_length = len(self.load)
self.observation_space = Dict(
{
"load": Box(0, self.episode_length - 1, shape=(2,), dtype=int),
"pv": Box(0, self.episode_length - 1, shape=(2,), dtype=int),
}
)
def step(self, action):
# Apply action
# 0 -1 = -1 decrease in cost
# 1 -1 = 0 Increase in cost
# self.state += action -1
# Reduce episode length by 1 second
# self.episode_length -= 1 ###Move to the end of the action
### We calculate the reward based on the price for the electricity,
#lower price, "higher" reward
if action == 0:
# Take all electricity from grid
if self.load[len(self.load)-self.episode_length] > self.line_max:
reward = self.load[len(self.load)-self.episode_length] * self.grid_price * -1 - abs(self.load[len(self.load)-self.episode_length] - self.line_max) * self.grid_penalty
self.grid_values.append(self.load[len(self.load)-self.episode_length])
else:
reward = self.load[len(self.load)-self.episode_length] * self.grid_price * -1
self.grid_values.append(self.load[len(self.load)-self.episode_length])
elif action == 1:
# Take all electricity from pv
if self.pv[len(self.pv)-self.episode_length] >= self.load[len(self.load)-self.episode_length]:
if abs(self.pv[len(self.pv)-self.episode_length] - self.load[len(self.load)-self.episode_length]) > self.line_max:
reward = self.load[len(self.load)-self.episode_length] * self.pv_price * -1 - (self.pv[len(self.pv)-self.episode_length] - self.load[len(self.load)-self.episode_length]) * self.grid_penalty
self.pv_values.append(self.pv[len(self.pv)-self.episode_length])
else:
reward = self.load[len(self.load)-self.episode_length] * self.pv_price * -1
self.pv_values.append(self.pv[len(self.pv)-self.episode_length])
else:
if (self.load[len(self.load)-self.episode_length] - self.pv[len(self.pv)-self.episode_length]) > self.line_max:
reward = self.pv[len(self.pv)-self.episode_length] * self.pv_price * -1 - (self.load[len(self.load)-self.episode_length] - self.pv[len(self.pv)-self.episode_length]) * self.grid_price - (self.load[len(self.load)-self.episode_length] - self.pv[len(self.pv)-self.episode_length]) * self.grid_penalty
self.pv_values.append(self.pv[len(self.pv)-self.episode_length])
else:
reward = self.pv[len(self.pv)-self.episode_length] * self.pv_price * -1
self.pv_values.append(self.pv[len(self.pv)-self.episode_length])
### This may lead the agent to always choose action 1,
###because it will always supply the demand and will always be cheaper.
else:
reward = -300000
# Invalid action
#raise ValueError(f'Invalid action: {action}')
info = {}
### Observation
observation = {
"load": (0,self.load[len(self.load)-self.episode_length]),
"pv": (0,self.pv[len(self.pv)-self.episode_length]),
}
### Either here or before checking self.episode_length
self.episode_length -= 1
### Check if timeseries is over
if self.episode_length <= 0:
done = True
else:
done = False
# Return step information
# return self.state, reward, done, info
return observation, reward, done, info
def render(self):
# Implement viz
pass
def reset(self):
self.done=False
# Set episode length
self.episode_length = len(self.load)
observation = {
"load": (0, self.load[len(self.load)-self.episode_length]),
"pv": (0, self.pv[len(self.pv)-self.episode_length]),
}
return observation
Here is my model-training
log_path = os.path.join('Training', 'Logs')
model = A2C("MultiInputPolicy", env, verbose=1, tensorboard_log=log_path)
model.learn(total_timesteps=300000)
The values extracted by this model are as follows:
Env.grid_values: [0.4014,
0.342,
0.5357999999999999,
0.4698,
0.44999999999999996,
0.376,
0.521,
0.4293999999999999,
0.25140000000000007,
0.7412000000000001,
env.pv_values:[0.0,
0.0,
0.0,
0.0,
0.0,
0.0607460715,
0.0678108435,
0.07642341180000001,
Now for predicting, I am using another dataset which is used as follows:
def get_data(start = '2017-01-01 00:00:00', end = '2017-01-01 23:55:00'):
# import standard load profiles
slp = pd.read_csv('df_p.csv', index_col=0, parse_dates=True)['2'] / 1000
slp = slp.resample('15min').mean() * 3
pv = pd.read_csv('Solar_Data-2011.csv', delimiter=';',
index_col=0, parse_dates=False)["Generation"] * 3
pv.index = slp.index
print("Load values:")
print(slp.values)
print("PV values:")
print(pv.values)
start = pd.to_datetime(start)
end = pd.to_datetime(end)
return slp[start:end], pv[start:end]
For prediction the code is this
episodes = 20
for ep in range(episodes):
obs = env.reset()
done = False
while not done:
action = model.predict(obs)
obs, rewards, done, info = env.step(action)
env.close()
The lists are as following:
env.pv_values
[]
env.grid_values
[]
Please tell me what I am doing wrong. Also, I load the model in a separate Jupyter notebook and environment is the same that I use for training my model. The code is as follows:
model = A2C("MultiInputPolicy",env=env)
model = model.load("A2C_Multi_Input_Policy_Improved_1",env=env)

Need help improving backtest class and find optimal combinations

I'm trying to create a backtest class. The code i have for the class atm is this:
class Backtest:
def __init__(self, df, signals, initial_capital, max_position_size, stop_loss, conditions):
self.df = df
self.signals = signals[signals.index.isin(self.df.index)]
self.initial_capital = initial_capital
self.positions = pd.DataFrame(index=signals.index).fillna(0.0)
self.max_position_size = max_position_size
self.stop_loss = stop_loss
self.conditions = conditions
self.stop_loss_levels = None
self.portfolio_value = None
self.portfolio_returns = None
self.portfolio_risk = None
self.results=None
#this section applies conditions for buying, selling, closes
def generate_trades(self):
self.positions['buy'] = np.where(self.conditions['buy'], 1000 / self.df['close'], 0)
self.positions['sell'] = np.where(self.conditions['sell'], -1000 / self.df['close'], 0)
self.positions = self.positions.where(self.conditions['close'], 0)
"""
conditions = {
'buy': (self.signals['signal_rsi_oversold'] == 1) & (self.signals['signal_stochastic_oversold'] == 1),
'sell': (self.signals['signal_rsi_overbought'] == 1) & (self.signals['signal_stochastic_overbought'] == 1),
'close': self.df['close'] > self.stop_loss_levels
}"""
def calculate_exposure_time(self):
self.positions['open_time'] = np.where(self.positions['buy'] > 0, self.df['date'], np.nan)
self.positions['close_time'] = np.where(self.positions['sell'] > 0, self.df['date'], np.nan)
self.positions['exposure_time'] = self.positions['close_time'] - self.positions['open_time']
def backtest(self):
self.generate_trades()
#self.positions['buy'] = np.where(self.signals[buys].sum(axis=1) > 0, 1000 / self.df['close'], 0)
#self.positions['sell'] = np.where(self.signals[sells].sum(axis=1) > 0, -1000 / self.df['close'], 0)
portfolio = self.positions.multiply(self.df['close'], axis=0)
pos_diff = self.positions.diff()
portfolio['holdings'] = (self.positions.multiply(self.df['close'], axis=0)).sum(axis=1)
portfolio['cash'] = self.initial_capital - (pos_diff.multiply(self.df['close'], axis=0)).sum(axis=1).cumsum()
portfolio['total'] = portfolio['cash'] + portfolio['holdings']
portfolio['returns'] = portfolio['total'].pct_change()
self.results = portfolio
# Calculate the average exposure time
average_exposure_time = self.positions['exposure_time'].mean()
# Calculate the Sharpe ratio
sharpe_ratio = np.sqrt(252) * (self.results['returns'].mean() / self.results['returns'].std())
# Calculate the maximum drawdown
max_dd = (self.results['total'].cummax() - self.results['total']).max()
# Calculate the maximum drawdown duration
max_dd_duration = (self.results['total'].cummax() - self.results['total']).argmax()
# Calculate the profit factor
profit_factor = (self.results[self.results['returns'] > 0]['returns'].sum() / abs(self.results[self.results['returns'] < 0]['returns'].sum()))
# Calculate the average loss and average profit
trades = self.results[self.results['returns'] != 0]['returns']
average_loss = trades[trades < 0].mean()
average_profit = trades[trades > 0].mean()
# Calculate the Calmar ratio
calmar_ratio = self.results['returns'].mean() / max_dd
# Calculate the number of trades taken
num_trades = trades.count()
# Calculate the number of winning trades and losing trades
num_winning_trades = 0
num_losing_trades = 0
for i, row in self.results.iterrows():
if row['returns'] > 0:
num_winning_trades += 1
elif row['returns'] < 0:
num_losing_trades += 1
# Calculate the win/loss ratio
win_loss_ratio = num_winning_trades / num_losing_trades
self.statistics = {
'calmar_ratio': calmar_ratio,
'sharpe_ratio': sharpe_ratio,
'max_drawdown': max_dd,
'max_drawdown_duration': max_dd_duration,
'profit_factor': profit_factor,
'average_loss': average_loss,
'average_profit': average_profit,
'num_trades': num_trades,
'num_winning_trades':num_winning_trades,
'num_losing_trades':num_losing_trades,
'win_loss_ratio':win_loss_ratio,
'exposure_time': average_exposure_time
}
I also have this code that I'm trying to use to analyse results. I'm not sure how to integrate:
def backtest_by_trade(self, buys, sells):
# Initialize a dictionary to store the results for each trade
self.results_by_trade = {}
# Iterate over the combinations of buy and sell values
for buy in buys:
for sell in sells:
# Create a copy of the signals dataframe
signals = self.signals.copy()
# Update the signals dataframe with the buy and sell values
signals['buy'] = np.where(signals['signal_rsi_oversold'] == 1, buy, 0)
signals['sell'] = np.where(signals['signal_rsi_overbought'] == 1, sell, 0)
# Create a new Backtest object using the updated signals dataframe
bt = Backtest(self.df, signals, initial_capital=10000)
# Run the backtest
bt.backtest()
# Store the results in the results_by_trade dictionary
self.results_by_trade[(buy, sell)] = bt.results
# Calculate the profit or loss for each trade
self.results_by_trade['profit_loss'] = self.results_by_trade['total_profit'] - self.results_by_trade['total_loss']
# Calculate the return on investment for each trade
self.results_by_trade['return_on_investment'] = self.results_by_trade['total_profit'] / self.results_by_trade['total_loss']
# Calculate the profit factor for each trade
self.results_by_trade['profit_factor'] = self.results_by_trade['total_profit'] / abs(self.results_by_trade['total_loss'])
# Calculate the percentage of profitable trades for each combination
self.results_by_trade['percent_profitable'] = self.results_by_trade['num_winning_trades'] / self.results_by_trade['num_trades']
# Calculate the average profit per trade for each combination
self.results_by_trade['avg_profit_per_trade'] = self.results_by_trade['total_profit'] / self.results_by_trade['num_winning_trades']
# Calculate the average loss per trade for each combination
self.results_by_trade['avg_loss_per_trade'] = self.results_by_trade['total_loss'] / self.results_by_trade['num_losing_trades']
# Calculate the maximum consecutive winning trades for each combination
self.results_by_trade['max_consecutive_winning_trades'] = self.results_by_trade['consecutive_winning_trades'].max()
# Calculate the maximum consecutive losing trades for each combination
self.results_by_trade['max_consecutive_losing_trades'] = self.results_by_trade['consecutive_losing_trades'].max()
# Calculate the average consecutive winning trades for each combination
self.results_by_trade['avg_consecutive_winning_trades']
In the end, i want to use different combinations of the condictions dictionary and analyse what might be the best combination of signals. How can i do this ?

For pandas query is very slow, How to enhancing performance?

How to quickly find the data that meets the conditions and terminate the subsequent search in advance.Search from right to front until the maximum findlimit of the search.
Expected accuracy <= 0.001s,
Current accuracy >= 0.03s
def test_Find(df:pd.DataFrame,findLimit:int=365):
success = False
current = pd.DataFrame()
start = len(df) -1
loopCount = 0
# Compare from back to front
for i in range(start, -1, -1):
loopCount += 1
if loopCount > findLimit:
return pd.DataFrame()
current = df.iloc[i]
success = Find(i, df)
if success:
return current
return pd.DataFrame()
#Whether the last value is greater than the current value
def LastPassCurrent(curi:int, df:pd.DataFrame):
current = df.iloc[curi]
last = df.iloc[-1]
result = last.c > current["c"]
return result
# Whether the current residual value is Less than or equal to the current value
def RemainNoPassCurrent(curi:int, df:pd.DataFrame)->bool:
cur = df.iloc[curi]
remain = df.iloc[curi+1:-1]
maxC = remain["c"].max()
if np.isnan(maxC):
maxC = 0
remainNoPassCurrent = maxC <= cur["c"]
return remainNoPassCurrent
# Qualified search
def Find(curi, df):
current = df.iloc[curi]
result = (current.a == 8) and \
RemainNoPassCurrent(curi, df) and \
LastPassCurrent(curi, df)
return result
#test data
dfs = []
for i in range(0, 4000):
dfs.append(pd.DataFrame(np.arange(365*3).reshape(365,3), columns=list('abc')))
#Test time collection
df = None
for i in range(0, 4000):
df = dfs[i]
start_time = time.time()
data = test_Find(df, 365)
end_time = time.time()
result = end_time - start_time
print(f'loop {i} Empty:{data.empty} time is %.3fs' % result)
#Current testing inefficient time
# loop 0 time is 0.367s ....

RSI results different than in TradingView (coded in Python)

It's been days I spent trying to code (and search) a python function to get RSI that match TradingView results but without success (I'm new to Python).
The closest results I get for RSI is this function, but still different (and the fact exponential is used, sometimes result is pretty close, sometimes there is a pretty huge difference):
def rsi_tradingview(ohlc: pd.DataFrame, period: int = 14, round_rsi: bool = True):
delta = ohlc["close"].diff()
up = delta.copy()
up[up < 0] = 0
up = pd.Series.ewm(up, alpha=1/period).mean()
down = delta.copy()
down[down > 0] = 0
down *= -1
down = pd.Series.ewm(down, alpha=1/period).mean()
rsi = np.where(up == 0, 0, np.where(down == 0, 100, 100 - (100 / (1 + up / down))))
return np.round(rsi, 2) if round_rsi else rsi
My code looks like this:
pairs = ["BTCUSDT", "PONDUSDT"]
def get_historical_candles():
record = client.get_historical_klines(pair, Client.KLINE_INTERVAL_5MINUTE, "3 hour ago UTC")
myList = []
try:
for item in record:
n_item = []
int_ts = int(item[0] / 1000)
n_item.append(float(item[4])) # close
myList.append(n_item)
except Exception as error:
debug_logger.debug(error)
new_ohlc = pd.DataFrame(myList, columns=['close'])
return new_ohlc
def rsi_tradingview(ohlc: all_candles, period: int = 14, round_rsi: bool = False):
delta = all_candles.diff()
up = delta.copy()
up[up < 0] = 0
up = pd.Series.ewm(up, alpha=1/period).mean()
down = delta.copy()
down[down > 0] = 0
down *= -1
down = pd.Series.ewm(down, alpha=1/period).mean()
rsi = np.where(up == 0, 0, np.where(down == 0, 100, 100 - (100 / (1 + up / down))))
return np.round(rsi, 2) if round_rsi else rsi
for pair in pairs:
all_candles = get_historical_candles()
test_rsi = rsi_tradingview(all_candles, 14, False)
test_rsi_final = test_rsi[-1]
print(test_rsi_final)
I compare results with tradingview_ta this way, that give correct results (I can't just use this function to get RSI because I need the RSI to calculate StochRSI):
for pair in pairs:
test = TA_Handler(
symbol=pair,
screener="CRYPTO",
exchange="BINANCE",
interval=Interval.INTERVAL_5_MINUTES
)
print(test.get_analysis().indicators["RSI"])
If this can help, here are the codes on TradingView to get RSI and how is calculated RMA
# RSI
study(title="Relative Strength Index", shorttitle="RSI", format=format.price, precision=2, resolution="")
len = input(14, minval=1, title="Length")
src = input(close, "Source", type = input.source)
up = rma(max(change(src), 0), len)
down = rma(-min(change(src), 0), len)
rsi = down == 0 ? 100 : up == 0 ? 0 : 100 - (100 / (1 + up / down))
plot(rsi, "RSI", color=#7E57C2)
band1 = hline(70, "Upper Band", color=#787B86)
bandm = hline(50, "Middle Band", color=color.new(#787B86, 50))
band0 = hline(30, "Lower Band", color=#787B86)
fill(band1, band0, color=color.rgb(126, 87, 194, 90), title="Background")
# RMA
plot(rma(close, 15))
//the same on pine
pine_rma(src, length) =>
alpha = 1/length
sum = 0.0
sum := na(sum[1]) ? sma(src, length) : alpha * src + (1 - alpha) * nz(sum[1])
plot(pine_rma(close, 15))
Please guys, help me to find what is wrong. :(
And Thank you by advance! Thanks for reading!

How to find Average directional movement for stocks using Pandas?

I have a dataframe of OHLCV data. I would like to know if anyone knows any tutorial or any way of finding ADX(Average directional movement ) using pandas?
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
import datetime as dt
import numpy as nm
start=dt.datetime.today()-dt.timedelta(59)
end=dt.datetime.today()
df=pd.DataFrame(yf.download("MSFT", start=start, end=end))
The average directional index, or ADX, is the primary technical indicator among the five indicators that make up a technical trading system developed by J. Welles Wilder, Jr. and is calculated using the other indicators that make up the trading system. The ADX is primarily used as an indicator of momentum, or trend strength, but the total ADX system is also used as a directional indicator.
Directional movement is calculated by comparing the difference between two consecutive lows with the difference between their respective highs.
For the excel calculation of ADX this is a really good video:
https://www.youtube.com/watch?v=LKDJQLrXedg&t=387s
I was playing with this a little bit and found something that can help you with the issue:
def ADX(data: pd.DataFrame, period: int):
"""
Computes the ADX indicator.
"""
df = data.copy()
alpha = 1/period
# TR
df['H-L'] = df['High'] - df['Low']
df['H-C'] = np.abs(df['High'] - df['Close'].shift(1))
df['L-C'] = np.abs(df['Low'] - df['Close'].shift(1))
df['TR'] = df[['H-L', 'H-C', 'L-C']].max(axis=1)
del df['H-L'], df['H-C'], df['L-C']
# ATR
df['ATR'] = df['TR'].ewm(alpha=alpha, adjust=False).mean()
# +-DX
df['H-pH'] = df['High'] - df['High'].shift(1)
df['pL-L'] = df['Low'].shift(1) - df['Low']
df['+DX'] = np.where(
(df['H-pH'] > df['pL-L']) & (df['H-pH']>0),
df['H-pH'],
0.0
)
df['-DX'] = np.where(
(df['H-pH'] < df['pL-L']) & (df['pL-L']>0),
df['pL-L'],
0.0
)
del df['H-pH'], df['pL-L']
# +- DMI
df['S+DM'] = df['+DX'].ewm(alpha=alpha, adjust=False).mean()
df['S-DM'] = df['-DX'].ewm(alpha=alpha, adjust=False).mean()
df['+DMI'] = (df['S+DM']/df['ATR'])*100
df['-DMI'] = (df['S-DM']/df['ATR'])*100
del df['S+DM'], df['S-DM']
# ADX
df['DX'] = (np.abs(df['+DMI'] - df['-DMI'])/(df['+DMI'] + df['-DMI']))*100
df['ADX'] = df['DX'].ewm(alpha=alpha, adjust=False).mean()
del df['DX'], df['ATR'], df['TR'], df['-DX'], df['+DX'], df['+DMI'], df['-DMI']
return df
At the beginning the values aren't correct (as always with the EWM approach) but after several computations it converges to the correct value.
Math was taken from here.
def ADX(df):
def getCDM(df):
dmpos = df["High"][-1] - df["High"][-2]
dmneg = df["Low"][-2] - df["Low"][-1]
if dmpos > dmneg:
return dmpos
else:
return dmneg
def getDMnTR(df):
DMpos = []
DMneg = []
TRarr = []
n = round(len(df)/14)
idx = n
while n <= (len(df)):
dmpos = df["High"][n-1] - df["High"][n-2]
dmneg = df["Low"][n-2] - df["Low"][n-1]
DMpos.append(dmpos)
DMneg.append(dmneg)
a1 = df["High"][n-1] - df["High"][n-2]
a2 = df["High"][n-1] - df["Close"][n-2]
a3 = df["Low"][n-1] - df["Close"][n-2]
TRarr.append(max(a1,a2,a3))
n = idx + n
return DMpos, DMneg, TRarr
def getDI(df):
DMpos, DMneg, TR = getDMnTR(df)
CDM = getCDM(df)
POSsmooth = (sum(DMpos) - sum(DMpos)/len(DMpos) + CDM)
NEGsmooth = (sum(DMneg) - sum(DMneg)/len(DMneg) + CDM)
DIpos = (POSsmooth / (sum(TR)/len(TR))) *100
DIneg = (NEGsmooth / (sum(TR)/len(TR))) *100
return DIpos, DIneg
def getADX(df):
DIpos, DIneg = getDI(df)
dx = (abs(DIpos- DIneg) / abs(DIpos + DIneg)) * 100
ADX = dx/14
return ADX
return(getADX(df))
print(ADX(df))
This gives you the exact numbers as Tradingview and Thinkorswim.
import numpy as np
def ema(arr, periods=14, weight=1, init=None):
leading_na = np.where(~np.isnan(arr))[0][0]
arr = arr[leading_na:]
alpha = weight / (periods + (weight-1))
alpha_rev = 1 - alpha
n = arr.shape[0]
pows = alpha_rev**(np.arange(n+1))
out1 = np.array([])
if 0 in pows:
out1 = ema(arr[:int(len(arr)/2)], periods)
arr = arr[int(len(arr)/2) - 1:]
init = out1[-1]
n = arr.shape[0]
pows = alpha_rev**(np.arange(n+1))
scale_arr = 1/pows[:-1]
if init:
offset = init * pows[1:]
else:
offset = arr[0]*pows[1:]
pw0 = alpha*alpha_rev**(n-1)
mult = arr*pw0*scale_arr
cumsums = mult.cumsum()
out = offset + cumsums*scale_arr[::-1]
out = out[1:] if len(out1) > 0 else out
out = np.concatenate([out1, out])
out[:periods] = np.nan
out = np.concatenate(([np.nan]*leading_na, out))
return out
def atr(highs, lows, closes, periods=14, ema_weight=1):
hi = np.array(highs)
lo = np.array(lows)
c = np.array(closes)
tr = np.vstack([np.abs(hi[1:]-c[:-1]),
np.abs(lo[1:]-c[:-1]),
(hi-lo)[1:]]).max(axis=0)
atr = ema(tr, periods=periods, weight=ema_weight)
atr = np.concatenate([[np.nan], atr])
return atr
def adx(highs, lows, closes, periods=14):
highs = np.array(highs)
lows = np.array(lows)
closes = np.array(closes)
up = highs[1:] - highs[:-1]
down = lows[:-1] - lows[1:]
up_idx = up > down
down_idx = down > up
updm = np.zeros(len(up))
updm[up_idx] = up[up_idx]
updm[updm < 0] = 0
downdm = np.zeros(len(down))
downdm[down_idx] = down[down_idx]
downdm[downdm < 0] = 0
_atr = atr(highs, lows, closes, periods)[1:]
updi = 100 * ema(updm, periods) / _atr
downdi = 100 * ema(downdm, periods) / _atr
zeros = (updi + downdi == 0)
downdi[zeros] = .0000001
adx = 100 * np.abs(updi - downdi) / (updi + downdi)
adx = ema(np.concatenate([[np.nan], adx]), periods)
return adx

Categories

Resources