I am trying to run the following script. I am getting a KeyError on the function trying to plot stock returns.
It seems to be coming from fig = px.line(grouped_metrics, x="Date Snapshot", y="value", color='variable'). However, it is a valid column in my df. I have tried adding different columns in the color= argument but I get the same error. My three columns are 'variable', 'value', 'Date Snapshot'. Appologies for the block of code:
Data for ref
import pandas as pd
import numpy as np
import datetime
import plotly.express as px
import yfinance as yf
import pandas_market_calendars as mcal
from plotly.offline import init_notebook_mode, plot
init_notebook_mode(connected=True)
def create_market_cal(start, end):
nyse = mcal.get_calendar('NYSE')
schedule = nyse.schedule(stocks_start, stocks_end)
market_cal = mcal.date_range(schedule, frequency='1D')
market_cal = market_cal.tz_localize(None)
market_cal = [i.replace(hour=0) for i in market_cal]
return market_cal
def get_data(stocks, start, end):
def data(ticker):
df = yf.download(ticker, start=start, end=(end + datetime.timedelta(days=1)))
df['symbol'] = ticker
df.index = pd.to_datetime(df.index)
return df
datas = map(data, stocks)
return(pd.concat(datas, keys=stocks, names=['Ticker', 'Date'], sort=True))
def get_benchmark(benchmark, start, end):
benchmark = get_data(benchmark, start, end)
benchmark = benchmark.drop(['symbol'], axis=1)
benchmark.reset_index(inplace=True)
return benchmark
portfolio_df = pd.read_csv('C:\\tmp\\stock_transactions.csv')
portfolio_df['Open date'] = pd.to_datetime(portfolio_df['Open date'])
symbols = portfolio_df.Symbol.unique()
stocks_start = datetime.datetime(2018, 3, 1)
stocks_end = datetime.datetime(2021, 3, 10)
daily_adj_close = get_data(symbols, stocks_start, stocks_end)
daily_adj_close = daily_adj_close[['Close']].reset_index()
daily_benchmark = get_benchmark(['SPY'], stocks_start, stocks_end)
daily_benchmark = daily_benchmark[['Date', 'Close']]
market_cal = create_market_cal(stocks_start, stocks_end)
def position_adjust(daily_positions, sale):
stocks_with_sales = pd.DataFrame()
buys_before_start = daily_positions[daily_positions['Type'] == 'Buy'].sort_values(by='Open date')
for position in buys_before_start[buys_before_start['Symbol'] == sale[1]['Symbol']].iterrows():
if position[1]['Qty'] <= sale[1]['Qty']:
sale[1]['Qty'] -= position[1]['Qty']
position[1]['Qty'] = 0
else:
position[1]['Qty'] -= sale[1]['Qty']
sale[1]['Qty'] -= sale[1]['Qty']
stocks_with_sales = stocks_with_sales.append(position[1])
return stocks_with_sales
def portfolio_start_balance(portfolio, start_date):
positions_before_start = portfolio[portfolio['Open date'] <= start_date]
future_sales = portfolio[(portfolio['Open date'] >= start_date) & (portfolio['Type'] == 'Sell')]
sales = positions_before_start[positions_before_start['Type'] =='Sell'].groupby(['Symbol'])['Qty'].sum()
sales = sales.reset_index()
positions_no_change = positions_before_start[~positions_before_start['Symbol'].isin(sales['Symbol'].unique())]
adj_positions_df = pd.DataFrame()
for sale in sales.iterrows():
adj_positions = position_adjust(positions_before_start, sale)
adj_positions_df = adj_positions_df.append(adj_positions)
adj_positions_df = adj_positions_df.append(positions_no_change)
adj_positions_df = adj_positions_df.append(future_sales)
adj_positions_df = adj_positions_df[adj_positions_df['Qty'] > 0]
return adj_positions_df
active_portfolio = portfolio_start_balance(portfolio_df, stocks_start)
def fifo(daily_positions, sales, date):
sales = sales[sales['Open date'] == date]
daily_positions = daily_positions[daily_positions['Open date'] <= date]
positions_no_change = daily_positions[~daily_positions['Symbol'].isin(sales['Symbol'].unique())]
adj_positions = pd.DataFrame()
for sale in sales.iterrows():
adj_positions = adj_positions.append(position_adjust(daily_positions, sale))
adj_positions = adj_positions.append(positions_no_change)
adj_positions = adj_positions[adj_positions['Qty'] > 0]
return adj_positions
def time_fill(portfolio, market_cal):
sales = portfolio[portfolio['Type'] == 'Sell'].groupby(['Symbol','Open date'])['Qty'].sum()
sales = sales.reset_index()
per_day_balance = []
for date in market_cal:
if (sales['Open date'] == date).any():
portfolio = fifo(portfolio, sales, date)
daily_positions = portfolio[portfolio['Open date'] <= date]
daily_positions = daily_positions[daily_positions['Type'] == 'Buy']
daily_positions['Date Snapshot'] = date
per_day_balance.append(daily_positions)
return per_day_balance
positions_per_day = time_fill(active_portfolio, market_cal)
def modified_cost_per_share(portfolio, adj_close, start_date):
df = pd.merge(portfolio, adj_close, left_on=['Date Snapshot', 'Symbol'],
right_on=['Date', 'Ticker'], how='left')
df.rename(columns={'Close': 'Symbol Adj Close'}, inplace=True)
df['Adj cost daily'] = df['Symbol Adj Close'] * df['Qty']
df = df.drop(['Ticker', 'Date'], axis=1)
return df
def benchmark_portfolio_calcs(portfolio, benchmark):
portfolio = pd.merge(portfolio, benchmark, left_on=['Date Snapshot'],
right_on=['Date'], how='left')
portfolio = portfolio.drop(['Date'], axis=1)
portfolio.rename(columns={'Close': 'Benchmark Close'}, inplace=True)
benchmark_max = benchmark[benchmark['Date'] == benchmark['Date'].max()]
portfolio['Benchmark End Date Close'] = portfolio.apply(lambda x: benchmark_max['Close'], axis=1)
benchmark_min = benchmark[benchmark['Date'] == benchmark['Date'].min()]
portfolio['Benchmark Start Date Close'] = portfolio.apply(lambda x: benchmark_min['Close'], axis=1)
return portfolio
def portfolio_end_of_year_stats(portfolio, adj_close_end):
adj_close_end = adj_close_end[adj_close_end['Date'] == adj_close_end['Date'].max()]
portfolio_end_data = pd.merge(portfolio, adj_close_end, left_on='Symbol',
right_on='Ticker')
portfolio_end_data.rename(columns={'Close': 'Ticker End Date Close'}, inplace=True)
portfolio_end_data = portfolio_end_data.drop(['Ticker', 'Date'], axis=1)
return portfolio_end_data
def portfolio_start_of_year_stats(portfolio, adj_close_start):
adj_close_start = adj_close_start[adj_close_start['Date'] == adj_close_start['Date'].min()]
portfolio_start = pd.merge(portfolio, adj_close_start[['Ticker', 'Close', 'Date']],
left_on='Symbol', right_on='Ticker')
portfolio_start.rename(columns={'Close': 'Ticker Start Date Close'}, inplace=True)
portfolio_start['Adj cost per share'] = np.where(portfolio_start['Open date'] <= portfolio_start['Date'],
portfolio_start['Ticker Start Date Close'],
portfolio_start['Adj cost per share'])
portfolio_start['Adj cost'] = portfolio_start['Adj cost per share'] * portfolio_start['Qty']
portfolio_start = portfolio_start.drop(['Ticker', 'Date'], axis=1)
portfolio_start['Equiv Benchmark Shares'] = portfolio_start['Adj cost'] / portfolio_start['Benchmark Start Date Close']
portfolio_start['Benchmark Start Date Cost'] = portfolio_start['Equiv Benchmark Shares'] * portfolio_start['Benchmark Start Date Close']
return portfolio_start
def calc_returns(portfolio):
portfolio['Benchmark Return'] = portfolio['Benchmark Close'] / portfolio['Benchmark Start Date Close'] - 1
portfolio['Ticker Return'] = portfolio['Symbol Adj Close'] / portfolio['Adj cost per share'] - 1
portfolio['Ticker Share Value'] = portfolio['Qty'] * portfolio['Symbol Adj Close']
portfolio['Benchmark Share Value'] = portfolio['Equiv Benchmark Shares'] * portfolio['Benchmark Close']
portfolio['Abs Value Compare'] = portfolio['Ticker Share Value'] - portfolio['Benchmark Start Date Cost']
portfolio['Abs Value Return'] = portfolio['Abs Value Compare']/portfolio['Benchmark Start Date Cost']
portfolio['Stock Gain / (Loss)'] = portfolio['Ticker Share Value'] - portfolio['Adj cost']
portfolio['Benchmark Gain / (Loss)'] = portfolio['Benchmark Share Value'] - portfolio['Adj cost']
portfolio['Abs. Return Compare'] = portfolio['Ticker Return'] - portfolio['Benchmark Return']
return portfolio
def per_day_portfolio_calcs(per_day_holdings, daily_benchmark, daily_adj_close, stocks_start):
df = pd.concat(per_day_holdings, sort=True)
mcps = modified_cost_per_share(df, daily_adj_close, stocks_start)
bpc = benchmark_portfolio_calcs(mcps, daily_benchmark)
pes = portfolio_end_of_year_stats(bpc, daily_adj_close)
pss = portfolio_start_of_year_stats(pes, daily_adj_close)
returns = calc_returns(pss)
return returns
combined_df = per_day_portfolio_calcs(positions_per_day, daily_benchmark, daily_adj_close, stocks_start)
def line(df, val_1, val_2):
grouped_metrics = combined_df.groupby(['Date Snapshot'])[[val_1, val_2]].sum().reset_index()
grouped_metrics = pd.melt(grouped_metrics, id_vars=['Date Snapshot'],
value_vars=[val_1, val_2])
fig = px.line(grouped_metrics, x="Date Snapshot", y="value",
color='variable')
plot(fig)
line(combined_df, 'Stock Gain / (Loss)', 'Benchmark Gain / (Loss)')
def line_facets(df, val_1, val_2):
grouped_metrics = combined_df.groupby(['Symbol', 'Date Snapshot'])[[val_1, val_2]].sum().reset_index()
grouped_metrics = pd.melt(grouped_metrics, id_vars=['Symbol', 'Date Snapshot'],
value_vars=[val_1, val_2])
fig = px.line(grouped_metrics, x="Date Snapshot", y="value",
color='variable', facet_col="Symbol", facet_col_wrap=5)
plot(fig)
line_facets(combined_df, 'Ticker Return', 'Benchmark Return')
The above throws the following error:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-4-337cc930bd36> in <module>
183
184
--> 185 line(combined_df, 'Stock Gain / (Loss)', 'Benchmark Gain / (Loss)')
186
187
<ipython-input-4-337cc930bd36> in line(df, val_1, val_2)
179 value_vars=[val_1, val_2])
180 fig = px.line(grouped_metrics, x="Date Snapshot", y="value",
--> 181 color='variable')
182 plot(fig)
183
~\anaconda3\lib\site-packages\plotly\express\_chart_types.py in line(data_frame, x, y, line_group, color, line_dash, hover_name, hover_data, custom_data, text, facet_row, facet_col, facet_col_wrap, facet_row_spacing, facet_col_spacing, error_x, error_x_minus, error_y, error_y_minus, animation_frame, animation_group, category_orders, labels, orientation, color_discrete_sequence, color_discrete_map, line_dash_sequence, line_dash_map, log_x, log_y, range_x, range_y, line_shape, render_mode, title, template, width, height)
250 a polyline mark in 2D space.
251 """
--> 252 return make_figure(args=locals(), constructor=go.Scatter)
253
254
~\anaconda3\lib\site-packages\plotly\express\_core.py in make_figure(args, constructor, trace_patch, layout_patch)
1887 prefix = get_label(args, args["facet_row"]) + "="
1888 row_labels = [prefix + str(s) for s in sorted_group_values[m.grouper]]
-> 1889 for val in sorted_group_values[m.grouper]:
1890 if val not in m.val_map:
1891 m.val_map[val] = m.sequence[len(m.val_map) % len(m.sequence)]
KeyError: 'variable'
In case someone comes across this issue: I had the same situation, in my case the error message was misleading: root cause was that the dataframe in px.line() was emtpy (no rows).
Related
I have the following custom function that generates a row with EMA data for a specific asset based on the current time.
Here's the complete code for the function:
def find_ema(futures_symbol):
futures_symbol = futures_symbol
def fetch_ohlc(symbol,timeframe, timesymbol):
symbol = symbol
timeframe = timeframe
timesymbol = timesymbol
#fetch data-binance api
candlestick_url = 'https://fapi.binance.com/fapi/v1/continuousKlines?pair='+symbol+'&contractType=PERPETUAL&interval='+str(timeframe)+timesymbol+'&limit=1500'
candlestick_chart = requests.get(candlestick_url).json()
candlestick_df = pd.DataFrame(candlestick_chart)
candlestick_df = candlestick_df.iloc[:,1:7]
candlestick_df.columns = ['open', 'high', 'low','close','volume', 'date']
candlestick_df['date'] = pd.to_datetime(candlestick_df['date'], unit='ms').round('1s')
candlestick_df.insert(0, 'date', candlestick_df.pop('date') )
# reset to midnight
candlestick_df.date = pd.to_datetime(candlestick_df.date)
min_date = candlestick_df.date.min()
NextDay_Date = (min_date + datetime.timedelta(days=1)).replace(hour=0, minute=0, second=0, microsecond=0)
candlestick_df = candlestick_df[candlestick_df.date >= NextDay_Date].copy()
candlestick_df = candlestick_df.set_index('date')
candlestick_df['symbol'] = symbol
ohlc_data = candlestick_df
cols= ['open', 'high', 'low','close','volume']
ohlc_data[cols] = ohlc_data[cols].apply(pd.to_numeric, errors = 'coerce')
ohlc_data[cols] = ohlc_data[cols].round(decimals=2)
return ohlc_data
#separate df for limited candle stick data
ohlc_smaller = fetch_ohlc(futures_symbol,5,'m')
ohlc_larger = fetch_ohlc(futures_symbol,1,'h')
ema_df = ohlc_smaller
#calculating ema with 200 row data
ema_df['15m'] = ohlc_smaller.resample('15T').apply({'close':'last'}).ewm(span = 200, min_periods = 200).mean()
ema_df['30m'] = ohlc_smaller.resample('30T').apply({'close':'last'}).ewm(span = 200, min_periods = 200).mean()
ema_df['1h'] = ohlc_larger.resample('60T').apply({'close':'last'}).ewm(span = 200, min_periods = 200).mean()
ema_df['2h'] = ohlc_larger.resample('120T').apply({'close':'last'}).ewm(span = 200, min_periods = 200).mean()
ema_df['4h'] = ohlc_larger.resample('240T').apply({'close':'last'}).ewm(span = 200, min_periods = 200).mean()
#forward fill larger tf data to smaller tf
ema_df = ema_df.fillna(method='ffill').tail(1)
ema_df.insert(0, 'symbol', ema_df.pop('symbol'))
ema_df = ema_df.drop(['high','low','close','volume'], axis=1)
return ema_df
When I apply this function to a single symbol it returns the dataframe perfectly. for example:
working example on single symbol
However, I now have a list of symbols on which I want to apply this function and create a new dataframe.
Here's how I am generating my list of symbols:
symbols_url = 'https://fapi.binance.com/fapi/v1/ticker/price'
symbols_data = requests.get(symbols_url).json()
symbols_df = pd.DataFrame(symbols_data)
symbols_df = symbols_df[symbols_df['symbol'].str.contains('USDT')]
futures_tickers_binance = list(symbols_df['symbol'])
#some ends with numbers(eg:Quarterly Contracts), hence filter:
futures_tickers_binance = list(filter(lambda x: x.endswith(('USDT')), futures_tickers_binance))
Here's what I thought would work:
for symbol in futures_tickers_binance:
for j in range(len(futures_tickers_binance)):
df = df.append(find_ema(futures_tickers_binance[j]))
df = df.drop_duplicates()
However, this returns a valueError:
ValueError: If using all scalar values, you must pass an index
Is there a way to apply this function and generate a new dataframe with the values for the complete list in a faster way?
Thank you in advance for your patience to read this!
The final result would look something like this, however my loop is not working the way it is supposed to be working:
Expected (almost) perfect result
Here's my complete code if needed:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import datetime
import requests
symbols_url = 'https://fapi.binance.com/fapi/v1/ticker/price'
symbols_data = requests.get(symbols_url).json()
symbols_df = pd.DataFrame(symbols_data)
symbols_df = symbols_df[symbols_df['symbol'].str.contains('USDT')]
futures_tickers_binance = list(symbols_df['symbol'])
#some ends with numbers(eg:Quarterly Contracts), hence filter:
futures_tickers_binance = list(filter(lambda x: x.endswith(('USDT')), futures_tickers_binance))
def find_ema(futures_symbol):
futures_symbol = futures_symbol
def fetch_ohlc(symbol,timeframe, timesymbol):
symbol = symbol
timeframe = timeframe
timesymbol = timesymbol
#fetch data-binance api
candlestick_url = 'https://fapi.binance.com/fapi/v1/continuousKlines?pair='+symbol+'&contractType=PERPETUAL&interval='+str(timeframe)+timesymbol+'&limit=1500'
candlestick_chart = requests.get(candlestick_url).json()
candlestick_df = pd.DataFrame(candlestick_chart)
candlestick_df = candlestick_df.iloc[:,1:7]
candlestick_df.columns = ['open', 'high', 'low','close','volume', 'date']
candlestick_df['date'] = pd.to_datetime(candlestick_df['date'], unit='ms').round('1s')
candlestick_df.insert(0, 'date', candlestick_df.pop('date') )
# reset to midnight
candlestick_df.date = pd.to_datetime(candlestick_df.date)
min_date = candlestick_df.date.min()
NextDay_Date = (min_date + datetime.timedelta(days=1)).replace(hour=0, minute=0, second=0, microsecond=0)
candlestick_df = candlestick_df[candlestick_df.date >= NextDay_Date].copy()
candlestick_df = candlestick_df.set_index('date')
candlestick_df['symbol'] = symbol
ohlc_data = candlestick_df
cols= ['open', 'high', 'low','close','volume']
ohlc_data[cols] = ohlc_data[cols].apply(pd.to_numeric, errors = 'coerce')
ohlc_data[cols] = ohlc_data[cols].round(decimals=2)
return ohlc_data
#separate df for limited candle stick data
ohlc_smaller = fetch_ohlc(futures_symbol,5,'m')
ohlc_larger = fetch_ohlc(futures_symbol,1,'h')
ema_df = ohlc_smaller
#calculating ema with 200 row data
ema_df['15m'] = ohlc_smaller.resample('15T').apply({'close':'last'}).ewm(span = 200, min_periods = 200).mean()
ema_df['30m'] = ohlc_smaller.resample('30T').apply({'close':'last'}).ewm(span = 200, min_periods = 200).mean()
ema_df['1h'] = ohlc_larger.resample('60T').apply({'close':'last'}).ewm(span = 200, min_periods = 200).mean()
ema_df['2h'] = ohlc_larger.resample('120T').apply({'close':'last'}).ewm(span = 200, min_periods = 200).mean()
ema_df['4h'] = ohlc_larger.resample('240T').apply({'close':'last'}).ewm(span = 200, min_periods = 200).mean()
#forward fill larger tf data to smaller tf
ema_df = ema_df.fillna(method='ffill').tail(1)
ema_df.insert(0, 'symbol', ema_df.pop('symbol'))
ema_df = ema_df.drop(['high','low','close','volume'], axis=1)
return ema_df
for symbol in futures_tickers_binance:
for j in range(len(futures_tickers_binance)):
df = df.append(find_ema(futures_tickers_binance[j]))
df = df.drop_duplicates()
So I'm trying to plot Heiken Ashi candles, and then I want to plot them on graph.
My code so far:
def heikin_ashi():
historical_data = client.get_historical_klines(symbol=SYMBOL, interval=TIME_PERIOD, start_str="15 days ago UTC", klines_type=HistoricalKlinesType.FUTURES)
hist_df = pd.DataFrame(historical_data)
hist_df.columns = ['Open Time', 'Open', 'High', 'Low', 'Close', 'Volume', 'Close Time', 'Quote Asset Volume',
'Number of Trades', 'TB Base Volume', 'TB Quote Volume', 'Ignore']
hist_df['Open Time'] = pd.to_datetime(hist_df['Open Time']/1000, unit='s')
hist_df['Close Time'] = pd.to_datetime(hist_df['Close Time']/1000, unit='s')
df_HA = hist_df
df_HA['Close'] = (hist_df['Open'] + hist_df['High'] + hist_df['Low'] + hist_df['Close']) / 4
# idx = df_HA.index.name
# df_HA.reset_index(inplace=True)
for i in range(0, len(hist_df)):
if i == 0:
df_HA['Open'][i] = ((hist_df['Open'][i] + hist_df['Close'][i]) / 2)
else:
df_HA['Open'][i] = ((hist_df['Open'][i - 1] + hist_df['Close'][i - 1]) / 2)
# if idx:
# df_HA.set_index(idx, inplace=True)
df_HA['High'] = hist_df[['Open', 'Close', 'High']].max(axis=1)
df_HA['Low'] = hist_df[['Open', 'Close', 'Low']].min(axis=1)
print(df_HA)
Error:
result[mask] = op(xrav[mask], y)
TypeError: unsupported operand type(s) for /: 'str' and 'int'
Also I came across this:
import pandas as pd
def heikin_ashi(df):
heikin_ashi_df = pd.DataFrame(index=df.index.values, columns=['open', 'high', 'low', 'close'])
heikin_ashi_df['close'] = (df['open'] + df['high'] + df['low'] + df['close']) / 4
for i in range(len(df)):
if i == 0:
heikin_ashi_df.iat[0, 0] = df['open'].iloc[0]
else:
heikin_ashi_df.iat[i, 0] = (heikin_ashi_df.iat[i-1, 0] + heikin_ashi_df.iat[i-1, 3]) / 2
heikin_ashi_df['high'] = heikin_ashi_df.loc[:, ['open', 'close']].join(df['high']).max(axis=1)
heikin_ashi_df['low'] = heikin_ashi_df.loc[:, ['open', 'close']].join(df['low']).min(axis=1)
return heikin_ashi_df
How do I use the above code with my data? I'm a novice, so I'm confused. I'd appreciate it if someone could provide me with a proper way to do this.
Link to the source: https://github.com/emreturan/heikin-ashi/blob/master/heikin_ashi.py
I need to plot this on a graph too. Thanks.
I will use the 'heikin_ashi' code to answer the example of using mplfinance, a popular finance library, for the graph. There are many other libraries available for visualizing investments, so we will use this as a basic form for data acquisition and visualization. A sample of mplfinance can be found here for reference.
import yfinance as yf
import pandas as pd
import mplfinance as mpf
data = yf.download("AAPL", start="2021-07-01", end="2022-01-01", progress=False)
data.columns = ['open', 'high', 'low', 'close', 'adj close', 'volume']
def heikin_ashi(df):
heikin_ashi_df = df.copy()
#heikin_ashi_df = pd.DataFrame(index=df.index.values, columns=['open', 'high', 'low', 'close'])
heikin_ashi_df['close'] = (df['open'] + df['high'] + df['low'] + df['close']) / 4
for i in range(len(df)):
if i == 0:
heikin_ashi_df.iat[0, 0] = df['open'].iloc[0]
else:
heikin_ashi_df.iat[i, 0] = (heikin_ashi_df.iat[i-1, 0] + heikin_ashi_df.iat[i-1, 3]) / 2
heikin_ashi_df['high'] = heikin_ashi_df.loc[:, ['open', 'close']].join(df['high']).max(axis=1)
heikin_ashi_df['low'] = heikin_ashi_df.loc[:, ['open', 'close']].join(df['low']).min(axis=1)
return heikin_ashi_df
df_ha = heikin_ashi(data)
# mpf plotting
mpf.plot(df_ha, type='candle', figratio=(8,4), title='APPL', style='yahoo')
import pandas as pd
import time
import yfinance as yf
import money_18
import talib
def backtest(df,us_code, profit_target, stop_loss, macd_diff):
pos_opened = False
open_price = 0
close_price = 0
pnl = 0
pnl_list = []
original_capital = 100000
temp_capital = original_capital
num_of_lot = 0
equity_value = 0
equity_value_list = []
dd_dollar = 0
dd_dollar_list = []
dd_pct = 0
dd_pct_list = []
mdd_dollar = 0
mdd_pct = 0
total_profit = 0
num_of_trade = 0
for i in range(1, len(df)):
now_date = df.loc[i,'Date']
now_open = df.loc[i,'Open']
now_high = df.loc[i,'High']
now_low = df.loc[i,'Low']
now_close = df.loc[i,'Close']
now_rsi = df.loc[i,'RSI']
now_upper_band = df.loc[i,'Upper_Band']
now_middle_band = df.loc[i,'Middle_Band']
now_lower_band = df.loc[i,'Lower_Band']
now_macd = df.loc[i,'MACD']
now_macd_signal = df.loc[i,'MACD_Signal']
now_macd_hist = df.loc[i,'MACD_Hist']
##### equity curve #####
equity_value = round(temp_capital + (now_open - open_price) * num_of_lot )
equity_value_list.append(equity_value)
temp_max_equity = max(equity_value_list)
dd_dollar = temp_max_equity - equity_value
dd_dollar_list.append(dd_dollar)
mdd_dollar = max(dd_dollar_list)
dd_pct = (temp_max_equity - equity_value) / temp_max_equity
dd_pct_list.append(dd_pct)
mdd_pct = max(dd_pct_list)
##### open position #####
if (pos_opened == False) and (i < len(df) - 1) and now_macd_hist > macd_diff :
pos_opened = True
open_price = now_close
num_of_lot = temp_capital // (open_price)
##### profit taking and stop loss #####
if (pos_opened == True) and ((now_open - open_price > profit_target * open_price) or (now_open - open_price < stop_loss * open_price) or (i == len(df) -1)):
pos_opened = False
close_price = now_open
pnl = (close_price - open_price) * num_of_lot
pnl_list.append(pnl)
open_price = 0
num_of_lot = 0
temp_capital = temp_capital + pnl
if len(pnl_list) > 0:
total_profit = sum(pnl_list)
num_of_trade = len(pnl_list)
return us_code, profit_target, stop_loss, total_profit, num_of_trade, mdd_dollar, mdd_pct, macd_diff
if __name__ == '__main__':
us_code_list = ['TSLA', 'AAPL']
macd_diff_list = [0, 0.05]
profit_target_list = [0.03, 0.06]
stop_loss_list = [-0.01, -0.02, -0.03]
start_date = '2020-01-01'
end_date = '2020-12-31'
df_dict = {}
for us_code in us_code_list:
df= yf.Ticker(us_code).history(start=start_date, end=end_date)
df= df[df['Volume'] > 0]
df = df[['Open', 'High', 'Low', 'Close']]
df['RSI'] = talib.RSI(df['Close'], timeperiod=14)
df['Upper_Band'], df['Middle_Band'], df['Lower_Band'] = talib.BBANDS(df['Close'], 20, 2, 2)
df['MACD'], df['MACD_Signal'], df['MACD_Hist'] = talib.MACD(df['Close'], fastperiod=12, slowperiod=26,
signalperiod=9)
df = df[df['MACD_Hist'].notna()]
df = df.reset_index()
df_dict[us_code] = df
save_us_code = ''
save_macd_diff = 0
save_profit_target = 0
save_stop_loss = 0
total_profit = 0
num_of_trade = 0
mdd_dollar = 0
mdd_pct = 0
save_us_code_list = []
save_macd_diff_list = []
save_profit_target_list = []
save_stop_loss_list = []
total_profit_list = []
num_of_trade_list = []
mdd_dollar_list = []
mdd_pct_list = []
result_dict = {}
for us_code in us_code_list:
for macd_diff in macd_diff_list:
for profit_target in profit_target_list:
for stop_loss in stop_loss_list:
print(us_code, macd_diff, profit_target, stop_loss) ## the problem should be starting from here##
save_us_code, save_profit_target, save_stop_loss, total_profit, num_of_trade, mdd_dollar, mdd_pct, macd_diff = backtest(df, us_code, profit_target, stop_loss, macd_diff)
save_us_code_list.append(save_us_code)
save_profit_target_list.append(save_profit_target)
save_stop_loss_list.append(save_stop_loss)
total_profit_list.append(total_profit)
num_of_trade_list.append(num_of_trade)
mdd_dollar_list.append(mdd_dollar)
mdd_pct_list.append(mdd_pct)
macd_diff_list.append(macd_diff)
I am working on the algo trade, however, I created a for loop to put my parameter into my backtest function. However, the for loop keeps looping non-stop.
I think the error starting from "for macd_diff in macd_diff_list:" because i try to print the result below that row, the result is already indefinite.
Now that you've shown the full code, your problem is obvious. Your original example didn't show the issue because you didn't include all relevant code. Here's your example with the relevant code that's causing the issue:
for us_code in us_code_list:
for macd_diff in macd_diff_list:
for profit_target in profit_target_list:
for stop_loss in stop_loss_list:
... # irrelevant code not shown
macd_diff_list.append(macd_diff)
The issue is that you're looping through each item in macd_diff_list, but then for each loop iteration, you add an item to that list. So of course the loop will be infinite. You need to be looping through a different list, or adding items to a different list.
I'm using apply on a multi-index groupby with Numba and found that the slowest part is adding the results back to the original frame. How would I achieve this with speeds that are practical? The indexes in the original df are unique.
Import and generate some data:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import numba
times = np.arange(datetime(2017, 1, 1), datetime(2020, 2, 1), timedelta(minutes=60)).astype(np.datetime64)
tlen = len(times)
A, Z = np.array(['A', 'Z']).view('int32')
symbol_names = np.random.randint(low=A, high=Z, size=50 * 7, dtype='int32').view(f'U{7}')
times = np.concatenate([times] * 50)
names = np.array([y for x in [[s] * tlen for s in symbol_names] for y in x])
open_column = np.random.randint(low=40, high=60, size=len(times), dtype='uint32')
high_column = np.random.randint(low=50, high=70, size=len(times), dtype='uint32')
low_column = np.random.randint(low=30, high=50, size=len(times), dtype='uint32')
close_column = np.random.randint(low=40, high=60, size=len(times), dtype='uint32')
df = pd.DataFrame({'open': open_column, 'high': high_column, 'low': low_column, 'close': close_column}, index=[names, times])
df.index = df.index.set_names(['Symbol', 'Date'])
df['entry'] = np.select( [df.open > df.open.shift(), False], (df.close, -1), np.nan)
df['exit'] = df.close.where(df.high > df.open * 1.33, np.nan)
Define the numba function:
#numba.jit(nopython=True)
def nb_func(arr, limit=0, stop=0, tbe=0):
is_active = 0
bars_held = 0
limit_target = np.inf
stop_target = -np.inf
result = np.empty(arr.shape[0], dtype='float32')
for n in range(arr.shape[0]):
ret = 0
if is_active == 1:
bars_held += 1
if arr[n][2] < stop_target:
ret = stop_target
is_active = 0
elif arr[n][1] > limit_target:
ret = limit_target
is_active = 0
elif bars_held >= tbe:
ret = arr[n][3]
is_active = 0
elif arr[n][5] > 0:
ret = arr[n][3]
is_active = 0
if is_active == 0:
if arr[n][4] > 0:
is_active = 1
bars_held = 0
if stop != 0:
stop_target = arr[n][3] * stop
if limit != 0:
limit_target = arr[n][3] * limit
result[n] = ret
return result
Define the applied function with additional arguments and create the groupBy object:
def process(group):
group['result'] = nb_func(group.values, limit=1.10, stop=0.50, tbe=5)
return group['result']
group = df[['open', 'high', 'low', 'close', 'entry', 'exit']].groupby('Symbol')
Using regular concat the result is extremely slow:
def concat(group):
ret = group.apply(*[process]).droplevel(0)
df2 = pd.concat([df, ret], axis=1)
concat(group)
>> concat function took 20.292 s
Adding the result back as a column is faster but still slow:
def simple_readd(group):
ret = group.apply(*[process]).droplevel(0)
df['result'] = ret
simple_readd(group)
>> simple_readd function took 7.141 s
Calculating the results without adding them back to the original frame is fast and I'd like to get the total time to around 1 s:
def series_return(group):
ret = group.apply(*[process]).droplevel(0)
series_return(group)
>> series_return function took 0.905 s
UPDATE:
This is the most improvement in time I've managed, from 7 seconds to about 1.8. Still interested in doing better as the indexes and columns are completely identical, perhaps there is a numPy solution?
def series_merge(group):
ret = group.apply(*[process]).droplevel(0)
merged = df.reset_index().merge(ret, on=['Symbol', 'Date'], how='left').reset_index(drop=True)
merged = merged.set_index(['Symbol', 'Date'])
return merged
series_merge(group)
>> series_merge function took 1.892 s
I am trying to format this data monthly, but by the current date of the month.
import pandas as pd
import datetime
import quandl
import numpy as np
start = datetime.datetime(1993, 10, 2)
end = datetime.date.today()
df = quandl.get("FRED/DGS20", collapse="daily").reset_index()
df.index=np.arange(0,len(df))
print (df)
l=[]
for i in range(0,len(df)):
if (df['DATE'].loc[i]).day == (df['DATE'].loc[len(df)-1]).day:
l.append(df['DATE'].loc[i])
The issue I am running in to is if that date is on a weekend or a holiday, it skips the month. How can I get python to choose the closest applicable day of the month if the given day is N/A?
A bit long but no for loops! insert my code after your df.index=np.arange(0,len(df)) line:
years = pd.DatetimeIndex(df['DATE']).year
years_u = np.unique(years)
years_u_norm = years_u - years_u[0]
months = pd.DatetimeIndex(df['DATE']).month
months_u = np.unique(months)
months_u_norm = months_u - months_u[0]
days = pd.DatetimeIndex(df['DATE']).day
days_u = np.unique(days)
days_u_norm = days_u - days_u[0]
shp = (years_u_norm[-1]+1, months_u_norm[-1]+1, days_u_norm[-1]+1)
mat = np.full(shp, np.nan).ravel()
y_ind = years - years_u[0]
m_ind = months - months_u[0]
d_ind = days - days_u[0]
inds = np.vstack([y_ind[np.newaxis], m_ind[np.newaxis], d_ind[np.newaxis]])
inds2 = np.ravel_multi_index(inds, shp)
inds_grid = np.indices(shp)[2].ravel()
mat[inds2] = inds_grid[inds2]
start_ind = np.ravel_multi_index([[start.year - years_u[0]], [start.month - months_u[0]], [start.day - days_u[0]]], shp)
mat[:start_ind] = np.inf
end_ind = np.ravel_multi_index([[end.year - years_u[0]], [end.month - months_u[0]], [end.day - days_u[0] + 1]], shp)
mat[end_ind:] = np.inf
mat = mat.reshape(shp)
dist = np.absolute(mat - np.full(shp, end.day-1))
min_dist = np.nanargmin(dist, axis=2) + 1
inds_f = np.unique(np.ravel_multi_index(inds[:-1, :], shp[:-1]))
res_inds = np.indices(min_dist.shape)
res_y = res_inds[0].ravel()[inds_f] + years_u[0]
res_m = res_inds[1].ravel()[inds_f] + months_u[0]
res_d = min_dist.ravel()[inds_f]
df = pd.DataFrame({'year': res_y,
'month': res_m,
'day': res_d})
print(df)