Use a pandas DataFrame created inside a function outside of the function - python

I am a Python beginner and wrote a function for a simple moving average strategy. I created a portfolio DataFrame inside the function and now I want to use this DataFrame outside of the function for plotting some graphs. My solution is: return portfolio - but this does not work. Can anybody help me?
This is my code:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Import a data source - FSE-Data with Index 'Date'
all_close_prices = pd.read_csv('FSE_daily_close.csv')
all_close_prices = all_close_prices.set_index('Date')
# Fill NaN Values with the last available stock price - except for Zalando
all_close_prices = all_close_prices.fillna(method='ffill')
# Import ticker symbols
ticker_list = list(all_close_prices)
# Zalando 'FSE/ZO1_X' (position row 99) - doesn't begin in 2004
# Drop Zalando
all_close_prices.drop('FSE/ZO1_X', axis=1)
# Also from the ticker list
ticker_list.remove('FSE/ZO1_X')
# Create an empty signal dataframe with datetime index equivalent to the stocks
signals = pd.DataFrame(index=all_close_prices.index)
def ma_strategy(ticker, long_window, short_window):
# Calculate the moving avergaes
moving_avg_long = all_close_prices.rolling(window=long_window, min_periods=1).mean()
moving_avg_short = all_close_prices.rolling(window=short_window, min_periods=1).mean()
moving_avg_short = moving_avg_short
moving_avg_long = moving_avg_long
# Add the two MAs for the stocks in the ticker_list to the signals dataframe
for i in ticker_list:
signals['moving_avg_short_' + i] = moving_avg_short[i]
signals['moving_avg_long_' + i] = moving_avg_long[i]
# Set up the signals
for i in ticker_list:
signals['signal_' + i] = np.where(signals['moving_avg_short_' + i] > signals['moving_avg_long_' + i], 1, 0)
signals['positions_' + i] = signals['signal_' + i].diff(periods=1)
#Backtest
initial_capital = float(100000)
# Create a DataFrame `positions` with index of signals
positions = pd.DataFrame(index=all_close_prices)
# Create a new column in the positions DataFrame
# On the days that the signal is 1 (short moving average crosses the long moving average, you’ll buy a 100 shares.
# The days on which the signal is 0, the final result will be 0 as a result of the operation 100*signals['signal']
positions = 100 * signals[['signal_' + ticker]]
# Store the portfolio value owned with the stock
# DataFrame.multiply(other, axis='columns', fill_value=None) - Multiplication of dataframe and other, element-wise
# Store the difference in shares owned - same like position column in signals
pos_diff = positions.diff()
# Add `holdings` to portfolio
portfolio = pd.DataFrame(index=all_close_prices.index)
portfolio['holdings'] = (positions.multiply(all_close_prices[ticker], axis=0)).sum(axis=1)
# Add `cash` to portfolio
portfolio['cash'] = initial_capital - (pos_diff.multiply(all_close_prices[ticker], axis=0)).sum(
axis=1).cumsum()
# Add `total` to portfolio
portfolio['total'] = portfolio['cash'] + portfolio['holdings']
# Add `returns` to portfolio
portfolio['return'] = portfolio['total'].pct_change()
portfolio['return_cum'] = portfolio['total'].pct_change().cumsum()
return portfolio
ma_strategy('FSE/VOW3_X',20,5)
# Visualize the total value of the portfolio
portfolio_value = plt.figure(figsize=(12, 8))
ax1 = portfolio_value.add_subplot(1, 1, 1, ylabel='Portfolio value in $')
# Plot the equity curve in dollars
portfolio['total'].plot(ax=ax1, lw=2.)

You need to assign your function return value to a variable. The line which says
ma_strategy('FSE/VOW3_X',20,5)
probably needs to change to
portfolio = ma_strategy('FSE/VOW3_X',20,5)

Related

Index Error when performing simple calculation

Goal is to pull stock tickers from Wikipedia, use yfinance to grab the historical closing prices, and perform the Relative Strength Index (RSI) calculation for each of the tickers listed in the S&P500. Once this is accomplished, the tickers will be grouped into a 'buy', 'sell', or 'donothing' category based on their respective calculated RSI values. However, I am receiving an Index Error that refers to [ if RSI[245]>=30 and RSI[245-10]<30: ] claiming 245 is out of bounds for axis 0 with size 126, with size 126 being the number of days of closing prices collected.
import yfinance as yf
import pandas as pd
# Read and print the stock tickers that make up S&P500
tickers = pd.read_html(
'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]
print(tickers.head())
# Get the data for this tickers from yahoo finance
df = yf.download(tickers.Symbol.to_list(),'2021-9-24','2022-3-24', auto_adjust=True)['Close']
print(data.head())
#-------------------------------------------------------
df2 = df
#print(df2)
total_columns = df2.shape[1]
#print(total_columns)
print(df2)
#-------------------------------------------------------
buy = []
sell = []
donothing = []
for i in range(total_columns):
#company_name = header
df_RSI = df2.iloc[:,i]
#print(df_RSI)
df_RSI['diff']=df_RSI.diff(1)
#print(df_RSI['diff'])
# Calculate Avg. Gains/Losses
df_RSI['gain'] = df_RSI['diff'].clip(lower=0).round(2)
df_RSI['loss'] = df_RSI['diff'].clip(upper=0).abs().round(2)
#print(df_RSI['gain'])
window_length = 14
df_RSI['avg_gain'] = df_RSI['gain'].rolling(window=window_length,min_periods=window_length).mean()
#print(df_RSI['avg_gain'][:30]) #yay working!
df_RSI['avg_loss'] = df_RSI['loss'].rolling(window=window_length, min_periods=window_length).mean()
#print(df_RSI['avg_loss'][:30]) #yay working!
#print(df_RSI.name) #prints out the tickers wooooo
# Get WMS averages
# Average Gains
for k, row in enumerate(df_RSI['avg_gain'].iloc[window_length+1:]):
df_RSI['avg_gain'].iloc[k + window_length + 1] =\
(df_RSI['avg_gain'].iloc[k + window_length] *
(window_length - 1) +
df_RSI['gain'].iloc[k + window_length + 1])\
/ window_length
# Average Losses
for j, row in enumerate(df_RSI['avg_loss'].iloc[window_length+1:]):
df_RSI['avg_loss'].iloc[j + window_length + 1] =\
(df_RSI['avg_loss'].iloc[j + window_length] *
(window_length - 1) +
df_RSI['loss'].iloc[j + window_length + 1])\
/ window_length
# View initial results
#print(df_RSI[:,window_length-1:window_length+5])
# Calculate RS Values
df_RSI['rs'] = df_RSI['avg_gain'] / df_RSI['avg_loss']
#print(df_RSI['rs'][:30])
#print(df_RSI['rs'][30:]) #yay working!
# Calculate RSI
df_RSI['rsi'] = 100 - (100 / (1.0 + df_RSI['rs']))
# View Result RSI
RSI = (df_RSI['rsi'])
#print(RSI[200:])
#print(RSI[199])
if RSI[245]>=30 and RSI[245-10]<30:
buy.append(df_RSI.name)
elif RSI[240]<=70 and RSI[245-10]>70:
sell.append(df_RSI.name)
else:
donothing.append(df_RSI.name)
print(buy)
print(sell)
print(donothing)
I have made a few changes to your code so that it works (I received an error because you didn't set df_RSI to a dataframe initially, and I changed your if statements at the end - explained below code).
import yfinance as yf
import pandas as pd
# Read and print the stock tickers that make up S&P500
tickers = pd.read_html(
'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]
print(tickers.head())
# Get the data for this tickers from yahoo finance
df = yf.download(tickers.Symbol.to_list(),'2021-9-24','2022-3-24', auto_adjust=True)['Close']
print(df.head())
#-------------------------------------------------------
df2 = df
#print(df2)
total_columns = df2.shape[1]
#print(total_columns)
print(df2)
#-------------------------------------------------------
buy = []
sell = []
donothing = []
for i in range(total_columns):
#company_name = header
df_RSI = df2.iloc[:,i].to_frame()
#print(df_RSI)
df_RSI['diff']=df_RSI.diff(1)
#print(df_RSI['diff'])
# Calculate Avg. Gains/Losses
df_RSI['gain'] = df_RSI['diff'].clip(lower=0).round(2)
df_RSI['loss'] = df_RSI['diff'].clip(upper=0).abs().round(2)
#print(df_RSI['gain'])
window_length = 14
df_RSI['avg_gain'] = df_RSI['gain'].rolling(window=window_length,min_periods=window_length).mean()
#print(df_RSI['avg_gain'][:30]) #yay working!
df_RSI['avg_loss'] = df_RSI['loss'].rolling(window=window_length, min_periods=window_length).mean()
#print(df_RSI['avg_loss'][:30]) #yay working!
#print(df_RSI.name) #prints out the tickers wooooo
# Get WMS averages
# Average Gains
for k, row in enumerate(df_RSI['avg_gain'].iloc[window_length+1:]):
df_RSI['avg_gain'].iloc[k + window_length + 1] =\
(df_RSI['avg_gain'].iloc[k + window_length] *
(window_length - 1) +
df_RSI['gain'].iloc[k + window_length + 1])\
/ window_length
# Average Losses
for j, row in enumerate(df_RSI['avg_loss'].iloc[window_length+1:]):
df_RSI['avg_loss'].iloc[j + window_length + 1] =\
(df_RSI['avg_loss'].iloc[j + window_length] *
(window_length - 1) +
df_RSI['loss'].iloc[j + window_length + 1])\
/ window_length
# View initial results
#print(df_RSI[:,window_length-1:window_length+5])
# Calculate RS Values
df_RSI['rs'] = df_RSI['avg_gain'] / df_RSI['avg_loss']
#print(df_RSI['rs'][:30])
#print(df_RSI['rs'][30:]) #yay working!
# Calculate RSI
df_RSI['rsi'] = 100 - (100 / (1.0 + df_RSI['rs']))
# View Result RSI
RSI = (df_RSI['rsi'])
#print(RSI[200:])
#print(RSI[199])
if RSI[-1]>=30 and RSI[-1-10]<30:
buy.append(df2.columns[i])
elif RSI[-1]<=70 and RSI[-1-10]>70:
sell.append(df2.columns[i])
else:
donothing.append(df2.columns[i])
print(buy)
print(sell)
print(donothing)
The changes to your if statement:
df_RSI.name was changed to df2.columns[i], as I assume you wanted to append the ticker to buy, sell or donothing (and I changed df_RSI to a dataframe at the start of the for loop, so that you were appending columns in the rest of the loop, rather than additional rows to a series - and changing to_frame means that there is no longer a name for df_RSI).
The if statement itself was changed, to if RSI[-1]>=30.... This is because you are currently trying to find the index values: 245, 235 (in if), 240 and 235 (in elif), but the length of the series RSI is only 126 (so, because the index starts at 0, the very last row is RSI[125]). I changed this to be the last row RSI[-1], and the row 10 before that. Let me know if this is not what you were looking for, and I can change it (if you need more help with it).

changing interval at which simulation fits ARIMA (help w/ for-loop)

I'm currently working on a trading strategy simulator that fits an ARIMA to stock return data, makes a next day prediction, then buys/sells based on that prediction. It continues to accumulate shares until a sell signal is generated, at which point the program will liquidate the accumulated position and begin again.
Right now, I specify an interval of dates, then the loop will start by fitting an ARIMA to the first 14 days of return data, making a prediction for day 15, acting on the prediction, then it will begin again with the first 15 days, fitting a new ARIMA. It will continue this until it gets to the end of the range of dates specified, with each new iteration adding the previous day's sample.
So, basically n increases by 1 for every iteration of the loop. I don't want this. I want it to repeatedly fit to an interval of a fixed length. For example, say I'm testing a strategy over 500 trading days. For the first iteration I want the loop to take the 50 days prior to day 1 of the specified interval and fit an ARIMA, and then trade in the same manner as before, but for the next iteration of the loop, I don't want it to fit to 51 days, I want to fit the 50 days prior to the current date every time.
Here's the start of the simulation function where the for-loop is specified. I can't seem to figure out how to change the loop to accomplish my goal. Any help would be greatly appreciated!!
def run_simulation(returns, prices, amt, order, thresh, verbose=True, plot=True):
if type(order) == float:
thresh = None
curr_holding = False
sum_list = []
events_list = []
sharpe_list = []
init_amt = amt
#go through dates
for date, r in tqdm (returns.iloc[14:].items(), total=len(returns.iloc[14:])):
#get data til just before current date
curr_data = returns[:date]
# check if using ARIMA from order
if type(order) == tuple:
#fit model
model = ARIMA(curr_data, order=order).fit()
print(model.summary())
#get forecast
pred = model.forecast()
print(pred)
float_pred = float(pred)
Here's the full script for context:
import yfinance as yf
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima.model import ARIMA
import numpy as np
import seaborn as sns
from tqdm import tqdm
import pandas as pd
from statsmodels.tools.sm_exceptions import ValueWarning, HessianInversionWarning, ConvergenceWarning
import warnings
#in practice do not supress these warnings, they carry important information about the status of your model
warnings.filterwarnings('ignore', category=ValueWarning)
warnings.filterwarnings('ignore', category=HessianInversionWarning)
warnings.filterwarnings('ignore', category=ConvergenceWarning)
tickerSymbol = 'SPY'
data = yf.Ticker(tickerSymbol)
prices = data.history(start='2021-01-01', end='2022-01-03').Close
returns = prices.pct_change().dropna()
def std_dev(data):
# Get number of observations
n = len(data)
# Calculate mean
mean = sum(data) / n
# Calculate deviations from the mean
deviations = sum([(x - mean)**2 for x in data])
# Calculate Variance & Standard Deviation
variance = deviations / (n - 1)
s = variance**(1/2)
return s
# Sharpe Ratio From Scratch
def sharpe_ratio(data, risk_free_rate=0):
# Calculate Average Daily Return
mean_daily_return = sum(data) / len(data)
print(f"mean daily return = {mean_daily_return}")
# Calculate Standard Deviation
s = std_dev(data)
# Calculate Daily Sharpe Ratio
daily_sharpe_ratio = (mean_daily_return - risk_free_rate) / s
# Annualize Daily Sharpe Ratio
sharpe_ratio = 252**(1/2) * daily_sharpe_ratio
return sharpe_ratio
def run_simulation(returns, prices, amt, order, thresh, verbose=True, plot=True):
if type(order) == float:
thresh = None
curr_holding = False
sum_list = []
events_list = []
sharpe_list = []
init_amt = amt
#go through dates
for date, r in tqdm (returns.iloc[14:].items(), total=len(returns.iloc[14:])):
#get data til just before current date
curr_data = returns[:date]
# check if using ARIMA from order
if type(order) == tuple:
#fit model
model = ARIMA(curr_data, order=order).fit()
print(model.summary())
#get forecast
pred = model.forecast()
print(pred)
float_pred = float(pred)
#if you predict a high enough return and not holding, buy stock
# order for random strat and tuple for ARIMA
if float_pred > thresh \
or (order == 'last' and curr_data[-1] > 0):
buy_price = prices.loc[date]
events_list.append(('b', date))
int_buy_price = int(buy_price)
sum_list.append(int_buy_price)
curr_holding = True
if verbose:
print('Bought at $%s'%buy_price)
print('Predicted Return: %s'%round(pred,4))
print(f"Current holdings = {sum(sum_list)}")
print('=======================================')
continue
#if you predict below the threshold return, sell the stock
if (curr_holding) and \
((type(order) == float and np.random.random() < order)
or (type(order) == tuple and float_pred < thresh)
or (order == 'last' and curr_data[-1] > 0)):
sell_price = prices.loc[date]
total_return = len(sum_list) * sell_price
ret = (total_return-sum(sum_list))/sum(sum_list)
amt *= (1+ret)
events_list.append(('s', date, ret))
sharpe_list.append(ret)
sum_list.clear()
curr_holding = False
if verbose:
print('Sold at $%s'%sell_price)
print('Predicted Return: %s'%round(pred,4))
print('Actual Return: %s'%(round(ret, 4)))
print('=======================================')
if verbose:
sharpe = sharpe_ratio(sharpe_list, risk_free_rate=0.004)
print('Total Amount: $%s'%round(amt,2))
print(f"Sharpe Ratio: {sharpe}")
#graph
if plot:
plt.figure(figsize=(10,4))
plt.plot(prices[14:])
y_lims = (int(prices.min()*.95), int(prices.max()*1.05))
shaded_y_lims = int(prices.min()*.5), int(prices.max()*1.5)
for idx, event in enumerate(events_list):
plt.axvline(event[1], color='k', linestyle='--', alpha=0.4)
if event[0] == 's':
color = 'green' if event[2] > 0 else 'red'
plt.fill_betweenx(range(*shaded_y_lims),
event[1], events_list[idx-1][1], color=color, alpha=0.1)
tot_return = round(100*(amt / init_amt - 1), 2)
sharpe = sharpe_ratio(sharpe_list, risk_free_rate=0)
tot_return = str(tot_return) + '%'
plt.title("%s Price Data\nThresh=%s\nTotal Amt: $%s\nTotal Return: %s"%(tickerSymbol, thresh, round(amt,2), tot_return), fontsize=20)
plt.ylim(*y_lims)
plt.show()
print(sharpe)
return amt
# A model with a dth difference to fit and ARMA(p,q) model is called an ARIMA process
# of order (p,d,q). You can select p,d, and q with a wide range of methods,
# including AIC, BIC, and empirical autocorrelations (Petris, 2009).
for thresh in [0.001]:
run_simulation(returns, prices, 100000, (7,0,0), thresh, verbose=True)
solution:
curr_data = returns[:date]
curr_data_sliced = curr_data[-14:]
.
.
.
model=ARIMA(curr_data_sliced, ... )
Changing index for range of dates to use
e.g. [-50:] to incrementally train on 50 most recent data points

Getting a python error "cannot reindex from a duplicate index" and a warning too "setting with copy warning" in the below code

import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import plot_roc_curve
from sklearn.metrics import accuracy_score, classification_report
#kindly download the data FIRST which is required and then update the path accordingly for the variables you have to give the path
# variable1 = pd.read_csv(r"give the path to the data")
variable1 = pd.read_csv(r"C:/Users/hp/Desktop/NIFTY/TCS.csv")
variable2 = pd.read_csv(r"C:/Users/hp/Desktop/NIFTY/WIPRO.csv")
variable3 = pd.read_csv(r"C:/Users/hp/Desktop/NIFTY/HDFC.csv")
variable4 = pd.read_csv(r"C:/Users/hp/Desktop/NIFTY/ITC.csv")
frames = [variable1,variable2,variable3,variable4]
all = pd.concat(frames)
print(all)
price_data = all [['Symbol','Date','Close','High','Low','Open','Volume']]
First, for average investors, the return of an asset is a complete and scale–free summary of the investment opportunity. Second, return series are easier to handle than prices series as they have more attractive statistical properties
# sort the values by symbol and then date
price_data.sort_values(by = ['Symbol','Date'], inplace = True)
# calculate the change in price
price_data['change_in_price'] = price_data['Close'].diff()
# identify rows where the symbol changes
mask = price_data['Symbol'] != price_data['Symbol'].shift(1)
# For those rows, let's make the value null
price_data['change_in_price'] = np.where(mask == True, np.nan, price_data['change_in_price'])
# print the rows that have a null value, should only be 5
price_data[price_data.isna().any(axis = 1)]
days_out = 30
# Group by symbol, then apply the rolling function and grab the Min and Max.
price_data_smoothed = price_data.groupby(['Symbol'])
[['Close','Low','High','Open','Volume']].transform(lambda x: x.ewm(span = days_out).mean())
# Join the smoothed columns with the symbol and datetime column from the old data frame.
smoothed_df = pd.concat([price_data[['Symbol','Date']], price_data_smoothed], axis=1, sort=False)
smoothed_df
days_out = 30
# create a new column that will house the flag, and for each group calculate the diff compared to 30 days ago. Then use Numpy to define the sign.
smoothed_df['Signal_Flag'] = smoothed_df.groupby('Symbol')['Close'].transform(lambda x :
np.sign(x.diff(days_out)))
# print the first 50 rows
smoothed_df.head(50)
up to here it is working but when i execute the below code then it throws an error cannot reindex from a duplicate axis
n = 14
# First make a copy of the data frame twice
up_df, down_df = price_data[['Symbol','change_in_price']].copy(),
price_data[['Symbol','change_in_price']].copy()
# For up days, if the change is less than 0 set to 0.
up_df.loc['change_in_price'] = up_df.loc[(up_df['change_in_price'] < 0), 'change_in_price'] = 0
# For down days, if the change is greater than 0 set to 0.
down_df.loc['change_in_price'] = down_df.loc[(down_df['change_in_price'] > 0), 'change_in_price']
= 0
# We need change in price to be absolute.
down_df['change_in_price'] = down_df['change_in_price'].abs()
# Calculate the EWMA (Exponential Weighted Moving Average), meaning older values are given less weight compared to newer values.
ewma_up = up_df.groupby('Symbol')['change_in_price'].transform(lambda x: x.ewm(span = n).mean())
ewma_down = down_df.groupby('Symbol')['change_in_price'].transform(lambda x: x.ewm(span =
n).mean())
# Calculate the Relative Strength
relative_strength = ewma_up / ewma_down
# Calculate the Relative Strength Index
relative_strength_index = 100.0 - (100.0 / (1.0 + relative_strength))
# Add the info to the data frame.
price_data['down_days'] = down_df['change_in_price']
price_data['up_days'] = up_df['change_in_price']
price_data['RSI'] = relative_strength_index
# Display the head.
price_data.head(30)

Newbie stuck on Python Quandl ValueError

I am trying to calculate the Sharpe ratio with a set of stock symbols. The code works with the first 5 stock symbols, however, it stops working after 6 symbols.
I searched the document for dimension errors that could possibly be the ValueError message but I do not see any possibilities. I also searched Quandl and Google for the error I was getting but could not get a specific result.
If someone could please let me know what I am doing wrong that would be great. I am very new to coding.
# import needed modules
import quandl
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# get adjusted closing prices of 5 selected companies with Quandl
quandl.ApiConfig.api_key = 'oskr4yzppjZwxgJ7zNra'
selected = ['TGT', 'AAPL', 'MSFT', 'FCN', 'TSLA', 'SPY', 'XLV', 'BRK.B', 'WMT', 'JPM']
data = quandl.get_table('WIKI/PRICES', ticker = selected,
qopts = { 'columns': ['date', 'ticker', 'adj_close'] },
date = { 'gte': '2009-1-1', 'lte': '2019-12-31'}, paginate=True)
# reorganize data pulled by setting date as index width
# columns of tickers and their corresponding adjusted prices
clean = data.set_index('date')
table = clean.pivot(columns='ticker')
# calculate daily and annual returns of the stocks
returns_daily = table.pct_change()
returns_annual = returns_daily.mean() * 250
# get daily and covariance of returns of the stock
cov_daily = returns_daily.cov()
cov_annual = cov_daily * 250
# empty lists to store returns, volatility and weights of imiginary portfolios
port_returns = []
port_volatility = []
sharpe_ratio = []
stock_weights = []
# set the number of combinations for imaginary portfolios
num_assets = len(selected)
num_portfolios = 50000
# set random seed for reproduction's sake
np.random.seed(101)
# populate the empty lists with each portfolios returns,risk and weights
for single_portfolio in range(num_portfolios):
weights = np.random.random(num_assets)
weights /= np.sum(weights)
returns = np.dot(weights, returns_annual)
volatility = np.sqrt(np.dot(weights.T, np.dot(cov_annual, weights)))
sharpe = returns / volatility
sharpe_ratio.append(sharpe)
port_returns.append(returns)
port_volatility.append(volatility)
stock_weights.append(weights)
# a dictionary for Returns and Risk values of each portfolio
portfolio = {'Returns': port_returns,
'Volatility': port_volatility,
'Sharpe Ratio': sharpe_ratio}
# extend original dictionary to accomodate each ticker and weight in the portfolio
for counter,symbol in enumerate(selected):
portfolio[symbol+' weight'] = [weight[counter] for weight in stock_weights]
# make a nice dataframe of the extended dictionary
df = pd.DataFrame(portfolio)
# get better labels for desired arrangement of columns
column_order = ['Returns', 'Volatility', 'Sharpe Ratio'] + [stock+' weight' for stock in selected]
# reorder dataframe columns
df = df[column_order]
# find min Volatility & max sharpe values in the dataframe (df)
min_volatility = df['Volatility'].min()
max_sharpe = df['Sharpe Ratio'].max()
# use the min, max values to locate and create the two special portfolios
sharpe_portfolio = df.loc[df['Sharpe Ratio'] == max_sharpe]
min_variance_port = df.loc[df['Volatility'] == min_volatility]
# plot the efficient frontier with a scatter plot
plt.style.use('seaborn-dark')
df.plot.scatter(x='Volatility', y='Returns', c='Sharpe Ratio',
cmap='RdYlGn', edgecolors='black', figsize=(10, 8), grid=True)
plt.scatter(x=sharpe_portfolio['Volatility'], y=sharpe_portfolio['Returns'], c='red', marker='D', s=200)
plt.scatter(x=min_variance_port['Volatility'], y=min_variance_port['Returns'], c='blue', marker='D', s=200)
plt.xlabel('Volatility (Std. Deviation)')
plt.ylabel('Expected Returns')
plt.title('Efficient Frontier')
plt.show()
# print the details of the 2 special portfolios
print(min_variance_port.T)
print(sharpe_portfolio.T)
The error I am getting is this:
ValueError Traceback (most recent call last)
<ipython-input-8-3e66668bf017> in <module>
42 weights = np.random.random(num_assets)
43 weights /= np.sum(weights)
---> 44 returns = np.dot(weights, returns_annual)
45 volatility = np.sqrt(np.dot(weights.T, np.dot(cov_annual, weights)))
46 sharpe = returns / volatility
ValueError: shapes (10,) and (7,) not aligned: 10 (dim 0) != 7 (dim 0)

Low volatility portfolio construction

I want to test the low volatility factor for some market other than equities. Contradiccting finance 101, it has been Shown that low volatility stocks outperform high volatility stocks (see, for example, Baker, Malcolm, Brendan Bradley, and Jeffrey Wurgler (2011), “Benchmarks as Limits to Arbitrage: Understanding the Low-Volatility Anomaly”, Financial Analyst Journal, Vol. 67, No. 1, pp. 40–54.)
So what I want to do is construct the low vola factor by following the methodology of Jegadeesh and Titman (1993), namely raning stocks according to their previous j historical volatility and short top 30% (the most volatile) and Long the bottom 30% (the least volatile), and hold that Long-short Portfolio for k periods. Therefore, a 3-3 j-k Portfolio would mean, looking at the past 3 months of historical volatility (j), and hold that Portfolio for the following 3 months (k).
I have written some Code, and the j part Can be easily managed by simply increasing or decreasing the window of the rolling window vola calculation. The part I am struggling with is the k part, how this could be done. Unfortunately, I couldnt find many examples online.
In addition, I was wondering if my Code is correct or if I did any mistake, since it surprisingly did not work, regardless of the dataset I used. I am not sure whether this is the right place to ask, but if someone could take a look at it that would be great and might be helpful to others planning to implement a strategy like this as well.
Below is a simple working example with just 10 stocks. As I said, I want to implement it for some other assets, but this Code should work. You just have to use your own API key in line 16. Thanks a lot!
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import quandl
import pickle
import scipy.optimize as sco
from scipy.ndimage.interpolation import shift
import matplotlib.pyplot as plt
##################
# Low volatility #
##################
quandl.ApiConfig.api_key = 'Your key here'
stocks = ['MSFT','AAPL','AMZN','FB','BRK.B','JPM','GOOG','JNJ','V','PG','XOM']
data = quandl.get_table('WIKI/PRICES', ticker = stocks,
qopts = { 'columns': ['date', 'ticker', 'adj_close'] },
date = { 'gte': '2016-1-1', 'lte': '2019-11-3' }, paginate=True)
# with open("data.pkl", "wb") as pickle_file:
# pickle.dump(data, pickle_file)
# with open("data.pkl", "rb") as pickle_file:
# data = pickle.load(pickle_file)
data = data.pivot_table(index='date', columns='ticker', values='adj_close')
data = data.groupby(pd.Grouper(freq="M")).mean() # convert from daily to monthly prices
returns = (np.log(data) - np.log(data.shift(1))).dropna()
stds = returns.rolling(12).std()
stds = stds.values # convert to numpy array
list = []
for x in range(0, stds.shape[0]): # for each row in std matrix, create decile buckets (dec -> breakpoint to the next bucket)
for y in range(0,100,10):
dec = np.percentile(stds[x], y)
list.append(dec)
list = np.array(list) # convert list to numpy array
list = np.reshape(list, (stds.shape[0], -1)) # reshape the array such that it has the same format as returns (here: (26,10))
inds = []
for x in range(0, stds.shape[0]): # if the return is in the lower 30%, allocate a -1 to the asset. If it is in the upper 30%, allocate a 1. 0 otherwise.
ind = np.digitize(stds[x], list[x])
for x in range(0, ind.shape[0]):
if ind[x] <= 3:
ind[x] = 1
elif ind[x] >= 8:
ind[x] = -1
else:
ind[x] = 0
inds.append(ind)
inds = np.array(inds)
inds = inds.astype(np.float32)
for x in inds: # divide -1, 1 and 0 by the respective total number of counts of -1, 1 and 0, such that they sum up to -1 and 1 (beta neutral long-short)
ones = np.count_nonzero(x == 1) # count the number of 1
minus_ones = np.count_nonzero(x == -1) # count the number of -1
zeros = np.count_nonzero(x == 0) # count the number of 0
for y in range(0, inds.shape[1]):
if x[y] == 1:
x[y] = x[y] / ones
elif x[y] == -1:
x[y] = x[y] / minus_ones
else:
x[y] = x[y] / zeros
returns = returns.shift(periods=-1).values # shift returns one period back, and create numpy array
pf_returns = np.sum((inds*returns), axis=1) # multiply returns with weights, and sum up
pf_returns = pd.DataFrame(pf_returns)
print("---")
print(pf_returns.describe())
# Plot
pf_returns_indexed = 100 * (1 + pf_returns).cumprod()
pf_returns_indexed = pf_returns_indexed.plot(linewidth=1.2) # change line width
plt.show()

Categories

Resources