efficient frontier/stock analyze

efficient frontier/stock analyze - python

Consider the following task. Using a 10-year period, I should calculate the portfolio weights in January and then use these weights in February to calculate the portfolio return and standard deviation. The program should then calculate the weights in February and use them in March to calculate the portfolio return and standard deviation, and so on through all 131 months in the data, meaning that the first month of the dataset is used only to calculate weights.
import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf
# Question code: per-month portfolio weights from inverse covariance times
# excess return, followed by portfolio return and volatility.
# NOTE(review): indentation was lost in this copy, so the nesting of the loops
# below cannot be determined from the text alone - confirm against the original.
p_ret = [] # Define an empty array for portfolio returns
p_vol = [] # Define an empty array for portfolio volatility
tickers = ['AAPL', 'AMZN', 'XOM']
start_date = datetime.date(2010, 1, 2)
end_date = datetime.date(2020, 12, 31)
daily_data = yf.download(tickers, start=start_date, end=end_date) # define the dataset
# Keep only the adjusted close prices and drop rows with missing values.
daily_data = daily_data['Adj Close'].dropna()
Vector_of_ones = np.array([1,1,1])
# Split the daily prices into one DataFrame per calendar month.
frames = [v for _, v in daily_data.groupby(pd.Grouper(freq='M'))]
rf = 0.01 # risk free asset
weights = []
df = pd.DataFrame(columns=tickers)
for w in frames:
#corr_matrix = w.pct_change().apply(lambda x: np.log(1 + x)).corr()
# Sum of the daily percentage changes within this month's frame, per ticker.
mu = (w.resample('D').last().pct_change().sum())
# Subtract rf from every ticker's summed return.
individual_asset_return = np.subtract(np.transpose(mu), np.dot(Vector_of_ones,rf))
# individual_asset_return = daily_data.pct_change().mean() # finds the mean
# Write the new row at label 1, shift every label down by one, then sort by label.
df.loc[+1] = [individual_asset_return[tickers[0]], individual_asset_return[tickers[1]],
individual_asset_return[tickers[2]]]
df.index = df.index - 1
df = df.sort_index()
for d in range(len(df)):
# Covariance matrix of the log returns of frame `w`.
cov_matrix = w.pct_change().apply(lambda x: np.log(1 + x)).cov()
liste = df.iloc[d].tolist()
# Inverse covariance times the return vector, then scaled so the weights sum to 1.
a = np.dot(np.linalg.inv(cov_matrix), np.transpose(np.array(liste)))
omega_weights = a / (np.dot(np.transpose(Vector_of_ones), a)) # expression to find weights
weights.append(omega_weights)
for afkast in frames[1:]: #loop to find the portfolio returns and standard deviation
cov_matrix1 = afkast.pct_change().apply(lambda x: np.log(1 + x)).cov()
#corr_matrix1 = afkast.pct_change().apply(lambda x: np.log(1 + x)).corr()
# NOTE(review): df1 and liste1 are assigned but not read anywhere in this snippet.
df1 = df.iloc[1:, :]
for d1 in range(len(df)):
liste1 = df.iloc[d1].tolist()
# Portfolio return: weights dotted with `mu`.
portfolio_return = np.dot(np.transpose(omega_weights),
mu)
p_ret.append(portfolio_return)
# Portfolio volatility: sqrt(w' * cov * w) using this frame's covariance matrix.
volatility_portfolio = np.sqrt(np.dot(np.transpose(omega_weights), np.dot(cov_matrix1, omega_weights)))
p_vol.append(volatility_portfolio)
data = {'Returns': p_ret, 'Volatility': p_vol}
# Add one "<ticker> weight" column per asset, taken from the collected weight vectors.
for counter, symbol in enumerate(afkast.columns.tolist()):
# print(counter, symbol)
data[symbol + ' weight'] = [w[counter] for w in weights]
portfolios = pd.DataFrame(data) # builds a dataframe sorted so that the one with the lowest volatility is on top
# NOTE(review): no sort call appears in this snippet, despite the comment above.
# Label the rows with month-end dates starting from start_date.
portfolios['Date'] = pd.date_range(start=start_date, periods=len(portfolios), freq='M')
portfolios.plot(x='Date', y='Returns', kind='line')
# portfolios.plot(x = 'Date', y = 'Volatility', kind = 'line')
plt.show()
print(portfolios.head())
As you can probably see, I'm not an advanced coder, but I hope I could get some help finding where my code is wrong, if anything is wrong.
I really appreciate any help you can provide.

Related

Index Error when performing simple calculation

Goal is to pull stock tickers from Wikipedia, use yfinance to grab the historical closing prices, and perform the Relative Strength Index (RSI) calculation for each of the tickers listed in the S&P500. Once this is accomplished, the tickers will be grouped into a 'buy', 'sell', or 'donothing' category based on their respective calculated RSI values. However, I am receiving an Index Error that refers to [ if RSI[245]>=30 and RSI[245-10]<30: ] claiming 245 is out of bounds for axis 0 with size 126, with size 126 being the number of days of closing prices collected.
import yfinance as yf
import pandas as pd
# Question code: classify each S&P 500 ticker as buy / sell / donothing from its RSI.
# NOTE(review): indentation was lost in this copy; the lines after each `for`/`if`
# header are presumably the loop/branch bodies.
# Read and print the stock tickers that make up S&P500
tickers = pd.read_html(
'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]
print(tickers.head())
# Get the data for this tickers from yahoo finance
df = yf.download(tickers.Symbol.to_list(),'2021-9-24','2022-3-24', auto_adjust=True)['Close']
# NOTE(review): `data` is not defined anywhere in this snippet.
print(data.head())
#-------------------------------------------------------
df2 = df
#print(df2)
total_columns = df2.shape[1]
#print(total_columns)
print(df2)
#-------------------------------------------------------
# Result buckets, filled with ticker names by the loop below.
buy = []
sell = []
donothing = []
for i in range(total_columns):
#company_name = header
# Selecting a single column with iloc yields a Series, not a DataFrame.
df_RSI = df2.iloc[:,i]
#print(df_RSI)
# Day-over-day price change.
df_RSI['diff']=df_RSI.diff(1)
#print(df_RSI['diff'])
# Calculate Avg. Gains/Losses
# Gains keep the positive changes, losses the absolute value of the negative ones.
df_RSI['gain'] = df_RSI['diff'].clip(lower=0).round(2)
df_RSI['loss'] = df_RSI['diff'].clip(upper=0).abs().round(2)
#print(df_RSI['gain'])
window_length = 14
# Simple rolling means; undefined until a full window of values is available.
df_RSI['avg_gain'] = df_RSI['gain'].rolling(window=window_length,min_periods=window_length).mean()
#print(df_RSI['avg_gain'][:30]) #yay working!
df_RSI['avg_loss'] = df_RSI['loss'].rolling(window=window_length, min_periods=window_length).mean()
#print(df_RSI['avg_loss'][:30]) #yay working!
#print(df_RSI.name) #prints out the tickers wooooo
# Get WMS averages
# From position window_length + 1 onward, each average is overwritten with
# (previous average * (window_length - 1) + current value) / window_length.
# Average Gains
for k, row in enumerate(df_RSI['avg_gain'].iloc[window_length+1:]):
df_RSI['avg_gain'].iloc[k + window_length + 1] =\
(df_RSI['avg_gain'].iloc[k + window_length] *
(window_length - 1) +
df_RSI['gain'].iloc[k + window_length + 1])\
/ window_length
# Average Losses
for j, row in enumerate(df_RSI['avg_loss'].iloc[window_length+1:]):
df_RSI['avg_loss'].iloc[j + window_length + 1] =\
(df_RSI['avg_loss'].iloc[j + window_length] *
(window_length - 1) +
df_RSI['loss'].iloc[j + window_length + 1])\
/ window_length
# View initial results
#print(df_RSI[:,window_length-1:window_length+5])
# Calculate RS Values
df_RSI['rs'] = df_RSI['avg_gain'] / df_RSI['avg_loss']
#print(df_RSI['rs'][:30])
#print(df_RSI['rs'][30:]) #yay working!
# Calculate RSI
df_RSI['rsi'] = 100 - (100 / (1.0 + df_RSI['rs']))
# View Result RSI
RSI = (df_RSI['rsi'])
#print(RSI[200:])
#print(RSI[199])
# Compare the RSI at fixed positions 245 / 240 with the value at position 235.
# NOTE(review): these positions are hard-coded; the question reports only 126 rows
# of data, which is the reported out-of-bounds IndexError.
if RSI[245]>=30 and RSI[245-10]<30:
buy.append(df_RSI.name)
elif RSI[240]<=70 and RSI[245-10]>70:
sell.append(df_RSI.name)
else:
donothing.append(df_RSI.name)
print(buy)
print(sell)
print(donothing)

I have made a few changes to your code so that it works (I received an error because you didn't set df_RSI to a dataframe initially, and I changed your if statements at the end - explained below code).
import yfinance as yf
import pandas as pd
# Read and print the stock tickers that make up the S&P 500.
tickers = pd.read_html(
    'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]
print(tickers.head())

# Get the closing prices for these tickers from Yahoo Finance.
df = yf.download(tickers.Symbol.to_list(), '2021-9-24', '2022-3-24', auto_adjust=True)['Close']
print(df.head())

df2 = df
total_columns = df2.shape[1]
print(df2)

window_length = 14  # RSI look-back window, in rows


def _wilder_average(values, window):
    """Return the smoothed average of *values* used by the RSI.

    The result starts as the simple rolling mean over ``window`` rows.  From
    position ``window + 1`` onward each entry is replaced by
    ``(previous * (window - 1) + current) / window``.

    The recursion runs on a private NumPy copy instead of assigning through
    ``df['col'].iloc[pos] = ...``; that chained assignment may write to a
    temporary and leave the DataFrame unchanged.
    """
    smoothed = values.rolling(window=window, min_periods=window).mean().to_numpy(copy=True)
    raw = values.to_numpy()
    for pos in range(window + 1, len(raw)):
        smoothed[pos] = (smoothed[pos - 1] * (window - 1) + raw[pos]) / window
    return pd.Series(smoothed, index=values.index)


buy = []
sell = []
donothing = []
for i in range(total_columns):
    # Work on a one-column DataFrame so the derived series become columns.
    df_RSI = df2.iloc[:, i].to_frame()
    df_RSI['diff'] = df_RSI.iloc[:, 0].diff(1)

    # Split the daily change into gains and (positive) losses.
    df_RSI['gain'] = df_RSI['diff'].clip(lower=0).round(2)
    df_RSI['loss'] = df_RSI['diff'].clip(upper=0).abs().round(2)

    # Smoothed average gain / loss.
    df_RSI['avg_gain'] = _wilder_average(df_RSI['gain'], window_length)
    df_RSI['avg_loss'] = _wilder_average(df_RSI['loss'], window_length)

    # Relative strength and RSI.
    df_RSI['rs'] = df_RSI['avg_gain'] / df_RSI['avg_loss']
    df_RSI['rsi'] = 100 - (100 / (1.0 + df_RSI['rs']))
    RSI = df_RSI['rsi']

    # The Series is indexed by date, so the last row and the row ten before it
    # must be addressed by position with .iloc; RSI[-1] would be a label lookup.
    latest = RSI.iloc[-1]
    ten_rows_earlier = RSI.iloc[-1 - 10]
    if latest >= 30 and ten_rows_earlier < 30:
        buy.append(df2.columns[i])
    elif latest <= 70 and ten_rows_earlier > 70:
        sell.append(df2.columns[i])
    else:
        donothing.append(df2.columns[i])

print(buy)
print(sell)
print(donothing)
The changes to your if statement:
df_RSI.name was changed to df2.columns[i], as I assume you wanted to append the ticker to buy, sell or donothing (and I changed df_RSI to a dataframe at the start of the for loop, so that you were appending columns in the rest of the loop, rather than additional rows to a series - and changing to_frame means that there is no longer a name for df_RSI).
The if statement itself was changed, to if RSI[-1]>=30.... This is because you are currently trying to find the index values: 245, 235 (in if), 240 and 235 (in elif), but the length of the series RSI is only 126 (so, because the index starts at 0, the very last row is RSI[125]). I changed this to be the last row RSI[-1], and the row 10 before that. Let me know if this is not what you were looking for, and I can change it (if you need more help with it).

heatmap of values grouped by time - seaborn

I'm plotting the counts of a variable grouped by time as a heatmap. However, when including both hour and minute, the counts are quite low so the resulting heatmap doesn't really provide any real insight. Is it possible to group the counts in a bigger block of time? I'm hoping to test some different periods (5, 10 mins).
I'm also hoping to plot time on the x-axis. Similar to the output attached.
import seaborn as sns
import pandas as pd
from datetime import datetime
from datetime import timedelta
import numpy as np  # np.random is used below but numpy was not imported in this snippet

# Build one timestamp per minute between 10:00 and 13:00 (end exclusive).
start = datetime(1900,1,1,10,0,0)
end = datetime(1900,1,1,13,0,0)
seconds = (end - start).total_seconds()
step = timedelta(minutes = 1)
array = []
for i in range(0, int(seconds), int(step.total_seconds())):
    array.append(start + timedelta(seconds=i))
# '%M:%S' - the original format string had a stray '%' before the colon.
array = [i.strftime('%Y-%m-%d %H:%M:%S') for i in array]
df2 = pd.DataFrame(array).rename(columns = {0:'Time'})
# Random values in [0, 0.5) rounded to one decimal, i.e. a handful of distinct levels.
df2['Count'] = np.random.uniform(0.0, 0.5, size = len(df2))
df2['Count'] = df2['Count'].round(1)
df2['Time'] = pd.to_datetime(df2['Time'])
df2['Hour'] = df2['Time'].dt.hour
df2['Min'] = df2['Time'].dt.minute
# One row per (Hour, Min), one column per distinct Count value.
g = df2.groupby(['Hour','Min','Count'])
count_df = g['Count'].nunique().unstack()
count_df.fillna(0, inplace = True)
sns.heatmap(count_df)

To deal with such cases, I think it would be easy to use data downsampling. It is also easy to change the thresholds. The axis labels in the output graph will need to be modified, but we recommend this method.
import seaborn as sns
import pandas as pd
import numpy as np
from datetime import datetime
from datetime import timedelta
# Sample window: 10:00 to 13:00 (end exclusive), one timestamp per minute.
start = datetime(1900, 1, 1, 10, 0, 0)
end = datetime(1900, 1, 1, 13, 0, 0)
seconds = (end - start).total_seconds()
step = timedelta(minutes=1)
array = [
    (start + timedelta(seconds=offset)).strftime('%Y-%m-%d %H:%M:%S')
    for offset in range(0, int(seconds), int(step.total_seconds()))
]

df2 = pd.DataFrame({'Time': array})
# Random values in [0, 0.5), rounded to one decimal place.
df2['Count'] = np.random.uniform(0.0, 0.5, size=len(df2)).round(1)
df2['Time'] = pd.to_datetime(df2['Time'])
df2['Hour'] = df2['Time'].dt.hour
df2['Min'] = df2['Time'].dt.minute
df2 = df2.set_index('Time')

# Downsample into 10-minute bins and count how often each Count value occurs per bin.
per_bin = df2.resample('10min')['Count'].value_counts()
count_df = per_bin.unstack().fillna(0)
# Transpose so that time runs along the x-axis.
sns.heatmap(count_df.T)

The way you could achieve this is by creating a column with numbers that have repeating elements for the number of minutes.
For example:
minutes = 3
x = [0,1,2]
np.repeat(x, repeats=minutes, axis=0)
>>>> [0,0,0,1,1,1,2,2,2]
and then group your data using this column.
So your code would look like:
...
minutes = 5
# Label every row with the number of the `minutes`-wide bucket it falls into
# (rows 0..4 -> 0, rows 5..9 -> 1, ...).  Integer division works for any row
# count, whereas np.repeat requires the length to be an exact multiple.
df2['group'] = np.arange(len(df2)) // minutes
# Group by the bucket label (not by 'Min') so the bigger time block takes effect.
g = df2.groupby(['group', 'Count'])
# size() counts the rows per (bucket, Count value); nunique() of the grouping
# key itself would be 1 for every group.
count_df = g.size().unstack()
count_df.fillna(0, inplace = True)

Fetching date of high and low prices for week based on daily high low prices

First of all I will share objective of running python code.
Getting Daily High and Low Prices for a stock from Yahoo.
Converting the daily high and lows to Weekly High/Lows, monthly High Lows, Yearly High Lows.
Getting exact dates of Weekly or Monthly High Lows from a daily dataframe
Finally after fetching Dates for Weekly(or Monthly)High & lows, I want to arrange the data of what occured first High or Low during the week. for eg. during week ending 12th December, 2020, I get High of the week is 100 and low of week is 97(after completing step 2) and also High date and low date from daily dataframe (from step 3), I want to arrange Prices in order of occurence. so if High happened on 9th December and Low happened on 12th December. The prices will be arranged as 100 in row 1 and then 97 in row 2 and this process repeats for entire data frame.
What I have been able to achieve.
I have completed step 1 and step 2. Struggling with step 3 as of now.
Have accomplished Step 1 by
import pandas as pd
import yfinance as yf
# Download the full daily history and keep only the High / Low columns.
Ticker = '^NSEI'
f = yf.download(Ticker, period="max")
for unwanted in ('Adj Close', 'Open', 'Close', 'Volume'):
    f = f.drop([unwanted], axis=1)
# Turn the index into a regular column and put the ticker symbol in front.
f = f.reset_index()
f.insert(0, 'Ticker', Ticker)
Step 2 by
def _high_low_by(freq):
    """Return the highest High and lowest Low per ticker for each `freq` period of `f`."""
    grouped = f.groupby(['Ticker', pd.Grouper(key='Date', freq=freq)])
    return grouped.agg(High=('High', 'max'), Low=('Low', 'min')).reset_index()


# Weekly, monthly, quarterly and yearly highs / lows.
fw, fm, fq, fy = (_high_low_by(code) for code in ('W', 'M', 'Q', 'Y'))
Struggling with step 3. I used pd.merge, pd.join and pd.concat but was unable to combine the weekly dataframe with the dates of the daily highs and lows. The number of weekly records increases after the merge, and drop_duplicates also didn't work properly when I specified keep='last'.
So if you all can help me in step 3 and 4 would be grateful. Thanks

Solved the query which i posted above. Hope this help others. Thanks
import pandas as pd
import yfinance as yf
import datetime as dt
import numpy as np
Ticker = '^NSEI'
df = yf.download(Ticker, period='max')
df = df.drop(['Open', 'Close', 'Adj Close', 'Volume'], axis=1).reset_index()

# Add one label column per aggregation period: week, month, half-year, year.
df['WkEnd'] = df.Date.dt.to_period('W').apply(lambda r: r.start_time) + dt.timedelta(days=6)
df['MEnd'] = (df.Date.dt.to_period('M').apply(lambda r: r.end_time)).dt.date
df['6Mend'] = np.where(df.Date.dt.month <= 6,
                       (df.Date.dt.year).astype(str) + '-1H',
                       (df['Date'].dt.year).astype(str) + '-2H')
df['YEnd'] = (df.Date.dt.to_period('Y').apply(lambda r: r.end_time)).dt.date

# Column groups for pd.lreshape: the two date columns are stacked into 'Date',
# the two price columns into 'Price'.
d = {'Date': ['Hidate', 'Lodate'], 'Price': ['High', 'Low']}


def _extreme_dates(summary, key, price_col):
    """Return, for each row of *summary*, the daily date on which *price_col* was hit.

    The left merge joins on the shared columns (*key* and *price_col*).  If the
    same extreme price occurs on several days within one period the merge
    returns several rows for that period; only the first is kept so the result
    has exactly one date per period, in the same order as *summary*.
    """
    matched = summary[[key, price_col]].merge(df, how='left')
    return matched.drop_duplicates(subset=key)['Date'].to_numpy()


def _high_low_sequence(key):
    """Return the highs and lows per *key* period as rows ordered by the date they occurred."""
    out = df.groupby([key]).agg({'High': 'max', 'Low': 'min'}).reset_index()
    out['Hidate'] = _extreme_dates(out, key, 'High')
    out['Lodate'] = _extreme_dates(out, key, 'Low')
    out = pd.lreshape(out, d)
    return out.sort_values(by=['Date']).reset_index(drop=True)


dw = _high_low_sequence('WkEnd')    # weekly
dm = _high_low_sequence('MEnd')     # monthly
d6m = _high_low_sequence('6Mend')   # half-yearly
dy = _high_low_sequence('YEnd')     # yearly

Interpolation using ExponentialSmoothing from stats models

I am using ExponentialSmoothing from statsmodels to run Holt-Winters method on time series.
I get forecasted values but can not extract calculated values and compare them with observed values.
from pandas import Series
from scipy import stats
import statsmodels.api as sm
from statsmodels.tsa.api import ExponentialSmoothing
# Fit an exponential-smoothing model with additive trend and multiplicative
# seasonality (12 periods per season) on the 'n_passengers' column.
modelHW = ExponentialSmoothing(np.asarray(passtrain_df['n_passengers']), seasonal_periods=12, trend='add', seasonal='mul',).fit()
# Store the next `prediction_size` forecast values.
# NOTE(review): y_hat_avg and prediction_size are defined outside this fragment.
y_hat_avg['Holt_Winter'] = modelHW.forecast(prediction_size)
So here, prediction_size = number of forecasted datapoints (4 in my case)
passtrain_df is a dataframe with observations (140 datapoints) based on which Holt_Winter model is built (regression).
I can easily display 4 forecasted values.
How do I extract 140 calculated values?
Tried to use:
print(ExponentialSmoothing.predict(np.asarray(passtrain_df), start=0, end=139))
But I probably have a syntax error somewhere
Thank you!

Edit:
Replaced synthetic dataset with sample data from OP
Fixed function that builds new forecast period
Fixed x-axis date format as per OPs request
Answer:
If you're looking for calculated values within your estimation period, you should use modelHW.fittedvalues and not modelHW.forecast(). The latter will give you just what it says; forecasts. And it's pretty awesome. Let me show you how to do both things:
Plot 1 - Model within estimation period
Plot 2 - Forecasts
Code:
#imports
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from statsmodels.tsa.api import ExponentialSmoothing
import matplotlib.dates as mdates
#%%
#
# Load data
# Semicolon-separated CSV fetched over the network; 'month' becomes the index.
pass_df = pd.read_csv('https://raw.githubusercontent.com/dacatay/time-series-analysis/master/data/passengers.csv', sep=';')
pass_df = pass_df.set_index('month')
# NOTE(review): the result of this expression is not stored or printed;
# presumably it was meant for interactive inspection.
type(pass_df.index)
df = pass_df.copy()
# Model
# Additive trend, multiplicative seasonality, 12 periods per season.
modelHW = ExponentialSmoothing(np.asarray(df['n_passengers']), seasonal_periods=12, trend='add', seasonal='mul',).fit()
modelHW.summary()
# Model, fitted values
# fittedvalues holds the model's values for the estimation period itself;
# they are paired with the observations through the original index.
model_values = modelHW.fittedvalues
model_period = df.index
df_model = pd.concat([df['n_passengers'], pd.Series(model_values, index = model_period)], axis = 1)
df_model.columns = ['n_passengers', 'HWmodel']
# Convert the index to a DatetimeIndex for plotting.
df_model = df_model.set_index(pd.DatetimeIndex(df_model.index))
# Model, plot
fig, ax = plt.subplots()
myFmt = mdates.DateFormatter('%Y-%m')
df_model.plot(ax = ax, x_compat=True)
ax.xaxis.set_major_formatter(myFmt)
# Forecasts
prediction_size = 10
forecast_values = modelHW.forecast(prediction_size)
# Forecasts, build new period
# Build prediction_size + 1 monthly periods starting at the last observed month,
# then drop the first one so the forecast starts one month after the data ends.
forecast_start = df.index[-1]
forecast_start = pd.to_datetime(forecast_start, format='%Y-%m-%d')
forecast_period = pd.period_range(forecast_start, periods=prediction_size+1, freq='M')
forecast_period = forecast_period[1:]
# Forecasts, create dataframe
df_forecast = pd.Series(forecast_values, index = forecast_period.values).to_frame()
df_forecast.columns = ['HWforecast']
# merge input and forecast dataframes
# Outer join on the index keeps both the observed rows and the forecast rows.
df_all = pd.merge(df,df_forecast, how='outer', left_index=True, right_index=True)
#df_all = df_all.set_index(pd.DatetimeIndex(df_all.index.values))
# Re-index the merged frame with a monthly PeriodIndex.
ix = df_all.index
ixp = pd.PeriodIndex(ix, freq = 'M')
df_all = df_all.set_index(ixp)
# Forecast, plot
fig, ax = plt.subplots()
myFmt = mdates.DateFormatter('%Y-%m')
df_all.plot(ax = ax, x_compat=True)
ax.xaxis.set_major_formatter(myFmt)
Previous attempts:
# imports
import pandas as pd
import numpy as np
from statsmodels.tsa.api import ExponentialSmoothing
# Synthetic stand-in for the asker's data; the fixed seed makes it reproducible.
np.random.seed(42)

# 140 consecutive calendar days starting on 1 January 2019.
date = pd.to_datetime("1st of Jan, 2019")
day_offsets = pd.to_timedelta(np.arange(140), 'D')
dates = date + day_offsets

# Integer random walk (normal steps, standard deviation 5) shifted up by 100.
random_walk = np.cumsum(np.random.normal(loc=0.0, scale=5.0, size=140))
n_passengers = 100 + random_walk.astype(int)
df = pd.DataFrame(index=dates, data={'n_passengers': n_passengers})
1. How to plot observed vs. estimated values within the estimation period:
The following snippet will extract all fitted values and plot it against your observed values.
Snippet 2:
# Fit the model: additive trend, multiplicative seasonality, 12 periods per season.
observed = np.asarray(df['n_passengers'])
modelHW = ExponentialSmoothing(observed, seasonal_periods=12, trend='add', seasonal='mul',).fit()
modelHW.summary()

# In-sample (fitted) values, re-attached to the original index.
model_values = modelHW.fittedvalues
model_period = df.index
fitted = pd.Series(model_values, index=model_period)
df_model = pd.concat([df['n_passengers'], fitted], axis=1, keys=['n_passengers', 'HWmodel'])
df_model.plot()
Plot 1:
2. How to produce and plot model forecasts of a certain length:
The following snippet will produce 10 forecasts from your model, and plot them as an extended period compared to your observed values.
Snippet 3:
# Produce `prediction_size` out-of-sample forecasts.
prediction_size = 10
forecast_values = modelHW.forecast(prediction_size)

# Daily timestamps for the forecasts: day 1 .. prediction_size after the last observation.
day_steps = pd.to_timedelta(np.arange(1, prediction_size + 1), 'D')
forecast_period = df.index[-1] + day_steps

# Observed and forecast series side by side on the union of both indexes.
predicted = pd.Series(forecast_values, index=forecast_period)
df_forecast = pd.concat([df['n_passengers'], predicted], axis=1, keys=['n_passengers', 'HWforecast'])
df_forecast.plot()
Plot 2:
And here's the whole thing for an easy copy&paste:
# imports
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from statsmodels.tsa.api import ExponentialSmoothing
# Synthetic data shaped like the asker's setup; the seed keeps it reproducible.
np.random.seed(42)

# 140 consecutive calendar days starting on 1 January 2019.
date = pd.to_datetime("1st of Jan, 2019")
dates = date + pd.to_timedelta(np.arange(140), 'D')

# Integer random walk (normal steps, standard deviation 5) shifted up by 100.
n_passengers = np.cumsum(np.random.normal(loc=0.0, scale=5.0, size=140))
n_passengers = 100 + n_passengers.astype(int)
df = pd.DataFrame(index=dates, data={'n_passengers': n_passengers})

# Fit the model: additive trend, multiplicative seasonality, 12 periods per season.
observed = np.asarray(df['n_passengers'])
modelHW = ExponentialSmoothing(observed, seasonal_periods=12, trend='add', seasonal='mul',).fit()
modelHW.summary()

# In-sample (fitted) values next to the observations.
model_values = modelHW.fittedvalues
model_period = df.index
df_model = pd.concat(
    [df['n_passengers'], pd.Series(model_values, index=model_period)],
    axis=1,
    keys=['n_passengers', 'HWmodel'],
)
df_model.plot()

# Out-of-sample forecasts for the `prediction_size` days after the last observation.
prediction_size = 10
forecast_values = modelHW.forecast(prediction_size)
forecast_period = df.index[-1] + pd.to_timedelta(np.arange(1, prediction_size + 1), 'D')
df_forecast = pd.concat(
    [df['n_passengers'], pd.Series(forecast_values, index=forecast_period)],
    axis=1,
    keys=['n_passengers', 'HWforecast'],
)
df_forecast.plot()
#vestland - here is the code and error:
# Asker's adaptation of the answer's snippets to their own training data.
# NOTE(review): passtrain_df and prediction_size are defined outside this snippet.
y_train = passtrain_df.copy(deep=True)
# Additive trend, multiplicative seasonality, 12 periods per season.
model_HW = ExponentialSmoothing(np.asarray(y_train['n_passengers']), seasonal_periods=12, trend='add', seasonal='mul',).fit()
# In-sample fitted values paired with the observations via the training index.
model_values = model_HW.fittedvalues
model_period = y_train.index
hw_model = pd.concat([y_train['n_passengers'], pd.Series(model_values, index = model_period)], axis = 1)
hw_model.columns = ['Observed Passengers', 'Holt-Winters']
plt.figure(figsize=(18,12))
hw_model.plot()
forecast_values = model_HW.forecast(prediction_size)
# This is the line the traceback below points at ("Cannot shift with no freq").
forecast_period = y_train.index[-1] + pd.to_timedelta(np.arange(prediction_size+1),'D')
# Drop the first entry (offset 0, i.e. the last observed index value itself).
forecast_period = forecast_period[1:]
hw_forecast = pd.concat([y_train['n_passengers'], pd.Series(forecast_values, index = forecast_period)], axis = 1)
hw_forecast.columns = ['Observed Passengers', 'HW-Forecast']
hw_forecast.plot()
Error:
NullFrequencyError Traceback (most recent call last)
<ipython-input-25-5f37a0dd0cfa> in <module>()
17
18 forecast_values = model_HW.forecast(prediction_size)
---> 19 forecast_period = y_train.index[-1] + pd.to_timedelta(np.arange(prediction_size+1),'D')
20 forecast_period = forecast_period[1:]
21
/anaconda3/lib/python3.6/site- packages/pandas/core/indexes/datetimelike.py in __radd__(self, other)
879 def __radd__(self, other):
880 # alias for __add__
--> 881 return self.__add__(other)
882 cls.__radd__ = __radd__
883
/anaconda3/lib/python3.6/site- packages/pandas/core/indexes/datetimelike.py in __add__(self, other)
842 # This check must come after the check for np.timedelta64
843 # as is_integer returns True for these
--> 844 result = self.shift(other)
845
846 # array-like others
/anaconda3/lib/python3.6/site-packages/pandas/core/indexes/datetimelike.py in shift(self, n, freq)
1049
1050 if self.freq is None:
-> 1051 raise NullFrequencyError("Cannot shift with no freq")
1052
1053 start = self[0] + n * self.freq
NullFrequencyError: Cannot shift with no freq

Use a pandas DataFrame created inside a function outside of the function

I am a Python beginner and wrote a function for a simple moving average strategy. I created a portfolio DataFrame inside the function and now I want to use this DataFrame outside of the function for plotting some graphs. My solution is: return portfolio - but this does not work. Can anybody help me?
This is my code:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Import a data source - FSE-Data with Index 'Date'
all_close_prices = pd.read_csv('FSE_daily_close.csv')
all_close_prices = all_close_prices.set_index('Date')
# Fill NaN values with the last available stock price.
# (.ffill() replaces the deprecated fillna(method='ffill').)
all_close_prices = all_close_prices.ffill()
# Import ticker symbols (the column names of the price table)
ticker_list = list(all_close_prices)
# Zalando 'FSE/ZO1_X' (position row 99) - doesn't begin in 2004
# Drop Zalando. DataFrame.drop returns a new frame, so the result has to be
# assigned back; the bare call left the column in place.
all_close_prices = all_close_prices.drop('FSE/ZO1_X', axis=1)
# Also from the ticker list
ticker_list.remove('FSE/ZO1_X')
# Create an empty signal dataframe with datetime index equivalent to the stocks
signals = pd.DataFrame(index=all_close_prices.index)
def ma_strategy(ticker, long_window, short_window):
    """Backtest a moving-average crossover strategy for one ticker.

    Reads the module-level ``all_close_prices`` and ``ticker_list`` and, as a
    side effect, writes the moving averages, signals and position changes of
    every ticker in ``ticker_list`` into the module-level ``signals`` frame.

    Args:
        ticker: Column name in ``all_close_prices`` of the stock to trade.
        long_window: Window length (rows) of the long moving average.
        short_window: Window length (rows) of the short moving average.

    Returns:
        A DataFrame indexed like ``all_close_prices`` with the columns
        ``holdings``, ``cash``, ``total``, ``return`` and ``return_cum``.
    """
    # Moving averages for all stocks; min_periods=1 gives values from the first row.
    moving_avg_long = all_close_prices.rolling(window=long_window, min_periods=1).mean()
    moving_avg_short = all_close_prices.rolling(window=short_window, min_periods=1).mean()

    # Add the two MAs for the stocks in the ticker_list to the signals dataframe
    for i in ticker_list:
        signals['moving_avg_short_' + i] = moving_avg_short[i]
        signals['moving_avg_long_' + i] = moving_avg_long[i]

    # Signal is 1 while the short MA is above the long MA, otherwise 0;
    # its first difference marks the rows where the signal switches.
    for i in ticker_list:
        signals['signal_' + i] = np.where(
            signals['moving_avg_short_' + i] > signals['moving_avg_long_' + i], 1, 0)
        signals['positions_' + i] = signals['signal_' + i].diff(periods=1)

    # Backtest
    initial_capital = float(100000)

    # Hold 100 shares while the signal is 1 and none while it is 0.
    positions = 100 * signals[['signal_' + ticker]]
    # Change in shares held from one row to the next (NaN on the first row).
    pos_diff = positions.diff()

    portfolio = pd.DataFrame(index=all_close_prices.index)
    # Market value of the shares held.
    portfolio['holdings'] = (positions.multiply(all_close_prices[ticker], axis=0)).sum(axis=1)
    # Starting capital minus the cumulative cost of all share purchases / sales.
    portfolio['cash'] = initial_capital - (
        pos_diff.multiply(all_close_prices[ticker], axis=0)).sum(axis=1).cumsum()
    portfolio['total'] = portfolio['cash'] + portfolio['holdings']
    # Row-over-row return and its running sum.
    portfolio['return'] = portfolio['total'].pct_change()
    portfolio['return_cum'] = portfolio['return'].cumsum()
    return portfolio
# Run the strategy for ticker 'FSE/VOW3_X' with a long window of 20 and a short window of 5.
# NOTE(review): the DataFrame returned by ma_strategy is not assigned to any name here.
ma_strategy('FSE/VOW3_X',20,5)
# Visualize the total value of the portfolio
portfolio_value = plt.figure(figsize=(12, 8))
ax1 = portfolio_value.add_subplot(1, 1, 1, ylabel='Portfolio value in $')
# Plot the equity curve in dollars
# NOTE(review): `portfolio` is only assigned inside ma_strategy; nothing in this
# snippet defines it at module level.
portfolio['total'].plot(ax=ax1, lw=2.)

You need to assign your function return value to a variable. The line which says
ma_strategy('FSE/VOW3_X',20,5)
probably needs to change to
portfolio = ma_strategy('FSE/VOW3_X',20,5)

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

efficient frontier/stock analyze - python

Related

Index Error when performing simple calculation

heatmap of values grouped by time - seaborn

Fetching date of high and low prices for week based on daily high low prices

Interpolation using ExponentialSmoothing from stats models

Use a pandas DataFrame created inside a function outside of the function

Categories

Resources