GARCH and future volatility monte carlo simulation - python

I'm trying to run a rolling volatility (GARCH) model using this Python code:
import pandas as pd
import numpy as np
from matplotlib import style
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
class monte_carlo:
    """Monte Carlo simulation of a portfolio value following a geometric
    Brownian motion, plus a Value-at-Risk estimate from the simulated paths.

    Attributes:
        S: start value of the portfolio.
        mu: expected return per simulated step (calculated by CAPM, per the
            original author).
        sigma: volatility per simulated step.
        c: confidence level used by :meth:`VaR` (e.g. ``0.99``).
        simulation_df: set by :meth:`brownian_motion`; one column per
            simulated path, one row per step (row 0 is the start value).
        predicted_days: set by :meth:`brownian_motion`; the requested horizon.
    """

    def __init__(self, S, mu, sigma, c):
        self.S = S
        self.mu = mu
        self.sigma = sigma
        self.c = c

    def brownian_motion(self, num_sim, pred_days, max_days=251):
        """Simulate ``num_sim`` price paths and store them on the instance.

        Args:
            num_sim: number of independent paths to simulate.
            pred_days: requested number of steps per path.
            max_days: upper bound on the simulated steps (the original code
                hard-coded 251 to restrict the simulation to one year).

        Side effects:
            Sets ``self.simulation_df`` (shape ``(steps + 1, num_sim)``) and
            ``self.predicted_days`` (the requested ``pred_days``).
        """
        steps = max(0, min(pred_days, max_days))
        drift = self.mu - 0.5 * self.sigma ** 2
        # One multiplicative shock per (path, step), drawn in a single call.
        growth = np.exp(
            drift + self.sigma * np.random.normal(size=(num_sim, steps))
        )
        # Prepending the start value lets a running product yield
        # S, S*g1, S*g1*g2, ... for every path at once.
        start = np.full((num_sim, 1), self.S, dtype=float)
        paths = np.cumprod(np.hstack([start, growth]), axis=1)
        # Build the frame in one go instead of inserting columns one by one.
        self.simulation_df = pd.DataFrame(paths.T)
        self.predicted_days = pred_days

    def plot(self):
        """Plot every simulated path with a horizontal line at the start value."""
        # Select the style before the figure is created so that figure-level
        # settings are taken from it as well.
        style.use('bmh')
        fig = plt.figure()
        title = "Monte Carlo Simulation: " + str(self.predicted_days) + " Days"
        plt.plot(self.simulation_df)
        fig.suptitle(title, fontsize=18, fontweight='bold')
        plt.xlabel('Day')
        plt.ylabel('Price ($USD)')
        plt.grid(True, color='grey')
        plt.axhline(y=self.S, color='r', linestyle='-')
        plt.show()

    def VaR(self):
        """Print and return the relative Value at Risk at confidence ``c``.

        The value is ``1 - q / S`` where ``q`` is the ``(1 - c) * 100``-th
        percentile of the final simulated prices.

        Returns:
            The Value at Risk as a fraction of the start value.
        """
        final_prices = self.simulation_df.iloc[-1, :].to_numpy()
        # np.percentile handles ordering itself, so no pre-sorting is needed.
        percentile = np.percentile(final_prices, (1 - self.c) * 100)
        value_at_risk = 1 - (percentile / self.S)
        print("Value at Risk: ", value_at_risk)
        return value_at_risk
if __name__ == "__main__":
    # Example run: simulate 1000 paths over 10 days and print the VaR.
    S = 100000000
    c = 0.99
    mu = 0.00024
    sigma = 0.02
    sim = monte_carlo(S=S, mu=mu, sigma=sigma, c=c)
    sim.brownian_motion(num_sim=1000, pred_days=10)
    #sim.plot()
    sim.VaR()
    #sim.key_stats()
    #symbols = ['AAPL', 'KO', 'HD', 'PM']
    #weights = [1000,1000,2000,3000]
    #sim.get_portfolio(symbols, weights)
    #sim.get_asset('AAPL')
Can someone help me with this implementation?

Related

How to specifically assign X and Y Axis on matplotlib in Python?

I'm trying to create a Monte Carlo simulation to simulate the price of a stock.
Every day, the price of the stock changes. The change is determined by a random variable. The stock prices over the number of days (numDays) is captured in a list, stock_price_list.
I've created an array, monte_list, to store a bunch of different stock_price_lists. I want to graph all those stock_price_lists on the same graph. So I've created the variable numSimulations, which is supposed to create numSimulations number of rows in monte_list.
As far as I can tell, monte_list works. It's an array with one column and numSimulations numbers of rows. These rows are populated with stock_price_lists, which are themselves lists of stock price data.
stock_price_list works; I've graphed it multiple times.
I think that monte_list works too; at least, when I print the array, it returns information that looks correct.
My problem is that the axes are graphing the wrong variables.
The X axis is graphing numSimulations.
The Y axis is graphing stock price.
I WANT the X axis to graph numDays, NOT numSimulations, but I can't figure out how to change that.
I'd really love any advice. (Note that I hope to make numDays and numSimulations much bigger, but wanted to use smaller numbers to get the hang of things.)
# Per-step mean and standard deviation of the random return.
daily_mean = .06 / 250
daily_stdev = .2 / (250 ** .5)
start_stock_price = 100
numDays = 7
numSimulations = 5
monte_arr = pd.DataFrame({'FirstCol': numSimulations}, index=[0])
# this is a test: I'm trying to create a list of numSimulations Nones,
# then fill them all with stock_price_lists in the for loop
monte_list = [None] * numSimulations
for j in range(numSimulations):
    stock_price_list = [start_stock_price]
    daily_stock_price = start_stock_price
    # add a col of stock price data
    for i in range(numDays):
        # generates a random return (a one-element array)
        daily_ret = np.random.normal(daily_mean, daily_stdev, 1)
        daily_stock_price = daily_stock_price * (1 + daily_ret)
        stock_price_list.append(float(daily_stock_price))
    np.array(stock_price_list)  # result is discarded
    #arr = np.array(stock_price_list)
    #arr[j] = stock_price_list
    monte_list[j] = stock_price_list  # somehow stock_price_list is over-writing cols
# I think monte_list generates numSimulations of stock_price_list entries.
# Problem: the axes are wrong. X axis should have numDays on it. Y should have prices
# I want X axis to be numDays
# NOTE: monte_list holds one inner list per simulation, and it is passed to
# plt.plot as a whole, so the x-axis runs over simulations, not over days.
plt.figure(figsize=(14, 5))
plt.plot(monte_list)
plt.title("monte list")
plt.show()
Blockquote
So, it actually turns out that I figured out how to code this with some help from a friend.
I created a for loop to plot various elements of monte_list.
import numpy as np
import pandas as pd
from pandas_datareader import data as wb
from scipy.stats import norm
import matplotlib.pyplot as plt
import statsmodels as sm
import math
# Per-step mean and standard deviation of the random return.
daily_mean = .06 / 250
daily_stdev = .2 / (250 ** .5)
start_stock_price = 100
numDays = 250
numSimulations = 100
monte_arr = pd.DataFrame({'FirstCol': numSimulations}, index=[0])

# One price path (a list of numDays + 1 prices) per simulation.
monte_list = []
for j in range(numSimulations):
    stock_price_list = [start_stock_price]
    daily_stock_price = start_stock_price
    for i in range(numDays):
        # Draw a scalar return; asking for size 1 would yield an array that
        # then has to be squeezed back into a float.
        daily_ret = np.random.normal(daily_mean, daily_stdev)
        daily_stock_price = daily_stock_price * (1 + daily_ret)
        stock_price_list.append(float(daily_stock_price))
    monte_list.append(stock_price_list)

plt.figure(figsize=(14, 5))
plt.title("Monte List")
plt.xlabel("Number of Days")
plt.ylabel("Stock price")
# Plot each simulated path on its own so the x-axis is the day index.
# Iterate over the paths themselves: monte_list has numSimulations entries,
# so indexing it with range(numDays) fails whenever numDays > numSimulations.
for path in monte_list:
    plt.plot(path)
plt.show()

changing interval at which simulation fits ARIMA (help w/ for-loop)

I'm currently working on a trading strategy simulator that fits an ARIMA to stock return data, makes a next day prediction, then buys/sells based on that prediction. It continues to accumulate shares until a sell signal is generated, at which point the program will liquidate the accumulated position and begin again.
Right now, I specify an interval of dates, then the loop will start by fitting an ARIMA to the first 14 days of return data, making a prediction for day 15, acting on the prediction, then it will begin again with the first 15 days, fitting a new ARIMA. It will continue this until it gets to the end of the range of dates specified, with each new iteration adding the previous day's sample.
So, basically n increases by 1 for every iteration of the loop. I don't want this. I want it to repeatedly fit to an interval of a fixed length. For example, say I'm testing a strategy over 500 trading days. For the first iteration I want the loop to take the 50 days prior to day 1 of the specified interval and fit an ARIMA, and then trade in the same manner as before, but for the next iteration of the loop, I don't want it to fit to 51 days, I want to fit the 50 days prior to the current date every time.
Here's the start of the simulation function where the for-loop is specified. I can't seem to figure out how to change the loop to accomplish my goal. Any help would be greatly appreciated!!
def run_simulation(returns, prices, amt, order, thresh, verbose=True, plot=True):
    # (Excerpt: only the start of the simulation function is shown.)
    # A float `order` disables the threshold.
    if type(order) == float:
        thresh = None
    curr_holding = False
    sum_list = []
    events_list = []
    sharpe_list = []
    init_amt = amt
    # Walk through the dates, skipping the first 14 observations.
    tail = returns.iloc[14:]
    for date, r in tqdm(tail.items(), total=len(tail)):
        # Everything from the start of `returns` through `date`, so the
        # fitted sample grows by one observation per iteration.
        curr_data = returns[:date]
        # Only a tuple `order` means "fit an ARIMA of that order".
        if type(order) == tuple:
            fitted = ARIMA(curr_data, order=order).fit()
            model = fitted
            print(model.summary())
            # One-step-ahead forecast
            pred = model.forecast()
            print(pred)
            float_pred = float(pred)
Here's the full script for context:
import yfinance as yf
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima.model import ARIMA
import numpy as np
import seaborn as sns
from tqdm import tqdm
import pandas as pd
from statsmodels.tools.sm_exceptions import ValueWarning, HessianInversionWarning, ConvergenceWarning
import warnings
# In practice do not suppress these warnings; they carry important
# information about the status of your model.
for _category in (ValueWarning, HessianInversionWarning, ConvergenceWarning):
    warnings.filterwarnings('ignore', category=_category)

# Download closing prices for the ticker and derive simple returns.
tickerSymbol = 'SPY'
data = yf.Ticker(tickerSymbol)
prices = data.history(start='2021-01-01', end='2022-01-03').Close
returns = prices.pct_change().dropna()
def std_dev(data):
    """Return the sample standard deviation (``n - 1`` denominator) of *data*.

    Args:
        data: a sized iterable of numbers.

    Returns:
        The standard deviation, or NaN when fewer than two observations are
        given (the sample variance is undefined in that case).
    """
    n = len(data)
    if n < 2:
        return float("nan")
    mean = sum(data) / n
    # Sum of squared deviations from the mean.
    deviations = sum((x - mean) ** 2 for x in data)
    variance = deviations / (n - 1)
    return variance ** (1 / 2)


# Sharpe Ratio From Scratch
def sharpe_ratio(data, risk_free_rate=0, periods_per_year=252):
    """Return the annualised Sharpe ratio of the per-period returns in *data*.

    Args:
        data: a sized iterable of per-period returns.
        risk_free_rate: per-period risk-free rate subtracted from the mean.
        periods_per_year: number of periods used to annualise the ratio
            (its square root scales the per-period ratio).

    Returns:
        The annualised Sharpe ratio, or NaN when it is undefined: fewer than
        two observations, or zero standard deviation.
    """
    n = len(data)
    if n < 2:
        return float("nan")
    mean_daily_return = sum(data) / n
    print(f"mean daily return = {mean_daily_return}")
    s = std_dev(data)
    if s == 0:
        # All returns identical: the ratio would divide by zero.
        return float("nan")
    daily_sharpe_ratio = (mean_daily_return - risk_free_rate) / s
    return periods_per_year ** (1 / 2) * daily_sharpe_ratio
def _safe_sharpe(trade_returns, risk_free_rate):
    """Return ``sharpe_ratio`` of *trade_returns*, or NaN if it divides by zero."""
    try:
        return sharpe_ratio(trade_returns, risk_free_rate=risk_free_rate)
    except ZeroDivisionError:
        # Happens e.g. when no trade, or only one trade, was completed.
        return float("nan")


def run_simulation(returns, prices, amt, order, thresh, verbose=True, plot=True,
                   window=None):
    """Back-test a buy/accumulate/liquidate strategy over ``returns``.

    One share is bought at every buy signal; at a sell signal the whole
    accumulated position is liquidated and ``amt`` is scaled by the realised
    return of that position.

    Args:
        returns: pandas Series of returns indexed by date.
        prices: pandas Series of prices with the same index labels.
        amt: starting amount of money.
        order: selects the strategy. A ``tuple`` is used as the ARIMA order
            and signals come from the one-step forecast compared with
            ``thresh``; a ``float`` is a probability for random buy/sell
            signals; the string ``'last'`` buys when the latest return is
            positive.
        thresh: forecast threshold for the ARIMA strategy (ignored for a
            float ``order``).
        verbose: print the model summary, forecasts and every trade.
        plot: draw the price series with trade markers.
        window: if given, fit on only the ``window`` most recent observations
            instead of on everything seen so far. ``None`` keeps the original
            expanding sample.

    Returns:
        The final amount after all completed trades.

    Raises:
        ValueError: if ``window`` is given and is smaller than 1.
    """
    if window is not None and window < 1:
        raise ValueError("window must be a positive integer or None")

    is_random = isinstance(order, float)
    is_arima = isinstance(order, tuple)
    if is_random:
        thresh = None

    warmup = 14  # observations skipped before the first trading decision
    curr_holding = False
    sum_list = []      # purchase price of each share currently held
    events_list = []   # ('b', date) and ('s', date, ret) tuples, in order
    sharpe_list = []   # realised return of each liquidated position
    init_amt = amt

    trade_dates = returns.iloc[warmup:]
    for date, r in tqdm(trade_dates.items(), total=len(trade_dates)):
        # Label-based slicing includes the end label, so this is the data up
        # to and including the current date.
        curr_data = returns[:date]
        if window is not None:
            curr_data = curr_data.iloc[-window:]
        last_return = curr_data.iloc[-1]

        # The forecast only exists for the ARIMA strategy.
        float_pred = None
        pred_text = 'n/a'
        if is_arima:
            model = ARIMA(curr_data, order=order).fit()
            pred = model.forecast()
            float_pred = float(np.asarray(pred).ravel()[0])
            pred_text = round(float_pred, 4)
            if verbose:
                print(model.summary())
                print(pred)

        # Buy one more share whenever the active strategy signals a buy
        # (shares accumulate until a sell signal).
        buy_signal = (
            (is_random and np.random.random() < order)
            or (is_arima and float_pred > thresh)
            or (order == 'last' and last_return > 0)
        )
        if buy_signal:
            # Keep the exact price: truncating it to an int would understate
            # the cost basis and inflate the computed return.
            buy_price = float(prices.loc[date])
            events_list.append(('b', date))
            sum_list.append(buy_price)
            curr_holding = True
            if verbose:
                print('Bought at $%s' % buy_price)
                print('Predicted Return: %s' % pred_text)
                print(f"Current holdings = {sum(sum_list)}")
                print('=======================================')
            continue

        # Liquidate the whole position on a sell signal.
        # NOTE(review): the 'last' sell condition equals the 'last' buy
        # condition, which already `continue`d above, so a 'last' strategy
        # never reaches a sell; the intended sell rule should be confirmed.
        sell_signal = curr_holding and (
            (is_random and np.random.random() < order)
            or (is_arima and float_pred < thresh)
            or (order == 'last' and last_return > 0)
        )
        if sell_signal:
            sell_price = float(prices.loc[date])
            cost_basis = sum(sum_list)
            total_return = len(sum_list) * sell_price
            ret = (total_return - cost_basis) / cost_basis
            amt *= (1 + ret)
            events_list.append(('s', date, ret))
            sharpe_list.append(ret)
            sum_list.clear()
            curr_holding = False
            if verbose:
                print('Sold at $%s' % sell_price)
                print('Predicted Return: %s' % pred_text)
                print('Actual Return: %s' % (round(ret, 4)))
                print('=======================================')

    if verbose:
        sharpe = _safe_sharpe(sharpe_list, 0.004)
        print('Total Amount: $%s' % round(amt, 2))
        print(f"Sharpe Ratio: {sharpe}")

    # graph
    if plot:
        plt.figure(figsize=(10, 4))
        plt.plot(prices.iloc[warmup:])
        y_lims = (int(prices.min() * .95), int(prices.max() * 1.05))
        shaded_y_lims = int(prices.min() * .5), int(prices.max() * 1.5)
        for idx, event in enumerate(events_list):
            plt.axvline(event[1], color='k', linestyle='--', alpha=0.4)
            if event[0] == 's':
                # Shade from the preceding event to this sell:
                # green for a gain, red otherwise.
                color = 'green' if event[2] > 0 else 'red'
                plt.fill_betweenx(range(*shaded_y_lims),
                                  event[1], events_list[idx - 1][1],
                                  color=color, alpha=0.1)
        tot_return = round(100 * (amt / init_amt - 1), 2)
        sharpe = _safe_sharpe(sharpe_list, 0)
        tot_return = str(tot_return) + '%'
        plt.title("%s Price Data\nThresh=%s\nTotal Amt: $%s\nTotal Return: %s"%(tickerSymbol, thresh, round(amt,2), tot_return), fontsize=20)
        plt.ylim(*y_lims)
        plt.show()
        print(sharpe)
    return amt
# A model that takes a d-th difference and then fits an ARMA(p,q) model is
# called an ARIMA process of order (p,d,q). You can select p, d, and q with a
# wide range of methods, including AIC, BIC, and empirical autocorrelations
# (Petris, 2009).
for thresh in [0.001]:
    run_simulation(returns, prices, amt=100000, order=(7, 0, 0),
                   thresh=thresh, verbose=True)
solution:
curr_data = returns[:date]
curr_data_sliced = curr_data[-14:]
.
.
.
model=ARIMA(curr_data_sliced, ... )
Change the slice index to set how many of the most recent dates are used,
e.g. [-50:] to fit each iteration on only the 50 most recent data points.

How to save data to csv Cantera and error <cantera.composite.SolutionArray object at 0x7f4badca0fd0>

I have this Cantera script. I want to save the data to CSV for both parts of the script: the first part, which evaluates Tfinal vs. autoignition delay time, and the second part, which evaluates the NTC behavior. For the first part, the example suggests uncommenting # timeHistory.to_csv("time_history.csv"), but that doesn't work. I think I need to create a DataFrame because the object is not well defined (I suppose). In addition, I also saw this error: <cantera.composite.SolutionArray object at 0x7f4badca0fd0>.
How can I solve this, and how can I create the two csv for this script?
Thank you very much
import pandas as pd
import numpy as np
import time
import cantera as ct
print('Runnning Cantera version: ' + ct.__version__)
import matplotlib.pyplot as plt

# Plot appearance
plt.rcParams.update({
    'axes.labelsize': 18,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
    'figure.autolayout': True,
})
plt.style.use('ggplot')
plt.style.use('seaborn-pastel')

gas = ct.Solution('Seiser.cti')

# Reactor temperature and pressure
reactor_temperature = 1000  # Kelvin
reactor_pressure = 101325  # Pascals
gas.TP = reactor_temperature, reactor_pressure

# Fuel, oxidizer and stoichiometry
gas.set_equivalence_ratio(phi=1.0, fuel="nc7h16", oxidizer={"o2": 1.0, "n2": 3.76})

# A batch reactor, which is the only reactor in its network
r = ct.IdealGasReactor(contents=gas, name="Batch Reactor")
reactor_network = ct.ReactorNet([r])

# SolutionArray that collects the saved states, with the time `t` stored as
# an extra entry alongside each state
time_history = ct.SolutionArray(gas, extra="t")
def ignition_delay(states, species):
    """
    Compute the ignition delay from the occurrence of the peak in the
    species' concentration.

    Args:
        states: callable with a species name that yields an object whose
            ``Y`` attribute holds the concentration history; it must also
            expose the matching times as ``states.t``.
        species: name of the species whose peak marks ignition.

    Returns:
        The entry of ``states.t`` at the index where ``Y`` is largest.
    """
    i_ign = states(species).Y.argmax()
    return states.t[i_ign]
reference_species = "oh"

# Tic
t0 = time.time()

# Starting estimate. If no ignition occurs within this time, increase it.
estimated_ignition_delay_time = 0.1
t = 0
counter = 1
while t < estimated_ignition_delay_time:
    t = reactor_network.step()
    if counter % 10 == 0:
        # Save only every 10th value; otherwise this takes too long.
        # Note that the species concentrations are mass fractions.
        time_history.append(r.thermo.state, t=t)
    counter += 1

# The 'oh' species is used to compute the ignition delay
tau = ignition_delay(time_history, reference_species)

# Toc
t1 = time.time()
print(f"Computed Ignition Delay: {tau:.3e} seconds. Took {t1-t0:3.2f}s to compute")

# Save all the data - mole fractions, temperature, pressure, etc.
# NOTE(review): time_history is a ct.SolutionArray here, not a pandas
# DataFrame; confirm that it provides `to_csv` (SolutionArray documents a
# `write_csv` method instead).
time_history.to_csv("time_history_TEST.csv")

plt.figure()
plt.plot(time_history.t, time_history(reference_species).Y, "-o")
plt.xlabel("Time (s)")
plt.ylabel("$Y_{OH}$")
plt.xlim([0,0.05])
# Arrow from t = 0 to the computed ignition delay
plt.arrow(0, 0.008, tau, 0, width=0.0001, head_width=0.0005,
          head_length=0.001, length_includes_head=True, color="r", shape="full")
plt.annotate(r"$Ignition Delay: \tau_{ign}$", xy=(0,0), xytext=(0.01, 0.0082), fontsize=16)
# All the temperatures to run simulations at
T = np.hstack((np.arange(1800, 900, -100), np.arange(975, 475, -25)))
estimated_ignition_delay_times = np.ones_like(T, dtype=float)

# Empirical time adjustments for the highest and lowest temperatures
estimated_ignition_delay_times[:6] = 0.1
estimated_ignition_delay_times[-4:-2] = 10
estimated_ignition_delay_times[-2:] = 100

# SolutionArray with one state per temperature and an extra `tau` entry
ignition_delays = ct.SolutionArray(gas, shape=T.shape, extra={"tau": estimated_ignition_delay_times})
ignition_delays.set_equivalence_ratio(1.0, fuel="nc7h16", oxidizer={"o2": 1.0, "n2": 3.76})
ignition_delays.TP = T, reactor_pressure

for i, state in enumerate(ignition_delays):
    # Set up the gas and a fresh reactor for this state
    gas.TPX = state.TPX
    r = ct.IdealGasReactor(contents=gas, name="Batch Reactor")
    reactor_network = ct.ReactorNet([r])

    reference_species_history = []
    time_history = []  # rebinds the name to a plain list of times

    t0 = time.time()
    t = 0
    while t < estimated_ignition_delay_times[i]:
        t = reactor_network.step()
        time_history.append(t)
        reference_species_history.append(gas[reference_species].X[0])

    # Ignition is taken as the time of the reference species' peak
    i_ign = np.array(reference_species_history).argmax()
    tau = time_history[i_ign]
    t1 = time.time()
    elapsed = t1 - t0
    print('Computed Ignition Delay: {:.3e} seconds for T={}K. Took {:3.2f}s to compute'.format(tau, state.T, elapsed))
    ignition_delays.tau[i] = tau

fig = plt.figure()
ax = fig.add_subplot(111)
ax.semilogy(1000/ignition_delays.T, ignition_delays.tau, 'o-')
ax.set_ylabel('Ignition Delay (s)')
ax.set_xlabel(r'$\frac{1000}{T (K)}$', fontsize=18)

# Second axis on top showing the temperature, for better readability
ax2 = ax.twiny()
ticks = ax.get_xticks()
ax2.set_xticks(ticks)
ax2.set_xticklabels((1000/ticks).round(1))
ax2.set_xlim(ax.get_xlim())
ax2.set_xlabel(r'Temperature: $T(K)$')
I modified the first part of the script. I removed the definition of time_history as ct.SolutionArray(gas, extra="t") because it made it difficult to build a usable DataFrame for saving the data. I now use pandas to save to CSV, but the CSV file is created with only the column names and is never filled with data. Moreover, I see this error:
Traceback (most recent call last):
File "test.py", line 77, in <module>
tau = ignition_delay(tHyBatch_base, reference_species)
File "test.py", line 50, in ignition_delay
i_ign = states(species).Y.argmax()
TypeError: 'DataFrame' object is not callable
import pandas as pd
import numpy as np
import time
import csv
import cantera as ct
print('Running Cantera version: ' + ct.__version__)
import matplotlib.pyplot as plt

# Plot appearance
plt.rcParams.update({
    'axes.labelsize': 18,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
    'figure.autolayout': True,
})
plt.style.use('ggplot')
plt.style.use('seaborn-pastel')

gas = ct.Solution('Seiser.cti')

# Reactor temperature and pressure
reactor_temperature = 1000  # Kelvin
reactor_pressure = 101325  # Pascals
gas.TP = reactor_temperature, reactor_pressure

# Fuel, oxidizer and stoichiometry
gas.set_equivalence_ratio(phi=1.0, fuel="nc7h16", oxidizer={"o2": 1.0, "n2": 3.76})

# A batch reactor, which is the only reactor in its network
r = ct.IdealGasReactor(contents=gas, name="Batch Reactor")
reactor_network = ct.ReactorNet([r])

# Column names for the stored data: 'pressure' followed by the name of every
# reactor component
columnNames = ['pressure'] + [r.component_name(item) for item in range(r.n_vars)]

# Empty DataFrame with those columns, indexed by time
tHyBatch_base = pd.DataFrame(columns=columnNames)
tHyBatch_base.index.name = 'time'
def ignition_delay(states, species):
    """
    Compute the ignition delay from the occurrence of the peak in the
    species' concentration.

    Args:
        states: object that can be called with a species name, returning
            something whose ``Y`` attribute is the concentration history,
            and that exposes the corresponding times as ``states.t``.
        species: name of the species whose peak marks ignition.

    Returns:
        The time in ``states.t`` at the index of the maximum of ``Y``.
    """
    i_ign = states(species).Y.argmax()
    return states.t[i_ign]
reference_species = "oh"

# Tic
t0 = time.time()

# Starting estimate. If no ignition occurs within this time, increase it.
estimated_ignition_delay_time = 0.1
t = 0
counter = 1
while t < estimated_ignition_delay_time:
    t = reactor_network.step()
    if counter % 10 == 0:
        # Save only every 10th value; otherwise this takes too long.
        # Note that the species concentrations are mass fractions.
        state = np.hstack([r.thermo.state])
        # Update the dataframe
        # NOTE: the value returned by `append` is not assigned to anything
        # here, so tHyBatch_base itself is not rebound by this statement.
        row = pd.Series(state, index=tHyBatch_base.columns[:len(state)])
        tHyBatch_base.append(row, ignore_index=True)
    counter += 1

tHyBatch_base.to_csv("TESTCSV.csv")

# The 'oh' species is used to compute the ignition delay
# NOTE: ignition_delay calls its first argument as `states(species)`;
# here it receives a pandas DataFrame.
tau = ignition_delay(tHyBatch_base, reference_species)

# Toc
t1 = time.time()
print(f"Computed Ignition Delay: {tau:.3e} seconds. Took {t1-t0:3.2f}s to compute")
Can someone help? Thanks to anyone who can give me an answer to this problem, which is intrinsic to using pandas.
You should only change the command
timeHistory.to_csv("time_history.csv")
as below :
time_history.write_csv('time_history.csv')

Matplotlib Bar Chart - x-axis Categorical Variables changes whenever code re-runs (Numpy)

I'm trying to plot a simple bar chart of the average 'resale_price' (y-axis) for each flat type against 'town' (x-axis) for data from 2015-2019. However, for some reason, my x-axis keeps changing every time I re-run my code. I'm not sure where I've gone wrong.
dataset: https://data.gov.sg/dataset/resale-flat-prices
here's the code i've used below
# Sort the unique town names: the iteration order of a set of strings is not
# fixed from one interpreter run to the next, so an unsorted list makes the
# printout (and any labels built from it) change between runs.
labels1 = sorted(set(data_3room['town']))
town1 = np.arange(0, len(labels1))
town1_values = data_3room[['town', 'resale_price']]
values1 = town1_values['resale_price'] / 1000
# print(values1)

# Average resale price (in thousands) per town
avg_values1 = {}
for i in labels1:
    valuesfortown1 = values1[town1_values['town'] == i]
    avg1 = np.average(valuesfortown1)
    print("Average 3 Room Resale Price for town " + i + " is {:.0f}".format(avg1))
    avg_values1[i] = avg1

from collections import OrderedDict
from operator import itemgetter

# Order the towns from highest to lowest average price
avg_values1 = OrderedDict(sorted(avg_values1.items(), key=itemgetter(1), reverse=True))
sorted_towns1 = list(avg_values1.keys())

plt.figure(1, figsize=(30, 30))
barchart1 = plt.bar(sorted_towns1, list(avg_values1.values()), color='#d62728')

# Write each bar's value at the top of the bar
for bar1, avg1 in zip(barchart1, avg_values1.values()):
    x1, y1 = bar1.get_xy()
    h1 = bar1.get_height()
    plt.text(x1, h1, "{:.0f}".format(avg1), fontsize=30)

plt.title('3 Room Resale Prices by Town', fontsize=40)
plt.ylabel('Resale Prices (Thousands)', fontsize=40)
plt.yticks(fontsize=20)
# Label the ticks with the towns in the same (value-sorted) order as the bars;
# using labels1 here would attach names that do not match the bars.
plt.xticks(town1, sorted_towns1, fontsize=40, rotation='vertical')

Use a pandas DataFrame created inside a function outside of the function

I am a Python beginner and wrote a function for a simple moving average strategy. I created a portfolio DataFrame inside the function and now I want to use this DataFrame outside of the function for plotting some graphs. My solution is: return portfolio - but this does not work. Can anybody help me?
This is my code:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Import a data source - FSE-Data with Index 'Date'
all_close_prices = pd.read_csv('FSE_daily_close.csv')
all_close_prices = all_close_prices.set_index('Date')
# Fill NaN values with the last available stock price
all_close_prices = all_close_prices.ffill()
# Zalando 'FSE/ZO1_X' doesn't begin in 2004, so drop it.
# `drop` returns a new DataFrame; the result has to be assigned back for the
# column to actually be removed.
all_close_prices = all_close_prices.drop('FSE/ZO1_X', axis=1)
# Ticker symbols are the remaining column names
ticker_list = list(all_close_prices)
# Create an empty signal dataframe with an index equal to that of the stocks
signals = pd.DataFrame(index=all_close_prices.index)
def ma_strategy(ticker, long_window, short_window):
    """Back-test a moving-average crossover strategy for one ticker.

    Reads the module-level ``all_close_prices`` and ``ticker_list`` and, as a
    side effect, adds moving-average, signal and position columns for every
    ticker in ``ticker_list`` to the module-level ``signals`` DataFrame.

    Args:
        ticker: column name in ``all_close_prices`` to trade.
        long_window: window length of the long moving average.
        short_window: window length of the short moving average.

    Returns:
        A DataFrame indexed like ``all_close_prices`` with the columns
        ``holdings``, ``cash``, ``total``, ``return`` and ``return_cum``.
    """
    # Calculate the moving averages
    moving_avg_long = all_close_prices.rolling(window=long_window, min_periods=1).mean()
    moving_avg_short = all_close_prices.rolling(window=short_window, min_periods=1).mean()

    # Add the two MAs for the stocks in the ticker_list to the signals dataframe
    for i in ticker_list:
        signals['moving_avg_short_' + i] = moving_avg_short[i]
        signals['moving_avg_long_' + i] = moving_avg_long[i]

    # Set up the signals: 1 while the short MA is above the long MA, else 0;
    # `positions_` is the day-over-day change of that signal.
    for i in ticker_list:
        signals['signal_' + i] = np.where(
            signals['moving_avg_short_' + i] > signals['moving_avg_long_' + i], 1, 0)
        signals['positions_' + i] = signals['signal_' + i].diff(periods=1)

    # Backtest
    initial_capital = 100000.0

    # Hold 100 shares on the days the signal is 1 and none when it is 0.
    positions = 100 * signals[['signal_' + ticker]]
    # Difference in shares owned from one day to the next
    pos_diff = positions.diff()

    portfolio = pd.DataFrame(index=all_close_prices.index)
    # Value of the shares held
    portfolio['holdings'] = (positions.multiply(all_close_prices[ticker], axis=0)).sum(axis=1)
    # Cash left after paying for (or receiving from) every change in position
    portfolio['cash'] = initial_capital - (pos_diff.multiply(all_close_prices[ticker], axis=0)).sum(
        axis=1).cumsum()
    portfolio['total'] = portfolio['cash'] + portfolio['holdings']
    portfolio['return'] = portfolio['total'].pct_change()
    # Running sum of the per-period percentage changes
    portfolio['return_cum'] = portfolio['return'].cumsum()
    return portfolio
# NOTE: the DataFrame returned by ma_strategy is not bound to a name here, so
# the `portfolio` used further down is not defined at module level.
ma_strategy('FSE/VOW3_X', long_window=20, short_window=5)

# Visualize the total value of the portfolio
portfolio_value = plt.figure(figsize=(12, 8))
ax1 = portfolio_value.add_subplot(1, 1, 1, ylabel='Portfolio value in $')

# Plot the equity curve in dollars
portfolio['total'].plot(ax=ax1, lw=2.)
You need to assign your function return value to a variable. The line which says
ma_strategy('FSE/VOW3_X',20,5)
probably needs to change to
portfolio = ma_strategy('FSE/VOW3_X',20,5)

Categories

Resources