Stop loss and Take profit question in Quantopian - python

I need to set a stop loss and a take profit for every trade I make in Quantopian. This is the code I have at the moment, but it's not working as intended.
The order logic (to enter a short or long trade) is scheduled only once per day, while the take profit and stop loss should be checked every minute.
import talib as ta
import pandas

risk_per_trade = 500
factor_tp = 2
factor_sl = 2
Bars_count = 60

def initialize(context):
    context.stocks = [sid(4265), sid(5061)]
    schedule_function(orderlogic, date_rules.every_day(), time_rules.market_open(hours=0, minutes=10))

def orderlogic(context, data):
    hist = data.history(context.stocks, ['price', 'high', 'low', 'close', 'open'], bar_count=Bars_count, frequency='1d')
    for stock in context.stocks:
        atr = ta.ATR(hist['high'][stock], hist['low'][stock], hist['close'][stock], timeperiod=14)
        sma_20 = ta.SMA(hist['close'][stock], timeperiod=20)
        stop_size = factor_sl * atr[-1]
        amount_shares = round(risk_per_trade / stop_size)
        open_orders = get_open_orders()
        LongCondition = hist['price'][stock][-1] < sma_20[-1]
        SellCondition = hist['price'][stock][-1] > sma_20[-1]
        if LongCondition and stock not in open_orders and context.portfolio.positions[stock].amount == 0:
            order(stock, amount_shares)
        elif SellCondition and stock not in open_orders and context.portfolio.positions[stock].amount == 0:
            order(stock, -1 * amount_shares)

def handle_data(context, data):
    # record(leverage=context.account.leverage)
    for axion in context.stocks:
        current_price = data.current(axion, 'price')
        position = context.portfolio.positions[axion].amount
        price_position = context.portfolio.positions[axion].cost_basis
        pnl = (current_price - price_position) * position
        if position > 0 and current_price > price_position:
            if pnl >= factor_tp * risk_per_trade:
                order_target_percent(axion, 0)
                log.info("Buy with Take Profit hit " + str(axion.symbol))
        if position > 0 and current_price < price_position:
            if pnl <= -risk_per_trade:
                order_target_percent(axion, 0)
                log.info("Buy with Stop Loss hit " + str(axion.symbol))
        if position < 0 and current_price < price_position:
            if -pnl >= factor_tp * risk_per_trade:
                order_target_percent(axion, 0)
                log.info("Sell with Take Profit hit " + str(axion.symbol))
        if position < 0 and current_price > price_position:
            if pnl >= risk_per_trade:
                order_target_percent(axion, 0)
                log.info("Sell with Stop Loss hit " + str(axion.symbol))
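One thing worth checking in the handle_data logic above: for a short position, pnl = (current_price - cost_basis) * position is already positive when the trade is in profit and negative when it is losing, so the short-side comparisons (-pnl >= factor_tp * risk_per_trade and pnl >= risk_per_trade) test the wrong sign and can never fire. A sketch of handle_data with both sides expressed through pnl alone, under that reading (untested in a live Quantopian backtest):

def handle_data(context, data):
    for axion in context.stocks:
        position = context.portfolio.positions[axion].amount
        if position == 0:
            continue  # nothing to manage for this asset
        current_price = data.current(axion, 'price')
        cost_basis = context.portfolio.positions[axion].cost_basis
        # positive when in profit, negative when losing, for longs and shorts alike
        pnl = (current_price - cost_basis) * position
        side = "Buy" if position > 0 else "Sell"
        if pnl >= factor_tp * risk_per_trade:    # take profit
            order_target_percent(axion, 0)
            log.info(side + " with Take Profit hit " + str(axion.symbol))
        elif pnl <= -risk_per_trade:             # stop loss
            order_target_percent(axion, 0)
            log.info(side + " with Stop Loss hit " + str(axion.symbol))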

Related

How do I optimize code when the execution time is too slow?

I have this Python code, but it has already been running for 24 hours and still hasn't printed a result; I don't know how long it will take.
Can someone help me optimize it?
The code finds the best-performing parameters for trading RSI divergence over a certain period.
It first defines some parameter ranges for the RSI, then goes through every possible combination to find the one with the best performance.
I'm not an expert, so I don't really know how to change the code.
Happy to learn.
Thank you guys.
import pandas as pd
import numpy as np
import ta

def load_data(file_path, start_date, end_date):
    """
    Loads data for the specified symbol and date range from a CSV file
    """
    df = pd.read_csv(file_path)
    if 'Date' not in df.columns:
        df['Date'] = pd.to_datetime(df.index)
    df['Date'] = pd.to_datetime(df['Date'])
    df = df.set_index('Date')
    df = df[(df.index >= start_date) & (df.index <= end_date)]
    return df

def calc_rsi(df, n):
    """
    Calculates the relative strength index (RSI) for the given dataframe and window size
    """
    delta = df["Close"].diff()
    gain = delta.where(delta > 0, 0)
    loss = abs(delta.where(delta < 0, 0))
    avg_gain = gain.rolling(window=n).mean()
    avg_loss = loss.rolling(window=n).mean()
    rs = avg_gain / avg_loss
    rsi = 100 - (100 / (1 + rs))
    return rsi

def calc_pivot_point(df, pivot_point_type, pivot_point_n):
    """
    Calculates the pivot point for the given dataframe and pivot point type
    """
    if pivot_point_type == "Close":
        pivot_point = df["Close"].rolling(window=pivot_point_n).mean()
    elif pivot_point_type == "High/Low":
        pivot_point = (df["High"].rolling(window=pivot_point_n).mean() + df["Low"].rolling(window=pivot_point_n).mean()) / 2
    else:
        raise ValueError("Invalid pivot point type")
    return pivot_point

def calc_divergence(df, rsi, pivot_point, divergence_type, max_pivot_point, max_bars_to_check):
    """
    Calculates the divergence for the given dataframe and parameters
    """
    if divergence_type == "Regular":
        pivot_point_delta = pivot_point.diff()
        pivot_point_delta_sign = pivot_point_delta.where(pivot_point_delta > 0, -1)
        pivot_point_delta_sign[pivot_point_delta_sign > 0] = 1
        rsi_delta = rsi.diff()
        rsi_delta_sign = rsi_delta.where(rsi_delta > 0, -1)
        rsi_delta_sign[rsi_delta_sign > 0] = 1
        divergence = pivot_point_delta_sign * rsi_delta_sign
        divergence[divergence < 0] = -1
        divergence = divergence.rolling(window=max_pivot_point).sum()
        divergence = divergence.rolling(window=max_bars_to_check).sum()
        divergence = divergence.where(divergence > 0, 0)
        divergence[divergence < 0] = -1
    else:
        raise ValueError("Invalid divergence type")
    return divergence

def backtest(df, rsi_period, pivot_point_type, pivot_point_n, divergence_type, max_pivot_point, max_bars_to_check, trailing_stop, starting_capital):
    """
    Backtests the strategy for the given dataframe and parameters
    """
    rsi = calc_rsi(df, rsi_period)
    pivot_point = calc_pivot_point(df, pivot_point_type, pivot_point_n)
    divergence = calc_divergence(df, rsi, pivot_point, divergence_type, max_pivot_point, max_bars_to_check)
    positions = pd.DataFrame(index=df.index, columns=["Position", "Stop Loss"])
    positions["Position"] = 0.0
    positions["Stop Loss"] = 0.0
    capital = starting_capital
    for i, row in enumerate(df.iterrows()):
        date = row[0]
        close = row[1]["Close"]
        rsi_val = rsi.loc[date]
        pivot_val = pivot_point.loc[date]
        divergence_val = divergence.loc[date]
        if divergence_val > 0 and positions.loc[date]["Position"] == 0:
            positions.at[date, "Position"] = capital / close
            positions.at[date, "Stop Loss"] = close * (1 - trailing_stop)
        elif divergence_val < 0 and positions.loc[date]["Position"] > 0:
            capital = positions.loc[date]["Position"] * close
            positions.at[date, "Position"] = 0.0
            positions.at[date, "Stop Loss"] = 0.0
        elif close < positions.loc[date]["Stop Loss"] and positions.loc[date]["Position"] > 0:
            capital = positions.loc[date]["Position"] * close
            positions.at[date, "Position"] = 0.0
            positions.at[date, "Stop Loss"] = 0.0
    return capital

def find_best_iteration(df, start_rsi_period, end_rsi_period, pivot_point_types, start_pivot_point_n, end_pivot_point_n, divergence_types, start_max_pivot_point, end_max_pivot_point, start_max_bars_to_check, end_max_bars_to_check, start_trailing_stop, end_trailing_stop, starting_capital):
    """
    Finds the best iteration for the given parameters
    """
    best_result = 0.0
    best_params = None
    for rsi_period in range(start_rsi_period, end_rsi_period + 1):
        for pivot_point_type in pivot_point_types:
            for pivot_point_n in range(start_pivot_point_n, end_pivot_point_n + 1):
                for divergence_type in divergence_types:
                    for max_pivot_point in range(start_max_pivot_point, end_max_pivot_point + 1):
                        for max_bars_to_check in range(start_max_bars_to_check, end_max_bars_to_check + 1):
                            for trailing_stop in np.arange(start_trailing_stop, end_trailing_stop + 0.01, 0.01):
                                result = backtest(df, rsi_period, pivot_point_type, pivot_point_n, divergence_type, max_pivot_point, max_bars_to_check, trailing_stop, starting_capital)
                                if result > best_result:
                                    best_result = result
                                    best_params = (rsi_period, pivot_point_type, pivot_point_n, divergence_type, max_pivot_point, max_bars_to_check, trailing_stop)
    return best_result, best_params

# Define the parameters
file_path = 'C:\\Users\\The Death\\Downloads\\Binance_BTCUSDT_spot.csv'
start_date = "2020-03-16"
end_date = "2021-04-12"
df = load_data(file_path, start_date, end_date)

def load_data(start_date, end_date):
    # Your code to load the data for the specified date range
    # ...
    return df

# Define the parameters for the backtesting
start_rsi_period = 1
end_rsi_period = 30
pivot_point_types = ["Close", "High/Low"]
start_pivot_point_n = 1
end_pivot_point_n = 50
divergence_types = ["Regular"]
start_max_pivot_point = 1
end_max_pivot_point = 20
start_max_bars_to_check = 30
end_max_bars_to_check = 200
start_trailing_stop = 0.01
end_trailing_stop = 0.5
starting_capital = 10000

# Run the backtesting
df = load_data(start_date, end_date)
best_result, best_params = find_best_iteration(df, start_rsi_period, end_rsi_period, pivot_point_types, start_pivot_point_n, end_pivot_point_n, divergence_types, start_max_pivot_point, end_max_pivot_point, start_max_bars_to_check, end_max_bars_to_check, start_trailing_stop, end_trailing_stop, starting_capital)

# Print the results
print("Best result: ", best_result)
print("Best parameters: ", best_params)
I have two recommendations after scrolling through your code:
Reduce the use of nested for loops. Each extra layer multiplies the work: a single loop is O(n), and every nested layer raises the time complexity by another power. Your find_best_iteration() has about 7 layers of for loops, which is extremely costly.
Save and process your data in numpy arrays instead of a pd.DataFrame. A DataFrame is a class that carries many attributes you don't use, and its element-wise performance is also slower than numpy.array.
You can try the following methods to improve the performance:
The backtest() function is called many times inside find_best_iteration() under all those for loops, so the positions variable inside backtest() is updated very frequently, which is slow while positions is a DataFrame. Consider using a numpy array for positions, since it is better optimized for repeated updates.
You can try using the multiprocessing module in Python to parallelize the work, for example the calculation of the divergence variable or the independent backtest() calls; a sketch of the latter follows.
Hope this helps!
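A rough sketch of that parallelization, assuming the functions and parameter variables above exist at module level (run_one is a hypothetical helper; on Windows, where processes are spawned rather than forked, df and starting_capital would have to be passed to the workers explicitly):

import itertools
import multiprocessing as mp
import numpy as np

def run_one(params):
    # Evaluate one independent parameter combination
    (rsi_period, pivot_point_type, pivot_point_n, divergence_type,
     max_pivot_point, max_bars_to_check, trailing_stop) = params
    result = backtest(df, rsi_period, pivot_point_type, pivot_point_n,
                      divergence_type, max_pivot_point, max_bars_to_check,
                      trailing_stop, starting_capital)
    return result, params

if __name__ == "__main__":
    # itertools.product replaces the seven nested loops with one flat stream
    combos = itertools.product(
        range(start_rsi_period, end_rsi_period + 1),
        pivot_point_types,
        range(start_pivot_point_n, end_pivot_point_n + 1),
        divergence_types,
        range(start_max_pivot_point, end_max_pivot_point + 1),
        range(start_max_bars_to_check, end_max_bars_to_check + 1),
        np.arange(start_trailing_stop, end_trailing_stop + 0.01, 0.01),
    )
    with mp.Pool() as pool:
        # chunksize amortizes inter-process overhead over batches of combos
        best_result, best_params = max(pool.imap_unordered(run_one, combos, chunksize=256))
    print("Best result:", best_result)
    print("Best parameters:", best_params)

Note that parallelism only divides the runtime by the number of cores; with the ranges above there are hundreds of millions of combinations, so narrowing the ranges (or using a smarter search than a full grid) will matter far more than any micro-optimization.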

Need help improving backtest class and find optimal combinations

I'm trying to create a backtest class. The code I have for the class at the moment is this:
class Backtest:
    def __init__(self, df, signals, initial_capital, max_position_size, stop_loss, conditions):
        self.df = df
        self.signals = signals[signals.index.isin(self.df.index)]
        self.initial_capital = initial_capital
        self.positions = pd.DataFrame(index=signals.index).fillna(0.0)
        self.max_position_size = max_position_size
        self.stop_loss = stop_loss
        self.conditions = conditions
        self.stop_loss_levels = None
        self.portfolio_value = None
        self.portfolio_returns = None
        self.portfolio_risk = None
        self.results = None

    # this section applies conditions for buying, selling, closes
    def generate_trades(self):
        self.positions['buy'] = np.where(self.conditions['buy'], 1000 / self.df['close'], 0)
        self.positions['sell'] = np.where(self.conditions['sell'], -1000 / self.df['close'], 0)
        self.positions = self.positions.where(self.conditions['close'], 0)
        """
        conditions = {
            'buy': (self.signals['signal_rsi_oversold'] == 1) & (self.signals['signal_stochastic_oversold'] == 1),
            'sell': (self.signals['signal_rsi_overbought'] == 1) & (self.signals['signal_stochastic_overbought'] == 1),
            'close': self.df['close'] > self.stop_loss_levels
        }"""

    def calculate_exposure_time(self):
        self.positions['open_time'] = np.where(self.positions['buy'] > 0, self.df['date'], np.nan)
        self.positions['close_time'] = np.where(self.positions['sell'] > 0, self.df['date'], np.nan)
        self.positions['exposure_time'] = self.positions['close_time'] - self.positions['open_time']

    def backtest(self):
        self.generate_trades()
        # self.positions['buy'] = np.where(self.signals[buys].sum(axis=1) > 0, 1000 / self.df['close'], 0)
        # self.positions['sell'] = np.where(self.signals[sells].sum(axis=1) > 0, -1000 / self.df['close'], 0)
        portfolio = self.positions.multiply(self.df['close'], axis=0)
        pos_diff = self.positions.diff()
        portfolio['holdings'] = (self.positions.multiply(self.df['close'], axis=0)).sum(axis=1)
        portfolio['cash'] = self.initial_capital - (pos_diff.multiply(self.df['close'], axis=0)).sum(axis=1).cumsum()
        portfolio['total'] = portfolio['cash'] + portfolio['holdings']
        portfolio['returns'] = portfolio['total'].pct_change()
        self.results = portfolio
        # Calculate the average exposure time
        average_exposure_time = self.positions['exposure_time'].mean()
        # Calculate the Sharpe ratio
        sharpe_ratio = np.sqrt(252) * (self.results['returns'].mean() / self.results['returns'].std())
        # Calculate the maximum drawdown
        max_dd = (self.results['total'].cummax() - self.results['total']).max()
        # Calculate the maximum drawdown duration
        max_dd_duration = (self.results['total'].cummax() - self.results['total']).argmax()
        # Calculate the profit factor
        profit_factor = (self.results[self.results['returns'] > 0]['returns'].sum() / abs(self.results[self.results['returns'] < 0]['returns'].sum()))
        # Calculate the average loss and average profit
        trades = self.results[self.results['returns'] != 0]['returns']
        average_loss = trades[trades < 0].mean()
        average_profit = trades[trades > 0].mean()
        # Calculate the Calmar ratio
        calmar_ratio = self.results['returns'].mean() / max_dd
        # Calculate the number of trades taken
        num_trades = trades.count()
        # Calculate the number of winning trades and losing trades
        num_winning_trades = 0
        num_losing_trades = 0
        for i, row in self.results.iterrows():
            if row['returns'] > 0:
                num_winning_trades += 1
            elif row['returns'] < 0:
                num_losing_trades += 1
        # Calculate the win/loss ratio
        win_loss_ratio = num_winning_trades / num_losing_trades
        self.statistics = {
            'calmar_ratio': calmar_ratio,
            'sharpe_ratio': sharpe_ratio,
            'max_drawdown': max_dd,
            'max_drawdown_duration': max_dd_duration,
            'profit_factor': profit_factor,
            'average_loss': average_loss,
            'average_profit': average_profit,
            'num_trades': num_trades,
            'num_winning_trades': num_winning_trades,
            'num_losing_trades': num_losing_trades,
            'win_loss_ratio': win_loss_ratio,
            'exposure_time': average_exposure_time
        }
I also have this code that I'm trying to use to analyse the results, but I'm not sure how to integrate it:
def backtest_by_trade(self, buys, sells):
    # Initialize a dictionary to store the results for each trade
    self.results_by_trade = {}
    # Iterate over the combinations of buy and sell values
    for buy in buys:
        for sell in sells:
            # Create a copy of the signals dataframe
            signals = self.signals.copy()
            # Update the signals dataframe with the buy and sell values
            signals['buy'] = np.where(signals['signal_rsi_oversold'] == 1, buy, 0)
            signals['sell'] = np.where(signals['signal_rsi_overbought'] == 1, sell, 0)
            # Create a new Backtest object using the updated signals dataframe
            bt = Backtest(self.df, signals, initial_capital=10000)
            # Run the backtest
            bt.backtest()
            # Store the results in the results_by_trade dictionary
            self.results_by_trade[(buy, sell)] = bt.results
    # Calculate the profit or loss for each trade
    self.results_by_trade['profit_loss'] = self.results_by_trade['total_profit'] - self.results_by_trade['total_loss']
    # Calculate the return on investment for each trade
    self.results_by_trade['return_on_investment'] = self.results_by_trade['total_profit'] / self.results_by_trade['total_loss']
    # Calculate the profit factor for each trade
    self.results_by_trade['profit_factor'] = self.results_by_trade['total_profit'] / abs(self.results_by_trade['total_loss'])
    # Calculate the percentage of profitable trades for each combination
    self.results_by_trade['percent_profitable'] = self.results_by_trade['num_winning_trades'] / self.results_by_trade['num_trades']
    # Calculate the average profit per trade for each combination
    self.results_by_trade['avg_profit_per_trade'] = self.results_by_trade['total_profit'] / self.results_by_trade['num_winning_trades']
    # Calculate the average loss per trade for each combination
    self.results_by_trade['avg_loss_per_trade'] = self.results_by_trade['total_loss'] / self.results_by_trade['num_losing_trades']
    # Calculate the maximum consecutive winning trades for each combination
    self.results_by_trade['max_consecutive_winning_trades'] = self.results_by_trade['consecutive_winning_trades'].max()
    # Calculate the maximum consecutive losing trades for each combination
    self.results_by_trade['max_consecutive_losing_trades'] = self.results_by_trade['consecutive_losing_trades'].max()
    # Calculate the average consecutive winning trades for each combination
    self.results_by_trade['avg_consecutive_winning_trades']
In the end, I want to use different combinations of the conditions dictionary and analyse which combination of signals might be best. How can I do this?
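One possible approach, sketched under the assumption that df, signals and a stop_loss_levels series exist as in the class above (the named condition pools and the constructor argument values here are illustrative, not taken from the question): build a dictionary of candidate buy conditions and one of sell conditions, enumerate the pairs with itertools.product, run a Backtest per pair, and rank the stored statistics.

import itertools

buy_conditions = {
    'rsi+stoch oversold': (signals['signal_rsi_oversold'] == 1) & (signals['signal_stochastic_oversold'] == 1),
    'rsi oversold': signals['signal_rsi_oversold'] == 1,
}
sell_conditions = {
    'rsi+stoch overbought': (signals['signal_rsi_overbought'] == 1) & (signals['signal_stochastic_overbought'] == 1),
    'rsi overbought': signals['signal_rsi_overbought'] == 1,
}

results = {}
for (buy_name, buy_cond), (sell_name, sell_cond) in itertools.product(
        buy_conditions.items(), sell_conditions.items()):
    conditions = {'buy': buy_cond, 'sell': sell_cond,
                  'close': df['close'] > stop_loss_levels}
    bt = Backtest(df, signals, initial_capital=10000, max_position_size=1000,
                  stop_loss=0.02, conditions=conditions)
    bt.backtest()
    results[(buy_name, sell_name)] = bt.statistics

# Rank the combinations by whichever statistic matters to you, e.g. Sharpe
best = max(results, key=lambda k: results[k]['sharpe_ratio'])
print(best, results[best])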

Calculation of business working hours in Python

I would like to write a function that calculates business working hours in Python; I'd prefer not to define a class, and I want to use ready-made Python functions for the calculation.
I tried the following code, but it is not working well. I also need to modify it to report hours instead of minutes.
Do you have any suggestions?
def getminutes(datetime1, datetime2, worktiming=[9, 17]):
    day_hours = (worktiming[1] - worktiming[0])
    day_minutes = day_hours * 60  # minutes in a work day
    weekends = [6, 7]
    # Set initial default variables
    dt_start = datetime1.datetime  # datetime of start
    dt_end = datetime2.datetime    # datetime of end
    worktime_in_seconds = 0
    if dt_start.date() == dt_end.date():
        # starts and ends on same workday
        full_days = 0
        if dt_start in [6, 7]:
            return 0
        else:
            if dt_start.hour < worktiming[0]:
                # set start time to opening hour
                dt_start = datetime.datetime(
                    year=dt_start.year,
                    month=dt_start.month,
                    day=dt_start.day,
                    hour=worktiming[0],
                    minute=0)
            if dt_start.hour >= worktiming[1] or \
               dt_end.hour < worktiming[0]:
                return 0
            if dt_end.hour >= worktiming[1]:
                dt_end = datetime.datetime(
                    year=dt_end.year,
                    month=dt_end.month,
                    day=dt_end.day,
                    hour=worktiming[1],
                    minute=0)
            worktime_in_seconds = (dt_end - dt_start).total_seconds()
    elif (dt_end - dt_start).days < 0:
        # ends before start
        return 0
    else:
        # start and ends on different days
        current_day = dt_start  # marker for counting workdays
        while not current_day.date() == dt_end.date():
            if not is_weekend(current_day):
                if current_day == dt_start:
                    # increment hours of first day
                    if current_day.hour < worktiming[0]:
                        # starts before the work day
                        worktime_in_seconds += day_minutes * 60  # add 1 full work day
                    elif current_day.hour >= worktiming[1]:
                        pass  # no time on first day
                    else:
                        # starts during the working day
                        dt_currentday_close = datetime.datetime(
                            year=dt_start.year,
                            month=dt_start.month,
                            day=dt_start.day,
                            hour=worktiming[1],
                            minute=0)
                        worktime_in_seconds += (dt_currentday_close
                                                - dt_start).total_seconds()
                else:
                    # increment one full day
                    worktime_in_seconds += day_minutes * 60
            current_day += datetime.timedelta(days=1)  # next day
        # Time on the last day
        if not is_weekend(dt_end):
            if dt_end.hour >= worktiming[1]:  # finish after close
                # Add a full day
                worktime_in_seconds += day_minutes * 60
            elif dt_end.hour < worktiming[0]:  # close before opening
                pass  # no time added
            else:
                # Add time since opening
                dt_end_open = datetime.datetime(
                    year=dt_end.year,
                    month=dt_end.month,
                    day=dt_end.day,
                    hour=worktiming[0],
                    minute=0)
                worktime_in_seconds += (dt_end - dt_end_open).total_seconds()
    return int(worktime_in_seconds / 60)
How can I modify the code so that it works with the following input?
getminutes(2019-12-02 09:30:00,2019-12-07 12:15:00,worktiming=[9, 17])
You can use pd.bdate_range(datetime1, datetime2) to compute the number of working days. By converting the worktiming bounds to pandas datetimes as well, it is easy to compute the difference (in seconds) between the two datetimes:
import pandas as pd

datetime1 = "2019-12-02 09:30:00"
datetime2 = "2019-12-07 12:15:00"

def getminutes(datetime1, datetime2, worktiming=[9, 17]):
    d1 = pd.to_datetime(datetime1)
    d2 = pd.to_datetime(datetime2)
    wd = pd.bdate_range(d1, d2)  # working days
    day_hours = (worktiming[1] - worktiming[0])
    day_minutes = day_hours * 60   # minutes in a work day
    day_seconds = day_minutes * 60 # seconds in a work day
    full_days = len(wd)
    day1 = datetime1[:10]
    day2 = datetime2[:10]
    dt1 = pd.to_datetime(day1 + " " + str(worktiming[0]) + ":00")
    dt2 = pd.to_datetime(day2 + " " + str(worktiming[1]) + ":00")
    ex1, ex2 = 0, 0
    if day1 in wd:
        ex1 = max(pd.Timedelta(d1 - dt1).seconds, 0)
    if day2 in wd:
        ex2 = max(pd.Timedelta(dt2 - d2).seconds, 0)
    total_seconds = full_days * day_seconds - ex1 - ex2
    total_minutes = total_seconds / 60
    total_hours = total_minutes / 60
    return int(total_minutes)

print(getminutes(datetime1, datetime2))
Output: 2370
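The same 2370 can also be reproduced with numpy's business-day helpers instead of pd.bdate_range; a sketch under the same simplifying assumptions (weekends only, and the start/end times fall inside working hours on their own days):

import numpy as np
import pandas as pd

def getminutes_np(datetime1, datetime2, worktiming=[9, 17]):
    d1, d2 = pd.to_datetime(datetime1), pd.to_datetime(datetime2)
    # Business days in [d1, d2], counting the end date when it is a weekday
    full_days = np.busday_count(d1.date(), d2.date()) + np.is_busday(d2.date())
    minutes = full_days * (worktiming[1] - worktiming[0]) * 60
    if np.is_busday(d1.date()):  # trim the span between opening and the actual start
        opening = d1.replace(hour=worktiming[0], minute=0)
        minutes -= max((d1 - opening).total_seconds() / 60, 0)
    if np.is_busday(d2.date()):  # trim the span between the actual end and closing
        closing = d2.replace(hour=worktiming[1], minute=0)
        minutes -= max((closing - d2).total_seconds() / 60, 0)
    return int(minutes)

print(getminutes_np("2019-12-02 09:30:00", "2019-12-07 12:15:00"))  # 2370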

Converting R to Python for Micro Market Structure Zero Intelligence Algorithm, can't get the generation of market events to work properly

I have the following R code that I wanted to convert to Python, but I've run into a snag. When the code runs, a cancel buy or cancel sell order consumes one of the available buy or sell orders (nb, ns), and after many generated events the book runs out of orders. It seems the generation function I implemented is not creating enough new orders, so the order book shrinks faster than it grows. That is why my code eventually hits a KeyError: it ends up at a posn not found in my book when cancelling buy or sell orders. Below is the R code, followed by the Python code.
R code:
#Book setup
L <- 30    # Set number of price levels to be included in iterations
# Generate initial book
LL <- 1000 # Total number of levels in buy and sell books

# Initialize book with asymptotic depth of 5 shares
initializeBook5 <- function()
{
  Price <<- -LL:LL
  # Book shape is set to equal long-term average from simulation
  buySize <<- c(rep(5,LL-8),5,4,4,3,3,2,2,1,rep(0,LL+1))
  sellSize <<- c(rep(0,LL),0,1,2,2,3,3,4,4,5,rep(5,LL-8))
  book <<- data.frame(Price, buySize, sellSize)
  if(logging==T){
    eventLog <<- as.data.frame(matrix(0,nrow=numEvents,ncol=2))
    colnames(eventLog) <<- c("Type","Price")
    count <<- 0
    eventType <<- c("LB","LS","CB","CS","MB","MS")
    eventDescr <<- NA
  }
}

#Various utility functions
bestOffer <- function(){min(book$Price[book$sellSize>0])}
bestBid   <- function(){max(book$Price[book$buySize>0])}
spread    <- function(){bestOffer()-bestBid()}
mid       <- function(){(bestOffer()+bestBid())/2}

#Functions to find mid-market
bidPosn <- function() length(book$buySize[book$Price<=bestBid()])
askPosn <- function() length(book$sellSize[book$Price<=bestOffer()])
midPosn <- function(){floor((bidPosn()+askPosn())/2)}

#Display center of book
go <- function(){book[(midPosn()-20):(midPosn()+20),]}

#Display book shape
bookShape <- function(band){c(book$buySize[midPosn()+(-band:0)],book$sellSize[midPosn()+1:band])}
bookPlot <- function(band){
  plot((-band:band),bookShape(band),
       col="red",type="l",xlab="Price",ylab="Quantity")
}

#Choose from L whole numbers in (1,...,L) with uniform probability
pick <- function(m){sample(1:m,1)}

# Switch logging on
logging <- T

#Buy limit order
limitBuyOrder <- function(price=NA){
  if (is.na(price))
    {prx <<- (bestOffer()-pick(L))}
  else prx <<- price
  if(logging==T){eventLog[count,] <<- c("LB",prx)}
  book$buySize[book$Price==prx] <<- book$buySize[book$Price==prx]+1
}

#Sell limit order
limitSellOrder <- function(price=NA){
  if (is.na(price))
    {prx <<- (bestBid()+pick(L))}
  else prx <<- price
  if(logging==T){eventLog[count,] <<- c("LS",prx)}
  book$sellSize[book$Price==prx] <<- book$sellSize[book$Price==prx]+1
}

#Cancel buy order
cancelBuyOrder <- function(price=NA){
  q <- pick(nb)
  tmp <- cumsum(rev(book$buySize))  #Cumulative buy size from 0
  posn <- length(tmp[tmp>=q])       #gives position in list where cumulative size >q
  prx <<- book$Price[posn]
  if (!is.na(price)) {prx <<- price}
  if(logging==T){eventLog[count,] <<- c("CB",prx)}
  book$buySize[posn] <<- book$buySize[posn]-1
}

#Cancel sell order
cancelSellOrder <- function(price=NA){
  q <- pick(ns)
  tmp <- cumsum(book$sellSize)      #Cumulative sell size from 0
  posn <- length(tmp[tmp<q])+1
  prx <<- book$Price[posn]
  if (!is.na(price)) {prx <<- price}
  if(logging==T){eventLog[count,] <<- c("CS",prx)}
  book$sellSize[posn] <<- book$sellSize[posn]-1
}

#Market buy order
marketBuyOrder <- function(){
  prx <<- bestOffer()
  if(logging==T){eventLog[count,] <<- c("MB",prx)}
  book$sellSize[book$Price==prx] <<- book$sellSize[book$Price==prx]-1
}

#Market sell order
marketSellOrder <- function(){
  prx <<- bestBid()
  if(logging==T){eventLog[count,] <<- c("MS",prx)}
  book$buySize[book$Price==prx] <<- book$buySize[book$Price==prx]-1
}

#Generate an event and update the buy and sell books
#Note that limit orders may be placed inside the spread
generateEvent <- function()
{
  nb <<- sum(book$buySize[book$Price>=(bestOffer()-L)])  # Number of cancelable buy orders
  ns <<- sum(book$sellSize[book$Price<=(bestBid()+L)])   # Number of cancelable sell orders
  eventRate <- nb*delta+ns*delta + mu + 2*L*alpha
  probEvent <- c(L*alpha,L*alpha,nb*delta,ns*delta,mu/2,mu/2)/eventRate
  m <- sample(1:6, 1, replace=TRUE, probEvent)  #Choose event type
  switch(m,
         limitBuyOrder(),
         limitSellOrder(),
         cancelBuyOrder(),
         cancelSellOrder(),
         marketBuyOrder(),
         marketSellOrder()
  )
}

logging <- F
lambda <- 1
mus <- c(10,8,10,10)
nus <- c(1/5,1/5,1/6,1/8)
avgBookShapes <- as.data.frame(matrix(0,nrow=41,ncol=4))
for(i in 1:4){
  mu <- mus[i]
  nu <- nus[i]
  initializeBook5()
  numEvents <- 100000  # Average over 100,000 events
  avgBookShape <- bookShape(20)/numEvents
  for(count in 2:numEvents){
    generateEvent()
    avgBookShape <- avgBookShape+bookShape(20)/numEvents
  }
  avgBookShapes[,i] <- avgBookShape
}
Python code:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import random
import math

class Zibook():
    def __init__(self, ll, l, alpha, mu, delta, num_events, logging=False):
        self.ll = ll  # total number of levels in buy and sell books
        self.l = l    # number of price levels to be included in iterations
        self.alpha = alpha
        self.mu = mu
        self.delta = delta
        self.num_events = num_events
        self.logging = logging
        price = np.array(list(range(-self.ll, self.ll + 1, 1)))
        buy_size = np.array([5] * (self.ll - 8) + [5, 4, 4, 3, 3, 2, 2, 1] + [0] * (self.ll + 1))
        sell_size = np.array([0] * (self.ll) + [0, 1, 2, 2, 3, 3, 4, 4, 5] + [5] * (self.ll - 8))
        book = pd.DataFrame(index=price, columns=['Price', 'Buy Size', 'Sell Size', 'Type'])
        book['Price'] = price
        book['Buy Size'] = buy_size
        book['Sell Size'] = sell_size
        book = book.reset_index(drop=True)
        self.book = book
        event_type = ['LB', 'LS', 'CB', 'CS', 'MB', 'MS']
        event_descr = np.nan
        x = list(range(0, self.num_events, 1))
        event_log = pd.DataFrame(index=x, columns=['Type', 'Price'])
        self.event_log = event_log
        nb = sum(self.book.loc[self.book.Price >= (self.best_offer() - self.l), 'Buy Size'])  # number of cancellable buy orders
        ns = sum(self.book.loc[self.book.Price <= (self.best_bid() + self.l), 'Sell Size'])   # number of cancellable sell orders
        self.nb = nb
        self.ns = ns

    def best_offer(self):
        df = self.book
        a = df.loc[df['Sell Size'] > 0, 'Price'].min()
        return a

    def best_bid(self):
        df = self.book
        b = df.loc[df['Buy Size'] > 0, 'Price'].max()
        return b

    def spread(self):
        spread = (self.best_offer() - self.best_bid()) / 2
        return spread

    def mid(self):
        mid = (self.best_offer() + self.best_bid()) / 2
        return mid

    def bidposn(self):
        df = self.book
        a = len(df.loc[df.Price <= self.best_bid(), 'Buy Size']) - 1
        return a

    def askposn(self):
        df = self.book
        a = len(df[df['Price'] <= self.best_offer()]['Sell Size']) - 1
        return a

    def midposn(self):
        df = self.book
        a = ((self.bidposn() + self.askposn()) // 2)
        return a

    def centerbook(self):
        df = self.book
        mid = self.midposn()
        return df[mid - 20:mid + 21]

    def bookshape(self, band):
        df = self.book
        mid = self.midposn()
        x = np.arange(-band, 0)
        y = [df.loc[(mid + el), 'Buy Size'] for el in x]
        x1 = np.arange(0, band + 1)
        z = [df.loc[(mid + el), 'Sell Size'] for el in x1]
        seq3 = np.concatenate((y, z), axis=0)
        return seq3

    def bookplot(self, band):
        x = list(range(-band, band + 1, 1))
        seq3 = self.bookshape(band)
        plt.plot(x, seq3, color='red')
        plt.xlabel('Price')
        plt.ylabel('Quantity')
        return plt.show()

    def pick(self, l):
        a = np.random.choice(l, 1, replace=True, p=[1 / l] * l)
        return a[0]

    def limitbuyorder(self, price=None):
        if price == None:
            price = (self.best_offer() - self.pick(self.l))
        else:
            price = price
        df = self.book
        if self.logging == True:
            count = 0
            eventlog = self.event_log
            eventlog.loc[count, 'Price'] = price
            eventlog.loc[count, 'Type'] = 'LB'
        df.loc[df.Price == price, 'Buy Size'] += 1

    def limitsellorder(self, price=None):
        if price == None:
            price = (self.best_bid() + self.pick(self.l))
        else:
            price = price
        df = self.book
        if self.logging == True:
            count = 0
            eventlog = self.event_log
            eventlog.loc[count, 'Price'] = price
            eventlog.loc[count, 'Type'] = 'LS'
        df.loc[df.Price == price, 'Sell Size'] += 1

    def cancelbuyorder(self, price=None):
        df = self.book
        if price == None:
            q = self.pick(self.nb)
            tmp = np.array(df['Buy Size'].to_list()[::-1]).cumsum()
            posn = len(tmp[tmp >= q]) - 1
            price = df.Price[posn]
            df.loc[posn, 'Buy Size'] -= 1
        else:
            price = price
            df.loc[df.Price == price, 'Buy Size'] -= 1
        if self.logging == True:
            count = 0
            eventlog = self.event_log
            eventlog.loc[count, 'Price'] = price
            eventlog.loc[count, 'Type'] = 'CB'

    def cancelsellorder(self, price=None):
        #global ns
        df = self.book
        if price == None:
            q = self.pick(self.ns)
            tmp = np.array(df['Sell Size'].to_list()).cumsum()
            posn = len(tmp[tmp < q])
            price = df.Price[posn]
            df.loc[posn, 'Sell Size'] -= 1
        else:
            price = price
            df.loc[df.Price == price, 'Sell Size'] -= 1
        if self.logging == True:
            count = 0
            eventlog = self.event_log
            eventlog.loc[count, 'Price'] = price
            eventlog.loc[count, 'Type'] = 'CS'

    def marketbuyorder(self, price=None):
        df = self.book
        price = self.best_offer()
        if self.logging == True:
            count = 0
            eventlog = self.event_log
            eventlog.loc[count, 'Price'] = price
            eventlog.loc[count, 'Type'] = 'MB'
        df.loc[df.Price == price, 'Sell Size'] -= 1

    def marketsellorder(self, price=None):
        df = self.book
        price = self.best_bid()
        if self.logging == True:
            count = 0
            eventlog = self.event_log
            eventlog.loc[count, 'Price'] = price
            eventlog.loc[count, 'Type'] = 'MS'
        df.loc[df.Price == price, 'Buy Size'] -= 1

    def generateevent(self):
        df = self.book
        nb = sum(df.loc[df.Price >= (self.best_offer() - self.l), 'Buy Size'])  # number of cancellable buy orders
        ns = sum(df.loc[df.Price <= (self.best_bid() + self.l), 'Sell Size'])   # number of cancellable sell orders
        eventRate = nb * self.delta + ns * self.delta + self.mu + 2 * self.l * self.alpha
        probEvent = (self.l * self.alpha + self.l * self.alpha + nb * self.delta + ns * self.delta + self.mu * 0.5 + self.mu * 0.5) / eventRate
        a = np.random.choice(6, 1, replace=True, p=[probEvent / 6] * 6)
        idx = a[0]
        z = [self.limitbuyorder(), self.limitsellorder(), self.cancelbuyorder(), self.cancelsellorder(), self.marketbuyorder(), self.marketsellorder()]
        return z[idx]

alpha = 1
mu = 10
delta = 1 / 5
num_events = 100000
'''
a = Zibook(1000, 30, alpha, mu, delta, num_events, logging=False)
a.limitbuyorder(price=a.best_bid())
a.limitbuyorder(price=a.best_bid())
a.bookplot(20)
#print(a.generateevent())
#print(a.cancelbuyorder(price=None))
'''
lalpha = [1, 1, 1, 1]
lmu = [10, 8, 10, 10]
ldelta = [1/5, 1/5, 1/6, 1/8]
length = len(lalpha)
Avgbookshapes = np.array([])
num_events = 100000
for i in range(0, length, 1):
    alpha = lalpha[i]
    mu = lmu[i]
    delta = ldelta[i]
    a = Zibook(1000, 30, alpha, mu, delta, num_events, logging=False)
    for i1 in range(0, 100, 1):
        a.generateevent()
        #print(i1)
    avgbookshape = a.bookshape(20) / num_events
    #print(avgbookshape.shape)
    for i3 in range(2, num_events + 1, 1):
        a.generateevent()
        #print(i3)
        avgbookshape2 = a.bookshape(20) / num_events
        #print(avgbookshape2.shape)
        avgbookshape += avgbookshape2
    Avgbookshapes = np.append(Avgbookshapes, avgbookshape)
np.save('my_array', Avgbookshapes)
Any help will be greatly appreciated (for background, this algorithm is a zero-intelligence simulation of market microstructure: https://github.com/zecophy/MTH9879-Market-Microstructure-Models/blob/master/HW1/9879HW1_Chenyu_Zhao_graded.ipynb).
I just realized I didn't have the probabilities written out correctly in the generateevent function. But the book still decays to 0 orders; below is the modified Python code. Does anyone know why the orders decay instead of growing as they do in the R code?
def generateevent(self):
    df = self.book
    nb = sum(df.loc[df.Price >= (self.best_offer() - self.l), 'Buy Size'])  # number of cancellable buy orders
    ns = sum(df.loc[df.Price <= (self.best_bid() + self.l), 'Sell Size'])   # number of cancellable sell orders
    eventRate = nb * self.delta + ns * self.delta + self.mu + 2 * self.l * self.alpha
    probEvent = np.array([self.l * self.alpha, self.l * self.alpha, nb * self.delta, ns * self.delta, self.mu * 0.5, self.mu * 0.5]) / eventRate
    #print(probEvent)
    a = np.random.choice(6, 1, replace=True, p=probEvent)
    idx = a[0]
    z = [self.limitbuyorder(), self.limitsellorder(), self.cancelbuyorder(), self.cancelsellorder(), self.marketbuyorder(), self.marketsellorder()]
    return z[idx]
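The decay most likely comes from the dispatch line rather than the probabilities: z = [self.limitbuyorder(), self.limitsellorder(), ...] calls all six handlers on every event (the list just collects their None return values, and z[idx] returns None). So each event adds two limit orders but also executes two cancels and two market orders, draining the book. R's switch() evaluates only the chosen branch; the Python equivalent is to store the bound methods without parentheses and call only the sampled one. A sketch, which also refreshes self.nb/self.ns (set once in __init__ and otherwise stale when the cancel handlers call pick()):

def generateevent(self):
    df = self.book
    nb = df.loc[df.Price >= (self.best_offer() - self.l), 'Buy Size'].sum()
    ns = df.loc[df.Price <= (self.best_bid() + self.l), 'Sell Size'].sum()
    self.nb, self.ns = nb, ns  # keep the cancel handlers' pick() in sync
    eventRate = nb * self.delta + ns * self.delta + self.mu + 2 * self.l * self.alpha
    probEvent = np.array([self.l * self.alpha, self.l * self.alpha,
                          nb * self.delta, ns * self.delta,
                          self.mu * 0.5, self.mu * 0.5]) / eventRate
    handlers = [self.limitbuyorder, self.limitsellorder,
                self.cancelbuyorder, self.cancelsellorder,
                self.marketbuyorder, self.marketsellorder]  # bound methods, not calls
    idx = np.random.choice(6, p=probEvent)
    return handlers[idx]()  # execute only the sampled event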

Python Last 6 Results, removing the last

I just can't get it done, so I'll post the full code.
The .csv used is from http://www.football-data.co.uk/mmz4281/1415/E0.csv
When run, the variables home_team_a, home_team_d, away_team_a and away_team_d are based on all of the previous matches, but I want them always to be based on the last 6 matches only.
import csv, math, ast, numpy as np

def poisson(actual, mean):
    return math.pow(mean, actual) * math.exp(-mean) / math.factorial(actual)

csvFile = '20152016.csv'
team_list = []
k = open('team_list.txt', 'w')
k.write("""{
""")
csvRead = csv.reader(open(csvFile))
next(csvRead)
for row in csvRead:
    if row[2] not in team_list:
        team_list.append(row[2])
    if row[3] not in team_list:
        team_list.append(row[3])
team_list.sort()
for team in team_list:
    k.write("""    '%s': {'home_goals': 0, 'away_goals': 0, 'home_conceded': 0, 'away_conceded': 0, 'home_games': 0, 'away_games': 0, 'alpha_h': 0, 'beta_h': 0, 'alpha_a': 0, 'beta_a': 0},
""" % (team))
k.write("}")
k.close()

s = open('team_list.txt', 'r').read()
dict = ast.literal_eval(s)
GAMES_PLAYED = 0
WEEKS_WAIT = 4
TOTAL_VALUE = 0
csvRead = csv.reader(open(csvFile))
next(csvRead)

for game in csvRead:
    home_team = game[2]
    away_team = game[3]
    home_goals = int(game[4])
    away_goals = int(game[5])
    home_win_prob = 0
    draw_win_prob = 0
    away_win_prob = 0
    curr_home_goals = 0
    curr_away_goals = 0
    avg_home_goals = 1
    avg_away_goals = 1
    team_bet = ''
    ev_bet = ''
    # GETTING UPDATED VARIABLES
    for key, value in dict.items():
        curr_home_goals += dict[key]['home_goals']
        curr_away_goals += dict[key]['away_goals']
    if GAMES_PLAYED > (WEEKS_WAIT * 10):
        avg_home_goals = curr_home_goals / (GAMES_PLAYED)
        avg_away_goals = curr_away_goals / (GAMES_PLAYED)
    # CALCULATING FACTORS
    if GAMES_PLAYED > (WEEKS_WAIT * 10):
        home_team_a = (dict[home_team]['alpha_h'] + dict[home_team]['alpha_a']) / 2
        away_team_a = (dict[away_team]['alpha_h'] + dict[away_team]['alpha_a']) / 2
        home_team_d = (dict[home_team]['beta_h'] + dict[home_team]['beta_a']) / 2
        away_team_d = (dict[away_team]['beta_h'] + dict[away_team]['beta_a']) / 2
        home_team_exp = avg_home_goals * home_team_a * away_team_d
        away_team_exp = avg_away_goals * away_team_a * home_team_d
        # RUNNING POISSON
        l = open('poisson.txt', 'w')
        for i in range(10):
            for j in range(10):
                prob = poisson(i, home_team_exp) * poisson(j, away_team_exp)
                l.write("Prob%s%s = %s\n" % (i, j, prob))
        l.close()
        with open('poisson.txt') as f:
            for line in f:
                home_goals_m = int(line.split(' = ')[0][4])
                away_goals_m = int(line.split(' = ')[0][5])
                prob = float(line.split(' = ')[1])
                if home_goals_m > away_goals_m:
                    home_win_prob += prob
                elif home_goals_m == away_goals_m:
                    draw_win_prob += prob
                elif home_goals_m < away_goals_m:
                    away_win_prob += prob
        # CALCULATE VALUE
        bet365odds_h, bet365odds_d, bet365odds_a = float(game[23]), float(game[24]), float(game[25])
        ev_h = (home_win_prob * (bet365odds_h - 1)) - (1 - home_win_prob)
        ev_d = (draw_win_prob * (bet365odds_d - 1)) - (1 - draw_win_prob)
        ev_a = (away_win_prob * (bet365odds_a - 1)) - (1 - away_win_prob)
        highestEV = max(ev_h, ev_d, ev_a)
        if (ev_h == highestEV) and (ev_h > 0):
            team_bet = home_team
            ev_bet = ev_h
            if home_goals > away_goals:
                TOTAL_VALUE += (bet365odds_h - 1)
            else:
                TOTAL_VALUE -= 1
        elif (ev_d == highestEV) and (ev_d > 0):
            team_bet = 'Draw'
            ev_bet = ev_d
            if home_goals == away_goals:
                TOTAL_VALUE += (bet365odds_d - 1)
            else:
                TOTAL_VALUE -= 1
        elif (ev_a == highestEV) and (ev_a > 0):
            team_bet = away_team
            ev_bet = ev_a
            if home_goals < away_goals:
                TOTAL_VALUE += (bet365odds_a - 1)
            else:
                TOTAL_VALUE -= 1
        if (team_bet != '') and (ev_bet != ''):
            print("Bet on '%s' (EV = %s)" % (team_bet, ev_bet))
            print(TOTAL_VALUE)
    # UPDATE VARIABLES AFTER MATCH HAS BEEN PLAYED
    dict[home_team]['home_goals'] += home_goals
    dict[home_team]['home_conceded'] += away_goals
    dict[home_team]['home_games'] += 1
    dict[away_team]['away_goals'] += away_goals
    dict[away_team]['away_conceded'] += home_goals
    dict[away_team]['away_games'] += 1
    GAMES_PLAYED += 1
    # CREATE FACTORS
    if GAMES_PLAYED > (WEEKS_WAIT * 10):
        for key, value in dict.items():
            alpha_h = (dict[key]['home_goals'] / dict[key]['home_games']) / avg_home_goals
            beta_h = (dict[key]['home_conceded'] / dict[key]['home_games']) / avg_away_goals
            alpha_a = (dict[key]['away_goals'] / dict[key]['away_games']) / avg_away_goals
            beta_a = (dict[key]['away_conceded'] / dict[key]['away_games']) / avg_home_goals
            dict[key]['alpha_h'] = alpha_h
            dict[key]['beta_h'] = beta_h
            dict[key]['alpha_a'] = alpha_a
            dict[key]['beta_a'] = beta_a
Use a deque to keep the 6 most recent items in memory; adding a new record will "push out" the oldest one.
import collections
import itertools
import csv

with open("foo.csv") as fh:
    # Skip the first 44 rows
    csv_read = itertools.islice(csv.reader(fh), 44, None)
    # Initialize the deque with the next 6 rows
    d = collections.deque(itertools.islice(csv_read, 6), 6)
    for record in csv_read:
        d.append(record)
        print(list(d))  # Rows 46-51, then 47-52, then 48-53, etc
Because you set the maximum length of the deque to 6, each append to a "full" deque pushes out the older one. On the first iteration, d.append pushes out row 45 and adds row 51. On the next iteration, adding row 52 pushes out row 46, etc.
In general, a deque is a data structure that is like a combination of a queue and a stack; you can add or remove items to either end efficiently, but accessing an arbitrary item or modifying the "middle" is slow. Here, we're taking advantage of the fact that appending to a full deque causes an implicit removal from the opposite end.
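A quick demonstration of that push-out behaviour, using the row numbers from the example above:

import collections

d = collections.deque([45, 46, 47, 48, 49, 50], maxlen=6)  # rows currently held
d.append(51)        # the deque is full, so 45 falls out of the left end
print(d)            # deque([46, 47, 48, 49, 50, 51], maxlen=6)
print(d[0], d[-1])  # oldest and newest rows still held: 46 51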
How about:
if seen_records == 200:
    recs = list(csvRead)[seen_records - 6:seen_records + 1]
You can do something like this....
import csv

previous_index = 0
previous_max = 6  # max number of previous records to remember
previous = [None for _ in range(previous_max)]
csvFile = 'X.csv'
seen_records = 0
csvRead = csv.reader(open(csvFile))
# Enumerate over the records to keep track of the index of each one
for i, record in enumerate(csvRead):
    if i > 50:
        seen_records += 1
        if previous_index == previous_max:
            previous_index = 0  # Reset to the beginning when we reach the end
        # Store the record and increment the index to the next location
        previous[previous_index] = record
        previous_index += 1
This creates a very basic circular buffer of length previous_max: each new record overwrites the slot after the previous one, wrapping back to index 0, so previous always holds the six most recent records (in rotating order rather than strictly oldest-first).
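Applied to the football data in the question, the same idea can be kept per team, so that home_team_a and the other factors are always computed from each team's six most recent matches. A sketch (the per-match tuple stored here is illustrative; the column indices follow the question's CSV handling):

import collections
import csv

# One deque per team; appending a 7th match silently drops the oldest
last6 = collections.defaultdict(lambda: collections.deque(maxlen=6))

with open('20152016.csv') as fh:
    reader = csv.reader(fh)
    next(reader)  # skip the header row
    for game in reader:
        home_team, away_team = game[2], game[3]
        home_goals, away_goals = int(game[4]), int(game[5])
        # ...compute the factors here from last6[home_team] / last6[away_team]
        # instead of from the all-time totals in dict...
        last6[home_team].append(('H', home_goals, away_goals))
        last6[away_team].append(('A', away_goals, home_goals))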
