Error with copying values from one sheet to other - python

I am trying to copy the values from some cells, but it gives me this error. I even tried without using the def cell(x,y) helper, but I still get the same error.
This is the error:
learn_tar.cell(row=learn_tar, column=1).value = sheet.cell(row=learn_tar, column=1).value
AttributeError: 'int' object has no attribute 'cell'
Source:
import openpyxl
def cell(x, y):
    """Return the value stored at row ``x``, column ``y`` of ``sheet``.

    ``sheet`` is the module-level worksheet; both indices are forwarded
    unchanged to ``sheet.cell(row=..., column=...)``.
    """
    # Return directly rather than binding a local that shadows this
    # function's own name.
    return sheet.cell(row=x, column=y).value
def percentage(percent, whole):
    """Return ``percent`` percent of ``whole``, truncated toward zero to an int."""
    return int((percent * whole) / 100.0)
# Load the workbook and select the sheet that the cell() helper reads from.
ex = openpyxl.load_workbook("Final_excel2.xlsx")
sheet = ex.get_sheet_by_name('Sheet1')
# num[k] counts the rows whose column-1 label is category k; per[k] is 70% of num[k].
num = [0,0,0]
per = [0,0,0]
for row in range(2,4798):
if cell(row,1) == '1: Progression':
num[0] = num[0] + 1
elif cell(row,1) == '2: Incidence':
num[1] = num[1] + 1
elif cell(row,1) == '3: Non-exposed control group':
num[2] = num[2] + 1
for column in range(2,49):
#doing stuff
per[0] = percentage(70,num[0])
per[1] = percentage(70,num[1])
per[2] = percentage(70,num[2])
# Create the four output sheets at workbook positions 2-5.
learn_att = ex.create_sheet('Learn-Att',2)
learn_tar = ex.create_sheet('Learn-Tar',3)
test_att = ex.create_sheet('Test-Att',4)
test_tar = ex.create_sheet('Test-Tar',5)
# NOTE(review): the four assignments below rebind the worksheet names to the int 1,
# so every later `<name>.cell(...)` call is made on an int. The row counters
# presumably need their own variable names -- confirm.
learn_att = 1
learn_tar = 1
test_att = 1
test_tar = 1
# NOTE(review): the thresholds 974 and 1391 are hard-coded; per[] is computed above
# but not used in the code shown -- confirm how they relate.
for row in range(2,4798):
if row<=1391:
if row<=974:
learn_tar.cell(row=learn_tar, column=1).value = cell(row,1)
learn_att+= 1
learn_tar+= 1
else:
test_tar.cell(row = test_tar,column = 1).value = cell(row,1)
test_att+= 1
test_tar+= 1
# Source columns 2-48 are written one column to the left (column - 1).
for column in range(2,49):
if row<=1391:
if row<=974:
learn_att.cell(row = learn_att,column = column - 1).value = cell(row,column)
else:
test_att.cell(row = test_att,column = column - 1).value = cell(row,column)

You override learn_tar with 1:
learn_tar = ex.create_sheet('Learn-Tar',3)
...
learn_tar = 1
Remove:
learn_tar = 1
and:
learn_tar+= 1
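from your code, and keep the row counter in a separately named integer variable (for example learn_tar_row), because row= still needs an int while learn_tar must stay the worksheet.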

Related

Why my openpyxl code is slower than my VBA code?

I have an excel file of nearly 95880 rows. I made a VBA function that runs slow, so I tried to code a python script using openpyxl, but it's even slower.
It starts fast, then after 600 rows becomes slower and slower.
The VBA Code is
Option Explicit
' Returns the 0/1 flag computed by Check2 (when Cess = 0) or by Check1 (otherwise).
Function FTE(Assunzione As Date, Cess As Variant, Data)
Dim myDate As Date
Dim EndDate As Date, EndDate2 As Date
Dim check As Integer
' Last day of the month that contains Assunzione.
EndDate = Application.WorksheetFunction.EoMonth(Assunzione, 0)
myDate = #1/1/2022#
If Cess = 0 Then
' check is filled in by the Sub through its parameter (VBA passes arguments ByRef by default).
Call Check2(Assunzione, Data, myDate, EndDate, check)
FTE = check
Else:
' Last day of the month before the one that contains Cess.
EndDate2 = Application.WorksheetFunction.EoMonth(Cess, -1)
Call Check1(Assunzione, Cess, Data, myDate, EndDate, EndDate2, check)
FTE = check
End If
End Function
' Sets check to 0 or 1 from Assunzione, Cess and Data (variant used when Cess is not 0).
Sub Check1(Assunzione, Cess, Data, myDate, EndDate, EndDate2, check)
Dim Cess1 As Date
Dim gg_lav As Integer, gg_lav2 As Integer
' NOTE(review): .Value implies Cess is expected to be a Range here -- confirm.
Cess1 = Cess.Value
' NOTE(review): Date is VBA's current-date function, not the Data argument -- confirm which is intended.
If Assunzione > Date Then
check = 0
Else
If Month(Assunzione) <= Month(Data) And Year(Assunzione) = 2022 Then
If Assunzione > myDate Then
' Days from Assunzione to the end of its month, counting both ends.
gg_lav = Application.WorksheetFunction.Days(EndDate, Assunzione) + 1
If gg_lav >= 15 Then
If Month(Data) = (Month(EndDate2) + 1) And Year(Cess1) = 2022 Then
' Days from the end of the previous month (EndDate2) to Cess1.
gg_lav2 = Application.WorksheetFunction.Days(Cess1, EndDate2)
If gg_lav2 >= 15 Then
check = 1
Else
check = 0
End If
Else
check = 1
End If
Else
check = 0
End If
Else
check = 1
End If
Else
check = 1
End If
End If
End Sub
' Sets check to 0 or 1 from Assunzione and Data (variant used when Cess = 0).
Sub Check2(Assunzione, Data, myDate, EndDate, check)
Dim gg_lav As Integer
' NOTE(review): Date is VBA's current-date function, not the Data argument -- confirm which is intended.
If Assunzione > Date Then
check = 0
Else
If Month(Assunzione) <= Month(Data) And Year(Assunzione) = 2022 Then
If Assunzione > myDate Then
' Days from Assunzione to the end of its month, counting both ends.
gg_lav = Application.WorksheetFunction.Days(EndDate, Assunzione) + 1
If gg_lav >= 15 Then
check = 1
Else
check = 0
End If
Else
check = 1
End If
Else
check = 1
End If
End If
End Sub
and my openpyxl is:
def check1(a, d, c, i):
    """Return the 0/1 flag for row ``i`` of ``ws`` when column ``c`` holds a date.

    Args:
        a: Column index of the date being tested.
        d: Column index of the date it is compared against.
        c: Column index of the second (later-stage) date.
        i: Row index in the module-level worksheet ``ws``.

    Returns:
        0 when the ``a`` date is after the ``d`` date, when fewer than 15 days
        remain between the ``a`` date and the end of its month, or when fewer
        than 15 days separate the ``c`` date from the end of the preceding
        month; 1 in every other case.
    """
    # Local import: the file's import block is not visible from this snippet.
    from datetime import timedelta

    # Read each cell once instead of calling ws.cell() for every use.
    val_a = ws.cell(row=i, column=a).value
    val_d = ws.cell(row=i, column=d).value
    val_c = ws.cell(row=i, column=c).value

    if val_a > val_d:
        return 0
    if val_a.month != val_d.month or val_a.year != 2022:
        return 1

    month_end = date(val_a.year, val_a.month,
                     calendar.monthrange(val_a.year, val_a.month)[1])
    if (month_end - datetime.date(val_a)).days < 15:
        return 0

    # Last day of the month before val_c. Stepping back one day from the first
    # of the month also works for January, where ``month - 1`` would be 0 and
    # date() would raise ValueError.
    prev_month_end = date(val_c.year, val_c.month, 1) - timedelta(days=1)
    if val_d.month != prev_month_end.month or val_c.year != 2022:
        return 1
    return 1 if (datetime.date(val_c) - prev_month_end).days >= 15 else 0
def check2(a, d, i):
    """Return the 0/1 flag for row ``i`` of ``ws`` when no ``c`` date is present.

    Args:
        a: Column index of the date being tested.
        d: Column index of the date it is compared against.
        i: Row index in the module-level worksheet ``ws``.

    Returns:
        0 when the ``a`` date is after the ``d`` date, or when both fall in the
        same month of 2022 and fewer than 15 days remain until the end of that
        month; 1 otherwise.
    """
    val_a = ws.cell(row=i, column=a).value
    val_d = ws.cell(row=i, column=d).value

    # Compare against column d. The original compared column a with itself,
    # so this branch could never return 0.
    if val_a > val_d:
        return 0
    if val_a.month != val_d.month or val_a.year != 2022:
        return 1

    month_end = date(val_a.year, val_a.month,
                     calendar.monthrange(val_a.year, val_a.month)[1])
    return 1 if (month_end - datetime.date(val_a)).days >= 15 else 0
# Write one flag per source row into column 1 of a sheet in a new workbook.
wb1 = Workbook()
ws1 = wb1.create_sheet()
for i in range(2, 95882):
    # An empty cell in column c selects the variant that needs no c date.
    if ws.cell(row=i, column=c).value is None:
        flag = check2(a, d, i)
    else:
        flag = check1(a, d, c, i)
    ws1.cell(row=i, column=1, value=flag)
What am I doing wrong? Should I use another library, or am I making the code needlessly memory-consuming?
Thank you very much for any help!
Update: I think that the problem was with openpyxl. First I tried to reduce the number of observation, from 95K to almost 5K, but it required two and half hour to complete the task.
So I used numpy and it took 55 seconds. Yeah, that's the difference in processing speed.
Here I post the code:
# newline='' lets the csv module handle line endings itself, as the csv docs recommend.
with open('data.csv', 'r', newline='') as f:
    data = list(csv.reader(f, delimiter=';'))
arr = np.array(data)
# NOTE(review): np.resize truncates or repeats the data to reach exactly
# (4797, 13) and never raises on a size mismatch -- confirm the file really
# has that shape.
arr = np.resize(arr, (4797, 13))
I had to change of course the code in this section:
# Column indices into each row of arr; passed to check1/check2 as a, d and c.
a = 3
d = 0
c = 4
def check1(a, d, c, i):
    """Return the 0/1 flag for row ``i`` of ``arr`` when column ``c`` is filled.

    The cells are read as integer day serials (presumably Excel date serials --
    TODO confirm) and converted with ``datetime.fromordinal(serial + 693594)``.

    Args:
        a: Column index of the date being tested.
        d: Column index of the date it is compared against.
        c: Column index of the second (later-stage) date.
        i: Row index in the module-level array ``arr``.

    Returns:
        0 when the ``a`` serial is greater than the ``d`` serial, when fewer
        than 15 days remain between the ``a`` date and the end of its month,
        or when fewer than 15 days separate the ``c`` date from the end of the
        preceding month; 1 in every other case.
    """
    # Local import: the file's import block is not visible from this snippet.
    from datetime import timedelta

    serial_a = int(arr[i][a])
    serial_d = int(arr[i][d])
    if serial_a > serial_d:
        return 0

    za = datetime.fromordinal(serial_a + 693594)
    zd = datetime.fromordinal(serial_d + 693594)
    # fromordinal already yields the real calendar year, so compare it
    # directly. The original tested ``za.year + 1899 == 2022``, which cannot
    # hold for any non-negative serial, so the function always returned 1 here.
    if za.month != zd.month or za.year != 2022:
        return 1

    da = date(za.year, za.month, za.day)
    month_end = date(za.year, za.month,
                     calendar.monthrange(za.year, za.month)[1])
    if (month_end - da).days < 15:
        return 0

    zc = datetime.fromordinal(int(arr[i][c]) + 693594)
    dc = date(zc.year, zc.month, zc.day)
    # Last day of the month before zc. Stepping back one day from the first of
    # the month also works for January, where ``month - 1`` would be 0.
    prev_month_end = date(zc.year, zc.month, 1) - timedelta(days=1)
    if zd.month != prev_month_end.month or zc.year != 2022:
        return 1
    return 1 if (dc - prev_month_end).days >= 15 else 0
I don't report the check2 function
# NOTE(review): np.array(10) is a 0-d array holding the value 10, so after the first
# np.append the result starts with 10 -- confirm that leading element is wanted.
fte = np.array(10)
for i in range(1,4797):
# An empty string in column c selects check2; otherwise check1 is used.
if arr[i][c] == '':
# np.append returns a new array on every call rather than growing fte in place.
fte = np.append(fte,check2(a,d,i))
else:
fte = np.append(fte,check1(a, d, c, i))
print(i)

Python returns local variable referenced before error

I have edited this post in response to your comments. Thank you :-)
prev_fs_cell is a variable whose value can be either NaN or a str (e.g. NaN or "1,244,234").
If prev_fs_cell is NaN, I want to skip the call to self._strat(row, curr_year), but I get an error instead.
## GLOBAL & API ###
# Date bounds as "YYYY.MM.DD" strings.
STOCK_START="2015.01.01"
FS_START="2014.01.01"
END="2021.09.01"
# presumably short/long window lengths for an indicator -- TODO confirm
SHORT=10
LONG=60
CURR_YEAR=2021
# OpenDART client built from the API key stored in dart_config.
API_key=dart_config.API_key
DART=OpenDartReader(API_key)
# Financial-statement account names (Korean); Strategy._strat reads some of them as column labels.
account_nm_list=["유동자산","비유동자산","유동부채","비유동부채","자산총계","부채총계","매출액","영업이익","당기순이익"]
# Ticker codes returned for the KOSPI market; the backtest loop iterates over them.
KOSPI_stock_code=stock.get_market_ticker_list(market="KOSPI")
class Strategy():
    """Rule-based backtest for one stock, driven by indicator rows and yearly financials.

    Instance state:
        buy_signal / sell_signal: one row per executed buy/sell with columns
            'open' and 'unit', labelled with ``str(row.name)``.
        trade: stock value and cash after each trade ('stock', 'cash').
        position: number of units currently held.
        unit: number of units bought per buy signal.
        cash: remaining cash.
    """

    def __init__(self):
        self.buy_signal = pd.DataFrame(columns=['open', 'unit'])
        self.sell_signal = pd.DataFrame(columns=['open', 'unit'])
        self.trade = pd.DataFrame(columns=['stock', 'cash'])
        self.position = 0
        self.unit = 1
        self.cash = 100000000  # 100 million

    @staticmethod
    def _append_row(frame, row):
        """Return ``frame`` with the named Series ``row`` added as a new last row.

        Uses ``pd.concat`` because ``DataFrame.append`` was removed in pandas 2.0.
        """
        return pd.concat([frame, row.to_frame().T.infer_objects()])

    @staticmethod
    def _fs_cell(fs, year):
        """Return the first cell of ``fs`` for ``year`` without commas, or None.

        None is returned when the year is missing from the index (KeyError),
        the row has no columns (IndexError), or the cell is not a string such
        as NaN (AttributeError/TypeError).
        """
        try:
            return fs.loc[year].iloc[0].replace(",", "")
        except (KeyError, IndexError, AttributeError, TypeError):
            return None

    @staticmethod
    def _to_int(text):
        """Parse a comma-grouped number string such as "1,244,234"."""
        return int(text.replace(",", ""))

    def set_data(self, indicator_data, finance_data):
        """Store both tables, indexed by their first column.

        The first column of ``indicator_data`` is renamed to 'date', parsed as
        ``%Y-%m-%d`` and used as index; the first column of ``finance_data`` is
        renamed to 'year' and used as index. Both renames are done in place,
        so the caller's DataFrames are modified.
        """
        self.indicator_data = indicator_data
        self.indicator_data.rename(
            columns={self.indicator_data.columns[0]: 'date'}, inplace=True)
        self.indicator_data = self.indicator_data.set_index('date')
        self.indicator_data.index = pd.to_datetime(
            self.indicator_data.index, format="%Y-%m-%d")
        self.fs_data = finance_data
        self.fs_data.rename(
            columns={self.fs_data.columns[0]: 'year'}, inplace=True)
        self.fs_data = self.fs_data.set_index('year')
        self.min_year = int(self.fs_data.index.min())  # index value may be a str

    def _buy(self, row):
        """Buy ``self.unit`` units at ``row['open']`` if the cash covers it."""
        cost = row['open'] * self.unit
        if cost > self.cash:
            return
        label = str(row.name)
        new_buy_row = pd.Series([row['open'], self.unit],
                                index=self.buy_signal.columns, name=label)
        self.buy_signal = self._append_row(self.buy_signal, new_buy_row)
        self.position += self.unit
        stock_amt = self.position * row['open']
        self.cash -= cost
        new_trade_row = pd.Series([stock_amt, self.cash],
                                  index=self.trade.columns, name=label)
        self.trade = self._append_row(self.trade, new_trade_row)

    def _sell(self, row):
        """Sell a quarter of the position (rounded down) plus one unit at ``row['open']``."""
        label = str(row.name)
        sold = int(self.position / 4) + 1
        new_sell_row = pd.Series([row['open'], sold],
                                 index=self.sell_signal.columns, name=label)
        self.sell_signal = self._append_row(self.sell_signal, new_sell_row)
        self.position -= sold
        stock_amt = self.position * row['open']
        # NOTE(review): cash is credited for self.unit units although `sold`
        # units leave the position -- confirm which amount is intended.
        self.cash += row['open'] * self.unit
        new_trade_row = pd.Series([stock_amt, self.cash],
                                  index=self.trade.columns, name=label)
        self.trade = self._append_row(self.trade, new_trade_row)

    def _strat(self, row, curr_year):
        """Apply the buy/sell rule to one indicator ``row`` using ``curr_year`` financials."""
        fs = self.fs_data
        prev_year = curr_year - 1
        curr_rev = self._to_int(fs.loc[curr_year, '매출액'])
        prev_rev = self._to_int(fs.loc[prev_year, '매출액'])
        rev_growth = (curr_rev - prev_rev) / prev_rev
        curr_ni = self._to_int(fs.loc[curr_year, '당기순이익'])
        prev_ni = self._to_int(fs.loc[prev_year, '당기순이익'])
        ni_growth = (curr_ni - prev_ni) / prev_ni
        curr_asset = self._to_int(fs.loc[curr_year, '유동자산'])
        # NOTE(review): the non-current assets are read from prev_year while
        # the current assets come from curr_year -- confirm this is intended.
        noncurr_asset = self._to_int(fs.loc[prev_year, "비유동자산"])
        curr_asset_rat = curr_asset / noncurr_asset
        if row.rsi < 0.65 and rev_growth > 0.005 and 1.3 < curr_asset_rat:
            self._buy(row)
        # `== False` is kept on purpose: it also matches numpy booleans and
        # does not treat NaN/None as a sell condition.
        elif row.Golden == False:  # noqa: E712
            if ni_growth <= 0.001 and self.position:
                self._sell(row)

    def run(self):
        """Run the strategy over every indicator date.

        Dates are skipped when the financial data for the current or the
        previous year is missing or not a string.
        """
        fs = self.fs_data
        for date in self.indicator_data.index:
            curr_year = date.year
            row = self.indicator_data.loc[date]
            # Both cells are always assigned, so neither name can be unbound
            # when it is tested (the original left prev_fs_cell unset whenever
            # the current-year lookup failed).
            curr_fs_cell = self._fs_cell(fs, curr_year)
            prev_fs_cell = self._fs_cell(fs, curr_year - 1)
            if curr_fs_cell is None or prev_fs_cell is None:
                continue
            self._strat(row, curr_year)
# Backtest every KOSPI ticker and record its return rate and Sharpe-style ratio.
for code in KOSPI_stock_code:
FS = load_data("FS_"+code)
indi = load_data("indicator_"+code)
today = dt.today()
strategy = Strategy()
strategy.set_data(indi, FS)
strategy.run()
buy = strategy.buy_signal
sell = strategy.sell_signal
unit = strategy.unit
# Units still held = units bought minus units sold, valued at today's open price.
remain_stock = buy['unit'].sum() - sell['unit'].sum()
remain = int(get_data(str(code)+".KS", today).iloc[0]['open'])*int(remain_stock)
# NOTE(review): both totals multiply the summed open prices by `unit`, not by the
# per-row 'unit' column -- confirm this matches the amounts actually traded.
total_buy = int((buy['open'].sum()))*unit
total_sell = int(sell['open'].sum())*unit
profit = int(remain) + int(total_sell) - int(total_buy)
# A zero total_buy means the strategy never bought.
if total_buy:
return_rate = profit / total_buy
trade = strategy.trade
total_return_per_day = trade['stock']+trade['cash']
# NOTE(review): residual subtracts a rate from an absolute amount, and sample_var is
# a per-row Series that is never summed -- confirm the intended formula.
residual = total_return_per_day - return_rate
sample_var = residual**2 / (trade.shape[0]-1)
sample_dev = np.sqrt(sample_var)
Rf=0.01
sharp = (return_rate - Rf) / (sample_dev)
# NOTE(review): `results` is not defined in the code shown -- verify it exists with an entry per code.
results[code]['return'] = return_rate
results[code]['sharp'] = sharp
else:
print("No buy due to strict condition")
I am trying to write backtesting code for investing in Korean stocks, using financial-statement, stock-price and indicator data.
My code raises the error below.
UnboundLocalError Traceback (most recent call last)
<ipython-input-13-caf2b218f860> in <module>()
10 strategy = Strategy()
11 strategy.set_data(indi, FS)
---> 12 strategy.run()
13
14 buy = strategy.buy_signal
<ipython-input-12-2d41db386a22> in run(self)
84 curr_fs_cell = None
85
---> 86 if (curr_fs_cell == None) | (prev_fs_cell == None):
87 #print("fs data is empty")
88 continue
UnboundLocalError: local variable 'prev_fs_cell' referenced before assignment
There is no global variable named prev_fs_cell; it is only assigned inside that method of the class. Why does this error occur?

While and for loop with global variable not working Python updated

Working for single symbol
# The date window (today and five days back) is computed once, before the loop starts.
todate = zerodha.get_trade_day(datetime.now().astimezone(to_india) - timedelta(days=0))
fromdate = zerodha.get_trade_day(datetime.now().astimezone(to_india) - timedelta(days=5))
symbol = "ZINC20MAYFUT"
instype = "MCX"
Timeinterval = "5minute"
tradeDir = 0 #neutral
# NOTE(review): the loop has no sleep or exit condition, so history is re-fetched
# continuously -- confirm intended.
while (True):
histdata1 = zerodha.get_history(symbol, fromdate, todate, Timeinterval, instype)
df = pd.DataFrame(histdata1)
df = heikinashi(df)
df = bollinger_bands(df,field='h_close',period=20, numsd=2)
df1 =pd.DataFrame(df, columns=['date','volume','close','h_close','middle_band', 'upper_band'])
# Keep only the three most recent rows.
pp = pd.DataFrame(df1.tail(3))
print(pp)
# dfCToList and dfCList are not used later in the code shown.
dfCToList = pp['h_close'].tolist()
dfCList = list(pp['h_close'])
dfHValues = pp['h_close'].values
dfBMValues = pp['middle_band'].values
H_last = dfHValues[2] # tail 1
BM_last = dfBMValues[2] # tail 1
# Signal "buy" when h_close is above the middle band and the last state was neutral or down.
if (H_last > BM_last and (tradeDir == 0 or tradeDir == -1)):
print("buy")
tradeDir = 1 # up
# Signal "SELL" when h_close is below the middle band and the last state was neutral or up.
if (H_last < BM_last and (tradeDir == 0 or tradeDir == 1)):
print("SELL")
tradeDir = -1 # down
# pdb.set_trace()
Question: When the conditions are met, it prints "BUY/SELL" again and again. I want it to print only once, the first time the condition is met.
todate = zerodha.get_trade_day(datetime.now().astimezone(to_india) - timedelta(days=0))
fromdate = zerodha.get_trade_day(datetime.now().astimezone(to_india) - timedelta(days=5))
tradeDir = 0  # neutral; direction of the most recent signal for any symbol
# Last signalled direction per symbol: 1 = up, -1 = down, missing = neutral.
_trade_dir_by_symbol = {}


def script():
    """Fetch recent history for each symbol and print a signal when its direction flips.

    The direction is remembered per symbol in ``_trade_dir_by_symbol``. With a
    single shared flag, a "SELL" on one symbol re-armed the "buy" of another,
    so the same signals were printed on every pass. The module-level
    ``tradeDir`` is still updated with the latest signalled direction for code
    that reads it.
    """
    global tradeDir
    ##For historical Data##
    symbols = ["ZINC20MAYFUT", "CRUDEOIL20MAYFUT", "GOLD20JUNFUT"]
    instype = "MCX"
    Timeinterval = "5minute"
    for symbol in symbols:
        histdata1 = zerodha.get_history(symbol, fromdate, todate, Timeinterval, instype)
        df = pd.DataFrame(histdata1)
        df = heikinashi(df)
        df = bollinger_bands(df, field='h_close', period=20, numsd=2)
        df1 = pd.DataFrame(df, columns=['date', 'volume', 'close', 'h_close', 'middle_band', 'upper_band'])
        pp = pd.DataFrame(df1.tail(3))
        print(pp)
        # Most recent row; [-1] equals the original [2] whenever three rows exist.
        H_last = pp['h_close'].values[-1]
        BM_last = pp['middle_band'].values[-1]
        last_dir = _trade_dir_by_symbol.get(symbol, 0)
        if H_last > BM_last and last_dir in (0, -1):
            print("buy")
            _trade_dir_by_symbol[symbol] = tradeDir = 1  # up
        elif H_last < BM_last and last_dir in (0, 1):
            print("SELL")
            _trade_dir_by_symbol[symbol] = tradeDir = -1  # down
# Run script() forever; a failure is reported and retried after a short pause.
# NOTE(review): there is no delay between successful passes -- confirm intended.
while True:
    try:
        script()
    except Exception as e:
        # Report the failure instead of discarding it silently.
        print(f"script() failed: {e!r}; retrying in 2 seconds")
        sleep(2)
When the conditions are met, it prints "BUY/SELL" again and again. I want it to print only once, the first time the condition is met, while the full script keeps running continuously.
If you want the code to stop looping after the first time it prints "buy" or "SELL", you just need to add a break statement after each of the prints (inside the scope of the containing if blocks).

Single list.count instead of multiple

I have parsed a crew list that looks like this:
20;mechanic;0;68
21;cook;0;43
22;scientist;0;79
23;manager;1;65
24;mechanic;1;41
etc
Now I'm trying to figure out how to count the number of workers who have 60 or more stamina (the last field of each employee record).
Here is my code:
with open('employee.txt', 'r') as employee_list:
    count = 0
    for employee in employee_list:
        employee_data = employee.rstrip().split(';')
        if int(employee_data[3]) >= 60:
            count += 1
            # Prints the running count on every match (the behaviour asked about).
            print(count)
Print from terminal:
1
2
3
...
90
I think that is the right answer, but is there any way to print only the single total count instead of 90 lines?
Just print one line after the loop is done.
with open('employee.txt', 'r') as employee_list:
    count = 0
    for employee in employee_list:
        employee_data = employee.rstrip().split(';')
        if int(employee_data[3]) >= 60:
            count += 1
# Print once, after the loop has finished.
print(count)
But I would also recommend using pandas for data manipulation. For example:
# header=None: the file has no header row, so the first line is data and must not
# be consumed as column names.
df = pd.read_csv('employee.txt', sep=';', header=None,
                 names=['col1', 'col2', 'col3', 'stamina'])
Then just filter and get the size:
# len() counts rows; DataFrame.size would count cells (rows x columns).
len(df[df.stamina >= 60])
So after a day of thinking I wrote this and got the right answer (maybe someone will find it helpful):
def total_resist_count(employee_list=None, min_stamina=60):
    """Return the parsed records of employees with at least ``min_stamina`` stamina.

    Args:
        employee_list: Iterable of ``id;profession;flag;stamina`` lines. When
            omitted, 120 lines are read from standard input, as before.
        min_stamina: Lowest stamina (fourth field) that qualifies.

    Returns:
        A list with one ``[id, profession, flag, stamina]`` list of strings
        per qualifying employee, in input order.
    """
    if employee_list is None:
        employee_list = [input() for _ in range(120)]
    records = (line.rstrip().split(';') for line in employee_list)
    return [fields for fields in records if int(fields[3]) >= min_stamina]
# Open vacancies per profession; backtrack() decrements a counter while a candidate
# of that profession is selected. The values sum to 40, the same as expedition_total.
required_professionals = {
'computers specialist': 5,
'cook': 3,
'doctor': 5,
'electrical engineer': 4,
'manager': 1,
'mechanic': 8,
'scientist': 14
}
# Team size and minimum head-counts checked by validate_solution().
expedition_total = 40
female_min = 21
male_min = 12
def validate_solution(cur_team, num_females, num_males):
    """Return True when the current selection is a complete, valid team.

    Args:
        cur_team: 0/1 selection flags, one per candidate.
        num_females: Number of selected women.
        num_males: Number of selected men.

    Returns:
        True only if exactly ``expedition_total`` candidates are selected,
        both minimum head-counts are met and no vacancy in
        ``required_professionals`` is left open.
    """
    # The module-level names are only read, so no ``global`` statement is needed.
    if sum(cur_team) != expedition_total:
        return False
    if num_females < female_min or num_males < male_min:
        return False
    return sum(required_professionals.values()) <= 0
TEAM = None


def backtrack(candidates, cur_team, num_females, num_males, start=0):
    """Depth-first search for a valid team; stores the first one found in ``TEAM``.

    Args:
        candidates: Parsed employee records ``[id, profession, flag, stamina]``.
        cur_team: 0/1 selection flags, one per candidate. Mutated during the
            search and restored before each call returns.
        num_females: Number of selected candidates whose flag is not ``'1'``.
        num_males: Number of selected candidates whose flag is ``'1'``.
        start: Index of the first candidate that may still be added. Only
            later candidates are tried, so each combination is visited once
            instead of once per ordering. Callers can leave it at 0.
    """
    global TEAM
    if TEAM is not None or sum(cur_team) > expedition_total:
        return
    if validate_solution(cur_team, num_females, num_males):
        TEAM = [candidates[i] for i, used in enumerate(cur_team) if used == 1]
        return
    for i in range(start, len(candidates)):
        profession = candidates[i][1]
        # .get(): a profession that is not required at all is skipped rather
        # than raising KeyError.
        if cur_team[i] != 0 or required_professionals.get(profession, 0) <= 0:
            continue
        cur_team[i] = 1
        required_professionals[profession] -= 1
        if candidates[i][2] == '1':
            backtrack(candidates, cur_team, num_females, num_males + 1, i + 1)
        else:
            backtrack(candidates, cur_team, num_females + 1, num_males, i + 1)
        # Undo the choice before trying the next candidate.
        required_professionals[profession] += 1
        cur_team[i] = 0
if __name__ == '__main__':
    ec = decode_fcc_message()
    candidates = total_resist_count(ec)
    cur_team = [0] * len(candidates)
    backtrack(candidates, cur_team, 0, 0)
    if TEAM is None:
        # backtrack() leaves TEAM unset when no selection passes validation;
        # iterating over None would raise TypeError.
        raise SystemExit("no valid team found")
    # Every id is followed by ';', including the last one, as before.
    print("".join(f"{member[0]};" for member in TEAM))

Python Last 6 Results, removing the last

I just can't get it done. Therefore I'll post the full code.
The .csv used is from http://www.football-data.co.uk/mmz4281/1415/E0.csv
Now when run, the variables home_team_a, home_team_d, away_team_a and away_team_d are based on all of the previous matches but I want them to be based always on the last 6 matches.
import csv, math, ast, numpy as np
def poisson(actual, mean):
    """Return the Poisson probability of ``actual`` events when ``mean`` are expected.

    Computes ``mean**actual * exp(-mean) / actual!``.
    """
    return math.pow(mean, actual) * math.exp(-mean) / math.factorial(actual)
# Build team_list.txt: a dict literal with one zeroed statistics entry per team
# name found in columns 2 and 3 of the CSV, then parse it back with literal_eval.
csvFile = '20152016.csv'
team_list = []
k = open('team_list.txt', 'w')
k.write("""{
""")
# NOTE(review): this open() has no matching close() in the code shown.
csvRead = csv.reader(open(csvFile))
# Skip the header row.
next(csvRead)
for row in csvRead:
if row[2] not in team_list:
team_list.append(row[2])
if row[3] not in team_list:
team_list.append(row[3])
team_list.sort()
for team in team_list:
k.write(""" '%s': {'home_goals': 0, 'away_goals': 0, 'home_conceded': 0, 'away_conceded': 0, 'home_games': 0, 'away_games': 0, 'alpha_h': 0, 'beta_h': 0, 'alpha_a': 0, 'beta_a': 0},
""" % (team))
k.write("}")
k.close()
s = open('team_list.txt', 'r').read()
# NOTE(review): the name `dict` shadows the builtin for the rest of the script.
dict = ast.literal_eval(s)
# Replay the season game by game: predict each match from the statistics gathered
# so far, place a hypothetical bet, then update the statistics with the real result.
GAMES_PLAYED = 0
WEEKS_WAIT = 4
TOTAL_VALUE = 0
csvRead = csv.reader(open(csvFile))
next(csvRead)
for game in csvRead:
home_team = game[2]
away_team = game[3]
home_goals = int(game[4])
away_goals = int(game[5])
home_win_prob = 0
draw_win_prob = 0
away_win_prob = 0
curr_home_goals = 0
curr_away_goals = 0
avg_home_goals = 1
avg_away_goals = 1
team_bet = ''
ev_bet = ''
# GETTING UPDATED VARIABLES
# Sum the home/away goals of all teams over every game played so far.
for key, value in dict.items():
curr_home_goals += dict[key]['home_goals']
curr_away_goals += dict[key]['away_goals']
# Predictions start only after more than WEEKS_WAIT * 10 games have been played.
if GAMES_PLAYED > (WEEKS_WAIT * 10):
avg_home_goals = curr_home_goals / (GAMES_PLAYED)
avg_away_goals = curr_away_goals / (GAMES_PLAYED)
# CALCULATING FACTORS
# NOTE(review): home_team_exp / away_team_exp are assigned only under this condition;
# the lost indentation hides whether the sections below are guarded by it -- confirm.
if GAMES_PLAYED > (WEEKS_WAIT * 10):
home_team_a = (dict[home_team]['alpha_h'] + dict[home_team]['alpha_a']) / 2
away_team_a = (dict[away_team]['alpha_h'] + dict[away_team]['alpha_a']) / 2
home_team_d = (dict[home_team]['beta_h'] + dict[home_team]['beta_a']) / 2
away_team_d = (dict[away_team]['beta_h'] + dict[away_team]['beta_a']) / 2
home_team_exp = avg_home_goals * home_team_a * away_team_d
away_team_exp = avg_away_goals * away_team_a * home_team_d
# RUNNING POISSON
# The 10 x 10 score probabilities are written to poisson.txt and read back below.
l = open('poisson.txt', 'w')
for i in range(10):
for j in range(10):
prob = poisson(i, home_team_exp) * poisson(j, away_team_exp)
l.write("Prob%s%s = %s\n" % (i, j, prob))
l.close()
with open('poisson.txt') as f:
for line in f:
# Characters 4 and 5 of "ProbIJ" are the single-digit home and away goal counts.
home_goals_m = int(line.split(' = ')[0][4])
away_goals_m = int(line.split(' = ')[0][5])
prob = float(line.split(' = ')[1])
if home_goals_m > away_goals_m:
home_win_prob += prob
elif home_goals_m == away_goals_m:
draw_win_prob += prob
elif home_goals_m < away_goals_m:
away_win_prob += prob
#CALCULATE VALUE
# presumably columns 23-25 hold the Bet365 home/draw/away odds -- verify against the CSV header
bet365odds_h, bet365odds_d, bet365odds_a = float(game[23]), float(game[24]), float(game[25])
ev_h = (home_win_prob * (bet365odds_h - 1)) - (1 - home_win_prob)
ev_d = (draw_win_prob * (bet365odds_d - 1)) - (1 - draw_win_prob)
ev_a = (away_win_prob * (bet365odds_a - 1)) - (1 - away_win_prob)
# Bet one unit on the outcome with the highest expected value, if it is positive.
highestEV = max(ev_h, ev_d, ev_a)
if (ev_h == highestEV) and (ev_h > 0):
team_bet = home_team
ev_bet = ev_h
if home_goals > away_goals:
TOTAL_VALUE += (bet365odds_h - 1)
else:
TOTAL_VALUE -= 1
elif (ev_d == highestEV) and (ev_d > 0):
team_bet = 'Draw'
ev_bet = ev_d
if home_goals == away_goals:
TOTAL_VALUE += (bet365odds_d - 1)
else:
TOTAL_VALUE -= 1
elif (ev_a == highestEV) and (ev_a > 0):
team_bet = away_team
ev_bet = ev_a
if home_goals < away_goals:
TOTAL_VALUE += (bet365odds_a - 1)
else:
TOTAL_VALUE -= 1
if (team_bet != '') and (ev_bet != ''):
print ("Bet on '%s' (EV = %s)" % (team_bet, ev_bet))
print (TOTAL_VALUE)
# UPDATE VARIABLES AFTER MATCH HAS BEEN PLAYED
# These totals are cumulative over all games so far, not limited to a recent window.
dict[home_team]['home_goals'] += home_goals
dict[home_team]['home_conceded'] += away_goals
dict[home_team]['home_games'] += 1
dict[away_team]['away_goals'] += away_goals
dict[away_team]['away_conceded'] += home_goals
dict[away_team]['away_games'] += 1
GAMES_PLAYED += 1
# CREATE FACTORS
if GAMES_PLAYED > (WEEKS_WAIT * 10):
# NOTE(review): divides by home_games / away_games, which are 0 for a team that has
# not yet played at home or away -- confirm this cannot happen by game 41.
for key, value in dict.items():
alpha_h = (dict[key]['home_goals'] / dict[key]['home_games']) / avg_home_goals
beta_h = (dict[key]['home_conceded'] / dict[key]['home_games']) / avg_away_goals
alpha_a = (dict[key]['away_goals'] / dict[key]['away_games']) / avg_away_goals
beta_a = (dict[key]['away_conceded'] / dict[key]['away_games']) / avg_home_goals
dict[key]['alpha_h'] = alpha_h
dict[key]['beta_h'] = beta_h
dict[key]['alpha_a'] = alpha_a
dict[key]['beta_a'] = beta_a
Use a deque to keep the 6 most recent items in memory; adding a new record will "push out" the oldest one.
import collections
import itertools
import csv
with open("foo.csv", newline="") as fh:
    # Skip the first 44 rows. islice is qualified because only the itertools
    # module itself is imported above.
    csv_read = itertools.islice(csv.reader(fh), 44, None)
    # Initialize the deque with the next 6 rows
    d = collections.deque(itertools.islice(csv_read, 6), 6)
    for record in csv_read:
        d.append(record)
        print(list(d))  # Rows 46-51, then 47-52, then 48-53, etc
Because you set the maximum length of the deque to 6, each append to a "full" deque pushes out the older one. On the first iteration, d.append pushes out row 45 and adds row 51. On the next iteration, adding row 52 pushes out row 46, etc.
In general, a deque is a data structure that is like a combination of a queue and a stack; you can add or remove items to either end efficiently, but accessing an arbitrary item or modifying the "middle" is slow. Here, we're taking advantage of the fact that appending to a full deque causes an implicit removal from the opposite end.
How about:
# NOTE(review): list(csvRead) materialises every row still left in the reader, and the
# slice [seen_records - 6:seen_records + 1] spans up to 7 items, not 6 -- confirm the bounds.
if seen_records == 200:
recs = list(csvRead)[seen_records - 6:seen_records + 1]
You can do something like this....
previous_index = 0
previous_max = 6  # max number of previous records to remember
previous = [None for _ in range(previous_max)]
csvFile = 'X.csv'
seen_records = 0
with open(csvFile, newline='') as fh:
    csvRead = csv.reader(fh)
    # Enumerate over the records to keep track of the index of each one
    for i, record in enumerate(csvRead):
        if i > 50:
            # Was `=+ 1`, which assigns +1 on every pass instead of incrementing.
            seen_records += 1
            if previous_index == previous_max:
                previous_index = 0  # Reset to the beginning when we reach the end
            # Store the record and increment the index to the next location
            # (the original stored the undefined name `record` while the loop
            # variable was called `records`).
            previous[previous_index] = record
            previous_index += 1
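This creates a very basic ring buffer of length previous_max: each new record overwrites the oldest slot, so the list always holds the most recent previous_max records, although not necessarily in chronological order once the index has wrapped around.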

Categories

Resources