I would like to simulate a seven game baseball playoff series. Let's say I have the the win probabilities for each game in the series. I would like to know the probabilities for each possible series outcome. ie TeamA in 4 games, TeamB in 4 games, TeamA in 5 games, etc.
This is what I came up with and it seems to work but I think it could be done better.
winPercGM1 = .5
winPercGM2 = .56
winPercGM3 = .47
winPercGM4 = .55
winPercGM5 = .59
winPercGM6 = .59
winPercGM7 = .38
winPercs = [winPercGM1, winPercGM2, winPercGM3, winPercGM4, winPercGM5, winPercGM6, winPercGM7]
def WinSeries():
teamAwins = 0
teamBwins = 0
for perc in winPercs:
if teamAwins == 4:
break
elif teamBwins == 4:
break
elif perc > np.random.random():
teamAwins += 1
else:
teamBwins += 1
return teamAwins, teamBwins
def RunFun(n):
teamAWins = []
teamBWins = []
for i in xrange(n):
result = WinSeries()
teamAWin = result[0]
teamBWin = result[1]
teamAWins.append(teamAWin)
teamBWins.append(teamBWin)
return teamAWins, teamBWins
n = 500000
results = RunFun(n)
teamAwinSeries = results[0]
teamBwinSeries = results[1]
teamBin4 = teamAwinSeries.count(0)/n
teamBin5 = teamAwinSeries.count(1)/n
teamBin6 = teamAwinSeries.count(2)/n
teamBin7 = teamAwinSeries.count(3) / n
teamAin4 = teamBwinSeries.count(0)/n
teamAin5 = teamBwinSeries.count(1)/n
teamAin6 = teamBwinSeries.count(2)/n
teamAin7 = teamBwinSeries.count(3) / n
This can be done easily with numpy (Python 2.7)
import numpy as np
probs = np.array([.5 ,.56 ,.47 ,.55 ,.59 ,.59 ,.38])
nsims = 500000
chance = np.random.uniform(size=(nsims, 7))
teamAWins = (chance > probs[None, :]).astype('i4')
teamBWins = 1 - teamAWins
teamAwincount = {}
teamBwincount = {}
for ngames in range(4, 8):
afilt = teamAWins[:, :ngames].sum(axis=1) == 4
bfilt = teamBWins[:, :ngames].sum(axis=1) == 4
teamAwincount[ngames] = afilt.sum()
teamBwincount[ngames] = bfilt.sum()
teamAWins = teamAWins[~afilt]
teamBWins = teamBWins[~bfilt]
teamAwinprops = {k : 1. * count/nsims for k, count in teamAwincount.iteritems()}
teamBwinprops = {k : 1. * count/nsims for k, count in teamBwincount.iteritems()}
Output:
>>> sum(teamAwinprops.values()) + sum(teamBwinprops.values())
1.0
>>> teamAwincount
{4: 26186, 5: 47062, 6: 59222, 7: 95381}
>>> teamBwincount
{4: 36187, 5: 79695, 6: 97802, 7: 58465}
Related
I am trying to create an indicator that will find all the divergences between 2 signals.
The output of the function so far looks like this
But the problem is that is painfully slow when I am trying to use it with long signals. Could any of you guys help me to make it faster if is possible?
My code:
def find_divergence(price: pd.Series, indicator: pd.Series, width_divergence: int, order: int):
div = pd.DataFrame(index=range(price.size), columns=[
f"Bullish_{width_divergence}_{order}",
f"Berish_{width_divergence}_{order}"
])
div[f'Bullish_idx_{width_divergence}_{order}'] = False
div[f'Berish_idx_{width_divergence}_{order}'] = False
def calc_argrelextrema(price_: np.numarray):
return argrelextrema(price_, np.less_equal, order=order)[0]
price_ranges = []
for i in range(len(price)):
price_ranges.append(price.values[0:i + 1])
f = []
with ThreadPoolExecutor(max_workers=16) as exe:
for i in price_ranges:
f.append(exe.submit(calc_argrelextrema, i))
prices_lows = SortedSet()
for r in concurrent.futures.as_completed(f):
data = r.result()
for d in reversed(data):
if d not in prices_lows:
prices_lows.add(d)
else:
break
price_lows_idx = pd.Series(prices_lows)
for idx_1 in range(price_lows_idx.size):
min_price = price[price_lows_idx[idx_1]]
min_indicator = indicator[price_lows_idx[idx_1]]
for idx_2 in range(idx_1 + 1, idx_1 + width_divergence):
if idx_2 >= price_lows_idx.size:
break
if price[price_lows_idx[idx_2]] < min_price:
min_price = price[price_lows_idx[idx_2]]
if indicator[price_lows_idx[idx_2]] < min_indicator:
min_indicator = indicator[price_lows_idx[idx_2]]
consistency_price_rd = min_price == price[price_lows_idx[idx_2]]
consistency_indicator_rd = min_indicator == indicator[price_lows_idx[idx_1]]
consistency_price_hd = min_price == price[price_lows_idx[idx_1]]
consistency_indicator_hd = min_indicator == indicator[price_lows_idx[idx_2]]
diff_price = price[price_lows_idx[idx_1]] - price[price_lows_idx[idx_2]] # should be neg
diff_indicator = indicator[price_lows_idx[idx_1]] - indicator[price_lows_idx[idx_2]] # should be pos
is_regular_divergence = diff_price > 0 and diff_indicator < 0
is_hidden_divergence = diff_price < 0 and diff_indicator > 0
if is_regular_divergence and consistency_price_rd and consistency_indicator_rd:
div.at[price_lows_idx[idx_2], f'Bullish_{width_divergence}_{order}'] = (price_lows_idx[idx_1], price_lows_idx[idx_2])
div.at[price_lows_idx[idx_2], f'Bullish_idx_{width_divergence}_{order}'] = True
elif is_hidden_divergence and consistency_price_hd and consistency_indicator_hd:
div.at[price_lows_idx[idx_2], f'Berish_{width_divergence}_{order}'] = (price_lows_idx[idx_1], price_lows_idx[idx_2])
div.at[price_lows_idx[idx_2], f'Berish_idx_{width_divergence}_{order}'] = True
return div
So I have an application in Python that calculates the variable number in the "PV = nRT" chemical equation. The code is like this:
r = 0.082
# Variables
p = float(input('Pressure = '))
p_unit = input('Unit = ')
print('_____________________')
v = float(input('Volume = '))
v_unit = input('Unit = ')
print('_____________________')
n = float(input('Moles = '))
print('_____________________')
t = float(input('Temperature = '))
t_unit = input('Unit = ')
# Unit Conversion
if p_unit == 'bar':
p = p * 0.987
if v_unit == 'cm3':
v = v / 1000
if v_unit == 'm3':
v = v * 1000
if t_unit == 'c':
t = t + 273
if t_unit == 'f':
t = ((t - 32) * (5 / 9)) + 273
# Solve Equation
def calc():
if p == 000:
return (n * r * t) / v
if v == 000:
return (n * r * t) / p
if n == 000:
return (p * v) / (r * t)
if t == 000:
return (p * v) / (n * r)
and then at the end I run the function to get the result. But the problem is I want to convert the result to a Scientific Number (e.g. 0.005 = 5 x 10^-3). I tried the solution below:
def conv_to_sci(num):
i = 0
if num > 10:
while num > 10:
num / 10
i = i - 1
if num < 10:
while num < 10:
num * 10
i = i + 1
return num + "x 10^" + i
but it didn't work. Any questions?
I'd just use numpy to get scientific notation
import numpy as np
num = 0.005
num_sc = np.format_float_scientific(num)
>>> num_sc
'5.e-03'
Use str.format
"{:.0e}".format(0.005)
This will print:
'5e-03'
Or,
def conv_to_sci(num):
i = 0
while int(num) != num:
num *= 10
i += 1
return "{0} x 10^{1}".format(int(num), i)
conv_to_sci(0.005)
Will give: '5 x 10^3'
l1 = number_of_employees - no_shift + 1
l2 = range(l1)
l3 = range(l1-2)
l4 = []
print("---------l2----value",l2)
shift_time = (24/no_shift)
shift_time1 = (24/no_shift1 )
shift_timing = []
shift_timing1 = []
days = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday","Saturday","Sunday"]
weekday = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday"]
weekend = ["Saturday","Sunday"]
data= [[l3, [1], [2]],
[l3, [1], [2]],
[l2, [1], [2]],
[l2, [1], [2]],
[l2, [1], [2]]]
print(" ------data list-----", data)
data1 = [ [[1], [2]],
[[1], [2]]]
if number_of_employees < no_shift:
print ("Not possible to schedule with the given constraints")
exit()
for day in range(0, no_shift):
t = (start_time + shift_time)
if t > 24 :
t = (t-24)
a = (str(start_time) + '-' + str(t))
shift_timing.append(a)
if t < 24:
start_time = t
elif t >= 24:
start_time = 0
for day in range(0, no_shift1):
t = (start_time1 + shift_time1)
if t > 24 :
t = (t-24)
a = (str(start_time1) + '-' + str(t))
shift_timing1.append(a)
if t < 24:
start_time1 = t
elif t >= 24:
start_time1 = 0
employee_array = []
cfg.read('config.ini')
l = dict(cfg.items('staffData'))
employee_array = list(l.values())
orignal_employee_array = employee_array
print (json.dumps(employee_array))
flag = 1
if number_of_employees % 2 == 0:
employee_count_for_loop = (number_of_employees / 2 + 1)
elif number_of_employees % 2 != 0:
employee_count_for_loop = (number_of_employees + 1)
y = 0
z = 0
while flag < employee_count_for_loop:
print("------------------------------- WEEK", flag, '--------------------------------')
y = 0
for day in range(0, 7):
if day == 5 or day == 6:
z = 0
if number_of_employees < 5:
for shift in range(1, 2):
data1[y][z] = employee_array[shift + no_shift]
z += 1
else:
for shift in range(0, 2):
data1[y][z] = employee_array[shift + number_of_employees - 2]
z += 1
y+=1
elif day == 2 or day == 3 or day == 4:
for shift in range(0, no_shift):
p = shift
if shift == 0:
data[day][shift][shift] = employee_array[shift]
data[day][shift][shift] = json.dumps(data[day][shift][shift])
for shift_number in range(no_shift, number_of_employees ):
data[day][shift][p + 1] = employee_array[shift_number]
data[day][shift][p + 1] = json.dumps(data[day][shift][p + 1])
p = p + 1
else:
data[day][shift] = employee_array[shift]
else:
for shift in range(0, no_shift):
p = shift
if shift == 0:
data[day][shift][shift] = employee_array[shift]
data[day][shift][shift]= json.dumps(data[day][shift][shift])
for shift_number in range(no_shift, number_of_employees-2):
data[day][shift][p + 1] = employee_array[shift_number]
data[day][shift][p + 1] = json.dumps(data[day][shift][p + 1])
p = p + 1
else:
data[day][shift] = employee_array[shift]
print("------EmployeeArray---", employee_array)
employee_array = employee_array[-2:] + employee_array[:-2]
if employee_array == orignal_employee_array:
if no_shift == 2:
employees_array = employee_array[-1:] + employee_array[:-1]
orignal_employee_array = employee_array
employee_array = employee_array[-4:] + employee_array[:-4]
orignal_employee_array = employee_array
flag = flag + 1
print ("*****WEEKDAY*****")
midx = pd.MultiIndex.from_product([weekday, shift_timing])
test = pd.DataFrame(data, index=weekday ,columns=shift_timing)
test1 = tabulate(test, headers=shift_timing1, tablefmt='orgtbl')
l4.append(test)
print ("*****WEEKEND*****")
midx = pd.MultiIndex.from_product([weekend, shift_timing1])
test = pd.DataFrame(data1, index=weekend, columns=shift_timing1)
test2 = (tabulate(test,headers=shift_timing1,tablefmt='orgtbl'))
l4.append(test)
pd.concat(l4,ignore_index = False).to_csv('file1.csv')
I tried implementing it using pandas and csv writer
I want each dataFrame/table to have it own index not a common index not able to find the solution for this anywhere in the internet.
I even tried csv writer the way its getting printed in the csv file is not correct.
I got the answer by combining the functionality of both, csvwriter and pandas,
with the following line:
df.to_csv(path_or_buf=csv_file)
YOU can refer to this link for more information: https://www.journaldev.com/33511/pandas-to_csv-convert-dataframe-to-csv
Im parsed list of crew witch one looks like:
20;mechanic;0;68
21;cook;0;43
22;scientist;0;79
23;manager;1;65
24;mechanic;1;41
etc
And now I'm trying to figure out how to count number of workers who have 60 or more stamina( the last element in each employee )
There is my code:
with open('employee.txt', 'r') as employee_list:
count = 0
for employee in employee_list.readlines():
employee_data = employee.rstrip().split(';')
if int(employee_data[3]) >= 60:
count += 1
print(count)
Print from terminal:
1
2
3
...
90
And there is the right answer I think, but is there anyway to get only one 'total' count, not a 90ty strings ?
Just print one line after the loop is done.
with open('employee.txt', 'r') as employee_list:
count = 0
for employee in employee_list.readlines():
employee_data = employee.rstrip().split(';')
if int(employee_data[3]) >= 60:
count += 1
print(count)
But I would also recommend using pandas for data manipulation. For example:
df = pd.read_csv('employee.txt', sep=';')
df.columns = ['col1', 'col2', 'col3', 'stamina']
Then just filter and get the size:
df[df.stamina >= 60].size
So after a day of thinking I wrote this and get right answer ( maybe someone will find this helpful):
def total_resist_count():
# with open('employee.txt', 'r') as employee_list:
employee_list = [input() for i in range(120)]
candidates = []
for employee in employee_list:
employee_data = employee.rstrip().split(';')
if int(employee_data[3]) >= 60:
candidates.append(employee_data)
return candidates
required_professionals = {
'computers specialist': 5,
'cook': 3,
'doctor': 5,
'electrical engineer': 4,
'manager': 1,
'mechanic': 8,
'scientist': 14
}
expedition_total = 40
female_min = 21
male_min = 12
def validate_solution(cur_team, num_females, num_males):
global expedition_total, female_min, male_min
if sum(cur_team) != expedition_total or num_females < female_min or num_males < male_min:
return False
num_of_free_vacancies = 0
for k in required_professionals:
num_of_free_vacancies += required_professionals[k]
if num_of_free_vacancies > 0:
return False
return True
TEAM = None
def backtrack(candidates, cur_team, num_females, num_males):
global required_professionals, expedition_total, TEAM
if sum(cur_team) > expedition_total or TEAM is not None:
return
if validate_solution(cur_team, num_females, num_males):
team = []
for i, used in enumerate(cur_team):
if used == 1:
team.append(candidates[i])
TEAM = team
return
for i in range(len(candidates)):
if cur_team[i] == 0 and required_professionals[candidates[i][1]] > 0:
cur_team[i] = 1
required_professionals[candidates[i][1]] -= 1
if candidates[i][2] == '1':
backtrack(candidates, cur_team, num_females, num_males + 1)
else:
backtrack(candidates, cur_team, num_females + 1, num_males)
required_professionals[candidates[i][1]] += 1
cur_team[i] = 0
if __name__ == '__main__':
ec = decode_fcc_message()
candidates = total_resist_count(ec)
cur_team = [0] * len(candidates)
backtrack(candidates, cur_team, 0, 0)
s = ""
for t in TEAM:
s += str(t[0]) + ';'
print(s)
I just can't get it done. Therefore I'll post the full code.
The .csv used is from http://www.football-data.co.uk/mmz4281/1415/E0.csv
Now when run, the variables home_team_a, home_team_d, away_team_a and away_team_d are based on all of the previous matches but I want them to be based always on the last 6 matches.
import csv, math, ast, numpy as np
def poisson(actual, mean):
return math.pow(mean, actual) * math.exp(-mean) / math.factorial(actual)
csvFile = '20152016.csv'
team_list = []
k = open('team_list.txt', 'w')
k.write("""{
""")
csvRead = csv.reader(open(csvFile))
next(csvRead)
for row in csvRead:
if row[2] not in team_list:
team_list.append(row[2])
if row[3] not in team_list:
team_list.append(row[3])
team_list.sort()
for team in team_list:
k.write(""" '%s': {'home_goals': 0, 'away_goals': 0, 'home_conceded': 0, 'away_conceded': 0, 'home_games': 0, 'away_games': 0, 'alpha_h': 0, 'beta_h': 0, 'alpha_a': 0, 'beta_a': 0},
""" % (team))
k.write("}")
k.close()
s = open('team_list.txt', 'r').read()
dict = ast.literal_eval(s)
GAMES_PLAYED = 0
WEEKS_WAIT = 4
TOTAL_VALUE = 0
csvRead = csv.reader(open(csvFile))
next(csvRead)
for game in csvRead:
home_team = game[2]
away_team = game[3]
home_goals = int(game[4])
away_goals = int(game[5])
home_win_prob = 0
draw_win_prob = 0
away_win_prob = 0
curr_home_goals = 0
curr_away_goals = 0
avg_home_goals = 1
avg_away_goals = 1
team_bet = ''
ev_bet = ''
# GETTING UPDATED VARIABLES
for key, value in dict.items():
curr_home_goals += dict[key]['home_goals']
curr_away_goals += dict[key]['away_goals']
if GAMES_PLAYED > (WEEKS_WAIT * 10):
avg_home_goals = curr_home_goals / (GAMES_PLAYED)
avg_away_goals = curr_away_goals / (GAMES_PLAYED)
# CALCULATING FACTORS
if GAMES_PLAYED > (WEEKS_WAIT * 10):
home_team_a = (dict[home_team]['alpha_h'] + dict[home_team]['alpha_a']) / 2
away_team_a = (dict[away_team]['alpha_h'] + dict[away_team]['alpha_a']) / 2
home_team_d = (dict[home_team]['beta_h'] + dict[home_team]['beta_a']) / 2
away_team_d = (dict[away_team]['beta_h'] + dict[away_team]['beta_a']) / 2
home_team_exp = avg_home_goals * home_team_a * away_team_d
away_team_exp = avg_away_goals * away_team_a * home_team_d
# RUNNING POISSON
l = open('poisson.txt', 'w')
for i in range(10):
for j in range(10):
prob = poisson(i, home_team_exp) * poisson(j, away_team_exp)
l.write("Prob%s%s = %s\n" % (i, j, prob))
l.close()
with open('poisson.txt') as f:
for line in f:
home_goals_m = int(line.split(' = ')[0][4])
away_goals_m = int(line.split(' = ')[0][5])
prob = float(line.split(' = ')[1])
if home_goals_m > away_goals_m:
home_win_prob += prob
elif home_goals_m == away_goals_m:
draw_win_prob += prob
elif home_goals_m < away_goals_m:
away_win_prob += prob
#CALCULATE VALUE
bet365odds_h, bet365odds_d, bet365odds_a = float(game[23]), float(game[24]), float(game[25])
ev_h = (home_win_prob * (bet365odds_h - 1)) - (1 - home_win_prob)
ev_d = (draw_win_prob * (bet365odds_d - 1)) - (1 - draw_win_prob)
ev_a = (away_win_prob * (bet365odds_a - 1)) - (1 - away_win_prob)
highestEV = max(ev_h, ev_d, ev_a)
if (ev_h == highestEV) and (ev_h > 0):
team_bet = home_team
ev_bet = ev_h
if home_goals > away_goals:
TOTAL_VALUE += (bet365odds_h - 1)
else:
TOTAL_VALUE -= 1
elif (ev_d == highestEV) and (ev_d > 0):
team_bet = 'Draw'
ev_bet = ev_d
if home_goals == away_goals:
TOTAL_VALUE += (bet365odds_d - 1)
else:
TOTAL_VALUE -= 1
elif (ev_a == highestEV) and (ev_a > 0):
team_bet = away_team
ev_bet = ev_a
if home_goals < away_goals:
TOTAL_VALUE += (bet365odds_a - 1)
else:
TOTAL_VALUE -= 1
if (team_bet != '') and (ev_bet != ''):
print ("Bet on '%s' (EV = %s)" % (team_bet, ev_bet))
print (TOTAL_VALUE)
# UPDATE VARIABLES AFTER MATCH HAS BEEN PLAYED
dict[home_team]['home_goals'] += home_goals
dict[home_team]['home_conceded'] += away_goals
dict[home_team]['home_games'] += 1
dict[away_team]['away_goals'] += away_goals
dict[away_team]['away_conceded'] += home_goals
dict[away_team]['away_games'] += 1
GAMES_PLAYED += 1
# CREATE FACTORS
if GAMES_PLAYED > (WEEKS_WAIT * 10):
for key, value in dict.items():
alpha_h = (dict[key]['home_goals'] / dict[key]['home_games']) / avg_home_goals
beta_h = (dict[key]['home_conceded'] / dict[key]['home_games']) / avg_away_goals
alpha_a = (dict[key]['away_goals'] / dict[key]['away_games']) / avg_away_goals
beta_a = (dict[key]['away_conceded'] / dict[key]['away_games']) / avg_home_goals
dict[key]['alpha_h'] = alpha_h
dict[key]['beta_h'] = beta_h
dict[key]['alpha_a'] = alpha_a
dict[key]['beta_a'] = beta_a
Use a deque to keep the 6 most recent items in memory; adding a new record will "push out" the oldest one.
import collections
import itertools
import csv
with open("foo.csv") as fh:
# Skip the first 44 rows
csv_read = islice(csv.reader(fh), 44, None)
# Initialize the deque with the next 6 rows
d = collections.deque(islice(csv_read, 6), 6)
for record in csv_read:
d.append(record)
print(list(d)) # Rows 46-51, then 47-52, then 48-53, etc
Because you set the maximum length of the deque to 6, each append to a "full" deque pushes out the older one. On the first iteration, d.append pushes out row 45 and adds row 51. On the next iteration, adding row 52 pushes out row 46, etc.
In general, a deque is a data structure that is like a combination of a queue and a stack; you can add or remove items to either end efficiently, but accessing an arbitrary item or modifying the "middle" is slow. Here, we're taking advantage of the fact that appending to a full deque causes an implicit removal from the opposite end.
How about:
if seen_records == 200:
recs = list(csvRead)[seen_records - 6:seen_records + 1]
You can do something like this....
previous_index = 0
previous_max = 6 # max number of previous numbers to remember
previous = [None for _ in range(previous_max)]
csvFile = 'X.csv'
seen_records = 0
csvRead = csv.reader(open(csvFile))
# Enumerate over the records to keep track of the index of each one
for i, records in enumerate(csvRead):
if (i > 50):
seen_records =+ 1
if previous_index == previous_max:
previous_index = 0 # Reset to the beginning when we reach the end
# Store the record and increment the index to the next location
previous[previous_index] = record
previous_index += 1
This creates a very basic array of length previous_max and just stores the oldest data at index 0 and newest at previous_max -1.