First of all, sorry for any mistakes: I'm not a native English speaker and I'm new to Python.
As lug_par is a list with 4 items, the second for loop should do 4 iterations. In the third for loop the condition of the if is true so it should execute the break and get out of the loop. My problem is that it doesn't get into the third iteration, as if the len(lug_par) was 2 and not 4. It jumps straight into the cont_y = cont_y + 1
import re
# Question snippet (indentation restored). It compares every "(" position
# with every ")" position of `string`.
string = "R(95DHS(60PST_35FDP_05MTR)_05A(95DHS"
lug_par = [s.start() for s in re.finditer(r'\(', string)]
lug_par_cierra = [s.start() for s in re.finditer(r'\)', string)]
cont_y = 0
# NOTE: `(0, n)` is a two-element tuple, not a range, so each of the loops
# below runs exactly twice (once with 0, once with n). This is the behaviour
# the question asks about and is intentionally left as posted.
for i in (0, len(lug_par_cierra)):
    cont_x = 0
    for j in (0, len(lug_par)):
        if(lug_par[cont_x] > lug_par_cierra[cont_y]):
            c = lug_par.index(cont_x)
            borra = lug_par_cierra.index(c)
            break
        else:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(lug_par[cont_x])
            cont_x = cont_x + 1
    cont_y = cont_y + 1
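I think you just forgot the calls to range() in the for-loops. `for i in (0, n)` iterates over the two-element tuple `(0, n)`, so the body runs exactly twice no matter how large n is, whereas `for i in range(0, n)` runs it n times. Once they are corrected, the loops will iterate as you expect.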
Here is the corrected code:
import re
# Corrected snippet: both loops now iterate over range(), so the inner loop
# visits every "(" position and the outer loop every ")" position.
string = "R(95DHS(60PST_35FDP_05MTR)_05A(95DHS"
lug_par = [s.start() for s in re.finditer(r'\(', string)]
lug_par_cierra = [s.start() for s in re.finditer(r'\)', string)]
cont_y = 0
for i in range(0, len(lug_par_cierra)):  # range() added
    cont_x = 0
    for j in range(0, len(lug_par)):  # range() added
        if lug_par[cont_x] > lug_par_cierra[cont_y]:
            # NOTE(review): list.index() searches for a *value*. cont_x is a
            # position in lug_par, and c is a position too, so these lookups
            # raise ValueError unless that number happens to be stored in the
            # list. Confirm what `c` and `borra` are meant to hold.
            c = lug_par.index(cont_x)
            borra = lug_par_cierra.index(c)
            break
        else:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(lug_par[cont_x])
            cont_x = cont_x + 1
    cont_y = cont_y + 1
Related
I have parsed a crew list in which each line looks like this:
20;mechanic;0;68
21;cook;0;43
22;scientist;0;79
23;manager;1;65
24;mechanic;1;41
etc
Now I'm trying to figure out how to count the number of workers who have 60 or more stamina (the last field of each employee line).
There is my code:
# Question snippet (indentation restored): count the employees whose stamina,
# the 4th ';'-separated field, is at least 60.
with open('employee.txt', 'r') as employee_list:
    count = 0
    for employee in employee_list.readlines():
        employee_data = employee.rstrip().split(';')
        if int(employee_data[3]) >= 60:
            count += 1
            # print() sits inside the loop, so the running total is printed
            # for every match. This is the behaviour the question asks about.
            print(count)
Print from terminal:
1
2
3
...
90
I think that is the right answer, but is there any way to get only one 'total' count instead of 90 separate lines?
Just print one line after the loop is done.
# Count the employees whose stamina (4th ';'-separated field) is at least 60
# and report the total once.
with open('employee.txt', 'r') as employee_list:
    count = 0
    # Iterating the file object streams it line by line; readlines() would
    # load the whole file into memory first.
    for employee in employee_list:
        if not employee.strip():
            continue  # a blank/trailing line has no 4th field to parse
        employee_data = employee.rstrip().split(';')
        if int(employee_data[3]) >= 60:
            count += 1
# Dedented out of the loop: runs exactly once, after every line was examined.
print(count)
But I would also recommend using pandas for data manipulation. For example:
import pandas as pd

# The sample data has no header row. Without header=None, read_csv would
# consume the first employee as column labels and silently drop that record.
df = pd.read_csv(
    'employee.txt',
    sep=';',
    header=None,
    names=['col1', 'col2', 'col3', 'stamina'],
)
Then just filter and get the size:
# Number of matching rows. DataFrame.size would return rows * columns
# (the total cell count), not the number of employees.
len(df[df.stamina >= 60])
So after a day of thinking I wrote this and get right answer ( maybe someone will find this helpful):
def total_resist_count(employee_list=None, min_stamina=60):
    """Return the parsed employee records whose stamina is at least *min_stamina*.

    Args:
        employee_list: iterable of ';'-separated lines whose 4th field is an
            integer stamina value. When omitted, 120 lines are read from
            standard input, as the original zero-argument version did.
        min_stamina: inclusive lower bound on the stamina field.

    Returns:
        A list of records, each a list of the line's fields as strings.

    Raises:
        ValueError: if a stamina field is not an integer.
        IndexError: if a non-blank line has fewer than four fields.
    """
    if employee_list is None:
        employee_list = [input() for _ in range(120)]
    candidates = []
    for employee in employee_list:
        line = employee.rstrip()
        if not line:
            continue  # tolerate blank lines instead of failing on field 3
        employee_data = line.split(';')
        if int(employee_data[3]) >= min_stamina:
            candidates.append(employee_data)
    return candidates
# Open positions per profession. backtrack() decrements an entry while a
# candidate of that profession is on the team and restores it on undo;
# validate_solution() accepts a team only when no positive total remains.
required_professionals = {
'computers specialist': 5,
'cook': 3,
'doctor': 5,
'electrical engineer': 4,
'manager': 1,
'mechanic': 8,
'scientist': 14
}
# Exact team size validate_solution() requires (sum of the selection flags).
expedition_total = 40
# Lower bounds validate_solution() applies to num_females / num_males.
female_min = 21
male_min = 12
def validate_solution(cur_team, num_females, num_males):
    """Return True when the current selection is a complete, valid team.

    Args:
        cur_team: sequence of 0/1 flags, one per candidate; its sum is the
            team size.
        num_females: count compared against the module-level ``female_min``.
        num_males: count compared against the module-level ``male_min``.

    Returns:
        True if the team has exactly ``expedition_total`` members, meets both
        minimum counts, and ``required_professionals`` has no positive total
        left to fill; False otherwise.
    """
    if sum(cur_team) != expedition_total:
        return False
    if num_females < female_min or num_males < male_min:
        return False
    # Any remaining positive quota means a required position is still open.
    return sum(required_professionals.values()) <= 0
# First valid team found by backtrack(); stays None until a solution exists.
TEAM = None


def backtrack(candidates, cur_team, num_females, num_males, start=0):
    """Depth-first search for a team that passes validate_solution().

    On success the selected candidate records are stored in the module-level
    ``TEAM`` and the search stops. ``cur_team`` and ``required_professionals``
    are mutated during the search and restored while unwinding.

    Args:
        candidates: list of records; field 1 is the profession used as a key
            into ``required_professionals``; field 2 equal to ``'1'`` counts
            towards ``num_males``, anything else towards ``num_females``.
        cur_team: list of 0/1 flags parallel to ``candidates``.
        num_females: running count for the current selection.
        num_males: running count for the current selection.
        start: first candidate index to consider. Members are only added in
            increasing index order, so each team is explored once instead of
            once per ordering of its members. The first solution found is the
            same one the unrestricted search reaches.
    """
    global TEAM
    if TEAM is not None or sum(cur_team) > expedition_total:
        return
    if validate_solution(cur_team, num_females, num_males):
        TEAM = [candidates[i] for i, used in enumerate(cur_team) if used == 1]
        return
    for i in range(start, len(candidates)):
        profession = candidates[i][1]
        # .get(): a candidate whose profession is not required at all is
        # skipped instead of raising KeyError.
        if cur_team[i] != 0 or required_professionals.get(profession, 0) <= 0:
            continue
        cur_team[i] = 1
        required_professionals[profession] -= 1
        if candidates[i][2] == '1':
            backtrack(candidates, cur_team, num_females, num_males + 1, i + 1)
        else:
            backtrack(candidates, cur_team, num_females + 1, num_males, i + 1)
        # Undo the choice so sibling branches (and the caller) see clean state.
        required_professionals[profession] += 1
        cur_team[i] = 0
        if TEAM is not None:
            return
if __name__ == '__main__':
    # NOTE(review): decode_fcc_message is not defined in this snippet;
    # presumably it returns the raw employee lines. Verify against its source.
    ec = decode_fcc_message()
    candidates = total_resist_count(ec)
    cur_team = [0] * len(candidates)
    backtrack(candidates, cur_team, 0, 0)
    if TEAM is None:
        # backtrack() leaves TEAM unset when no selection satisfies the
        # constraints; iterating None would fail with a TypeError.
        raise SystemExit("No valid team found")
    # Same output as before: every member's first field followed by ';'.
    print("".join(str(t[0]) + ';' for t in TEAM))
Basically, in the last for loop the variable k only takes one value (the number of items I expected in the list), so I get a single, wrong answer instead of multiple answers. I want to compute the n-th roots of a complex number. (If my question isn't clear, sorry: I'm not a native English speaker, and I'll do my best to make it clearer.)
from math import *
# Question snippet (indentation restored): reads a degree and the real and
# imaginary parts, then prints textual "r e^i..." expressions.
deg = int(input("entrez le degré:"))
re = int(input("le réel:"))
im = int(input("l'imaginaire:"))
counter = 0
while counter < deg:
    counter = counter + 1
    # kkk is re-created on every pass, so after the loop it holds only [deg].
    # This is the behaviour the question asks about and is left as posted.
    kkk = []
    kkk.append(counter)
r = sqrt(pow(re,2)+pow(im,2))
if im != 0:
    teton = round(pi/radians(degrees(acos(re/r))),1)
else:
    teton = round(pi/radians(degrees(acos(im/r))),1)
if round(r) != r:
    # Non-integer modulus: keep it symbolic as a tuple of printable pieces.
    r = "sqrt(",(pow(re,2)+pow(im,2)),")"
else:
    r = r
teta = "pi/%s" %teton
print("z = ",r,"e^i",teta,)
for k in kkk:
    if re != 0 or im != 0:
        print(r,"e^i*2*",teta,"*",k,"pi")
    else:
        print(r,"^1/",deg,"e^i(",teta,"/",deg," +(2",k,"pi)/",deg)
    print(k)
If I understood the problem correctly, you are saying that the for loop does not iterate over all the items in the list kkk.
If you check your code, the list kkk always has only one item, because you initialise it and append to it inside the same loop.
Please move the statement below out of the first loop.
kkk = []
like below.
from math import *
# Corrected snippet: kkk is created once, before the loop, so it ends up
# holding 1..deg and the final for loop prints one line per value.
deg = int(input("entrez le degré:"))
re = int(input("le réel:"))
im = int(input("l'imaginaire:"))
counter = 0
kkk = []
while counter < deg:
    counter = counter + 1
    kkk.append(counter)
r = sqrt(pow(re,2)+pow(im,2))
if im != 0:
    teton = round(pi/radians(degrees(acos(re/r))),1)
else:
    teton = round(pi/radians(degrees(acos(im/r))),1)
if round(r) != r:
    # Non-integer modulus: keep it symbolic as a tuple of printable pieces.
    r = "sqrt(",(pow(re,2)+pow(im,2)),")"
else:
    r = r
teta = "pi/%s" %teton
print("z = ",r,"e^i",teta,)
for k in kkk:
    if re != 0 or im != 0:
        print(r,"e^i*2*",teta,"*",k,"pi")
    else:
        print(r,"^1/",deg,"e^i(",teta,"/",deg," +(2",k,"pi)/",deg)
    print(k)
I just can't get it done. Therefore I'll post the full code.
The .csv used is from http://www.football-data.co.uk/mmz4281/1415/E0.csv
Now when run, the variables home_team_a, home_team_d, away_team_a and away_team_d are based on all of the previous matches but I want them to be based always on the last 6 matches.
import csv, math, ast, numpy as np
def poisson(actual, mean):
    """Return the Poisson probability of *actual* events for rate *mean*.

    Evaluates ``mean**actual * exp(-mean) / actual!``.

    Args:
        actual: observed count; must be a non-negative integer because it is
            passed to ``math.factorial``.
        mean: expected number of events.
    """
    return math.pow(mean, actual) * math.exp(-mean) / math.factorial(actual)
# Script body: walks the match CSV row by row, derives per-team attack/defence
# factors from the results seen so far, turns them into Poisson score-line
# probabilities, and compares those with the odds read from columns 23-25.
# NOTE(review): the indentation of this paste was lost, so the nesting of the
# statements below is not shown and must be restored before the script runs.
csvFile = '20152016.csv'
team_list = []
# A dict literal with zeroed per-team statistics is written to team_list.txt
# and read back further down with ast.literal_eval.
k = open('team_list.txt', 'w')
k.write("""{
""")
csvRead = csv.reader(open(csvFile))
# Skip the first CSV row.
next(csvRead)
for row in csvRead:
if row[2] not in team_list:
team_list.append(row[2])
if row[3] not in team_list:
team_list.append(row[3])
team_list.sort()
for team in team_list:
k.write(""" '%s': {'home_goals': 0, 'away_goals': 0, 'home_conceded': 0, 'away_conceded': 0, 'home_games': 0, 'away_games': 0, 'alpha_h': 0, 'beta_h': 0, 'alpha_a': 0, 'beta_a': 0},
""" % (team))
k.write("}")
k.close()
s = open('team_list.txt', 'r').read()
# NOTE(review): the name `dict` shadows the builtin for the rest of the script.
dict = ast.literal_eval(s)
GAMES_PLAYED = 0
# Factors and averages are only used once GAMES_PLAYED exceeds WEEKS_WAIT * 10.
WEEKS_WAIT = 4
# Running profit/loss of the simulated one-unit bets.
TOTAL_VALUE = 0
# Second pass over the same file, again skipping the first row.
csvRead = csv.reader(open(csvFile))
next(csvRead)
for game in csvRead:
home_team = game[2]
away_team = game[3]
home_goals = int(game[4])
away_goals = int(game[5])
home_win_prob = 0
draw_win_prob = 0
away_win_prob = 0
curr_home_goals = 0
curr_away_goals = 0
avg_home_goals = 1
avg_away_goals = 1
team_bet = ''
ev_bet = ''
# GETTING UPDATED VARIABLES
# Totals are summed over every team's cumulative counters, i.e. over all
# matches processed so far rather than a recent window.
for key, value in dict.items():
curr_home_goals += dict[key]['home_goals']
curr_away_goals += dict[key]['away_goals']
if GAMES_PLAYED > (WEEKS_WAIT * 10):
avg_home_goals = curr_home_goals / (GAMES_PLAYED)
avg_away_goals = curr_away_goals / (GAMES_PLAYED)
# CALCULATING FACTORS
if GAMES_PLAYED > (WEEKS_WAIT * 10):
home_team_a = (dict[home_team]['alpha_h'] + dict[home_team]['alpha_a']) / 2
away_team_a = (dict[away_team]['alpha_h'] + dict[away_team]['alpha_a']) / 2
home_team_d = (dict[home_team]['beta_h'] + dict[home_team]['beta_a']) / 2
away_team_d = (dict[away_team]['beta_h'] + dict[away_team]['beta_a']) / 2
home_team_exp = avg_home_goals * home_team_a * away_team_d
away_team_exp = avg_away_goals * away_team_a * home_team_d
# RUNNING POISSON
# The 10 x 10 score-line probabilities are written to poisson.txt and read
# straight back. The [4] and [5] character lookups below rely on both goal
# counts being single digits ("ProbIJ = ...").
l = open('poisson.txt', 'w')
for i in range(10):
for j in range(10):
prob = poisson(i, home_team_exp) * poisson(j, away_team_exp)
l.write("Prob%s%s = %s\n" % (i, j, prob))
l.close()
with open('poisson.txt') as f:
for line in f:
home_goals_m = int(line.split(' = ')[0][4])
away_goals_m = int(line.split(' = ')[0][5])
prob = float(line.split(' = ')[1])
if home_goals_m > away_goals_m:
home_win_prob += prob
elif home_goals_m == away_goals_m:
draw_win_prob += prob
elif home_goals_m < away_goals_m:
away_win_prob += prob
#CALCULATE VALUE
# Expected value of a one-unit bet: win probability * net payout minus the
# probability of losing the stake.
bet365odds_h, bet365odds_d, bet365odds_a = float(game[23]), float(game[24]), float(game[25])
ev_h = (home_win_prob * (bet365odds_h - 1)) - (1 - home_win_prob)
ev_d = (draw_win_prob * (bet365odds_d - 1)) - (1 - draw_win_prob)
ev_a = (away_win_prob * (bet365odds_a - 1)) - (1 - away_win_prob)
highestEV = max(ev_h, ev_d, ev_a)
# Only the outcome with the highest, strictly positive EV is bet on.
if (ev_h == highestEV) and (ev_h > 0):
team_bet = home_team
ev_bet = ev_h
if home_goals > away_goals:
TOTAL_VALUE += (bet365odds_h - 1)
else:
TOTAL_VALUE -= 1
elif (ev_d == highestEV) and (ev_d > 0):
team_bet = 'Draw'
ev_bet = ev_d
if home_goals == away_goals:
TOTAL_VALUE += (bet365odds_d - 1)
else:
TOTAL_VALUE -= 1
elif (ev_a == highestEV) and (ev_a > 0):
team_bet = away_team
ev_bet = ev_a
if home_goals < away_goals:
TOTAL_VALUE += (bet365odds_a - 1)
else:
TOTAL_VALUE -= 1
if (team_bet != '') and (ev_bet != ''):
print ("Bet on '%s' (EV = %s)" % (team_bet, ev_bet))
print (TOTAL_VALUE)
# UPDATE VARIABLES AFTER MATCH HAS BEEN PLAYED
dict[home_team]['home_goals'] += home_goals
dict[home_team]['home_conceded'] += away_goals
dict[home_team]['home_games'] += 1
dict[away_team]['away_goals'] += away_goals
dict[away_team]['away_conceded'] += home_goals
dict[away_team]['away_games'] += 1
GAMES_PLAYED += 1
# CREATE FACTORS
# NOTE(review): these divisions use each team's home_games / away_games
# counters; presumably every team has played both home and away by the time
# the threshold is passed. Verify, since a zero counter would raise.
if GAMES_PLAYED > (WEEKS_WAIT * 10):
for key, value in dict.items():
alpha_h = (dict[key]['home_goals'] / dict[key]['home_games']) / avg_home_goals
beta_h = (dict[key]['home_conceded'] / dict[key]['home_games']) / avg_away_goals
alpha_a = (dict[key]['away_goals'] / dict[key]['away_games']) / avg_away_goals
beta_a = (dict[key]['away_conceded'] / dict[key]['away_games']) / avg_home_goals
dict[key]['alpha_h'] = alpha_h
dict[key]['beta_h'] = beta_h
dict[key]['alpha_a'] = alpha_a
dict[key]['beta_a'] = beta_a
Use a deque to keep the 6 most recent items in memory; adding a new record will "push out" the oldest one.
import collections
import csv
import itertools

# newline="" is what the csv module asks for when it is handed a file object.
with open("foo.csv", newline="") as fh:
    # Skip the first 44 rows
    csv_read = itertools.islice(csv.reader(fh), 44, None)
    # Initialize the deque with the next 6 rows; maxlen=6 makes every later
    # append drop the oldest row automatically.
    d = collections.deque(itertools.islice(csv_read, 6), 6)
    for record in csv_read:
        d.append(record)
        print(list(d))  # Rows 46-51, then 47-52, then 48-53, etc
Because you set the maximum length of the deque to 6, each append to a "full" deque pushes out the older one. On the first iteration, d.append pushes out row 45 and adds row 51. On the next iteration, adding row 52 pushes out row 46, etc.
In general, a deque is a data structure that is like a combination of a queue and a stack; you can add or remove items to either end efficiently, but accessing an arbitrary item or modifying the "middle" is slow. Here, we're taking advantage of the fact that appending to a full deque causes an implicit removal from the opposite end.
How about:
if seen_records == 200:
    # NOTE(review): list(csvRead) contains only the rows the reader has not
    # yielded yet. If earlier rows were already consumed, these slice bounds
    # are relative to the remaining rows, not to the file. Confirm the
    # intended window before relying on this.
    recs = list(csvRead)[seen_records - 6:seen_records + 1]
You can do something like this....
previous_index = 0  # slot that the next record will be written to
previous_max = 6  # max number of previous records to remember
previous = [None for _ in range(previous_max)]
csvFile = 'X.csv'
seen_records = 0
with open(csvFile) as fh:
    csvRead = csv.reader(fh)
    # Enumerate over the records to keep track of the index of each one
    for i, record in enumerate(csvRead):
        if i > 50:
            seen_records += 1  # was `=+ 1`, which assigned +1 every time
            if previous_index == previous_max:
                previous_index = 0  # Reset to the beginning when we reach the end
            # Store the record and increment the index to the next location
            previous[previous_index] = record
            previous_index += 1
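This creates a very basic fixed-size list of length previous_max that is used as a circular buffer: each new record overwrites the oldest slot. Until the first wrap-around the oldest record is at index 0; after that, the oldest record is the one at previous_index (the slot about to be overwritten) and the newest is the one just before it.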
# Question snippet (indentation restored): tallies the leading digit of every
# all-digit line in city_all.txt. The one-variable-per-digit layout is what the
# question asks how to shorten, so it is left as posted.
lines=[]
count1 = 0
count2 = 0
count3 = 0
count4 = 0
count5 = 0
count6 = 0
count7 = 0
count8 = 0
count9 = 0
allcount = 0
with open('city_all.txt', 'r') as file:
    for line in file:
        lines.append(line.strip())
for x in range(0,len(lines)):
    if lines[x].isdigit():
        allcount+=1
        string = lines[x]
        if string[0]=="1":
            count1+=1
        elif string[0]=="2":
            count2+=1
        elif string[0]=="3":
            count3+=1
        elif string[0]=="4":
            count4+=1
        elif string[0]=="5":
            count5+=1
        elif string[0]=="6":
            count6+=1
        elif string[0]=="7":
            count7+=1
        elif string[0]=="8":
            count8+=1
        elif string[0]=="9":
            count9+=1
print(count1/allcount)
print('{:.1%}'.format(count1/allcount))
I'm wondering if there is any way to avoid declaring all these variables and to compact all the if statements. I'm trying to make a program to help check Benford's law, so I am reading a txt file into a list, then going through each element and checking what its leading digit is.
You can simplify it a bit:
# counts[d] is the number of all-digit lines whose leading digit is d.
counts = [0] * 10
allcount = 0
with open('city_all.txt', 'r') as f:
    for line in (x.strip() for x in f):
        if line.isdigit():
            allcount += 1
            # Index by the *leading digit*, not by the whole number:
            # counts[int(line)] only ever hit for values below 10.
            leading = line[0]
            if "0" <= leading <= "9":  # skip non-ASCII digit characters
                counts[int(leading)] += 1
I have 67000 files, I need to read them and extract similarities between the words, but when I run the code my laptop becomes much slower, I can't open any other application, and then a memory overflow error shows up (even when I run on around 10 000 of the files). Is there a way to clear the memory after every for loop maybe, or will running the code on all files be impossible to do? Below is the code:
def isAscii(s):
    """Return True if every character of *s* is in ``string.printable``.

    An empty string yields True. Despite the name, the check is against the
    printable subset, so control characters such as NUL are rejected as well.
    """
    return all(c in string.printable for c in s)
# Number of neighbouring tokens inspected on each side of a word.
windowSize = 2
# relationTable[w1][w2]['pairPMI'] holds the PMI score of the pair; read by sim().
relationTable = {}
# probabilities[w]['wordProb'] = wordCount[w]['wCount'] / totalWordCount.
probabilities = {}
# wordCount[w1]['wCount'] is the number of counted pairs starting with w1;
# wordCount[w1][w2]['count'] is the count for that specific pair.
wordCount = {}
# Total number of (w1, w2) pairs counted across all processed files.
totalWordCount = 0
def sim(w1, w2):
    """Return a similarity score for two words based on their shared PMI entries.

    The numerator adds both ``pairPMI`` values of every context word present
    for *w1* and *w2*; the denominator adds every ``pairPMI`` of both words.

    Returns:
        -1 if either word is missing from ``relationTable``, 0 if the
        denominator is zero, otherwise ``numerator / denominator`` as a float.
    """
    if w1 not in relationTable or w2 not in relationTable:
        return -1

    rtw1 = relationTable[w1]
    rtw2 = relationTable[w2]
    numerator = 0
    denominator = 0
    for word in rtw1:
        rtw1_PMI = rtw1[word]['pairPMI']
        denominator += rtw1_PMI
        if word in rtw2:
            numerator += rtw1_PMI + rtw2[word]['pairPMI']
    for word in rtw2:
        denominator += rtw2[word]['pairPMI']

    if denominator == 0:
        return 0
    return float(numerator) / denominator
# Script body: tokenises note files, counts word pairs inside a
# +/- windowSize window, converts the counts to PMI scores in relationTable,
# and finally scores every ordered pair of words with sim().
# NOTE(review): the indentation of this paste was lost, so the nesting of the
# statements below is not shown and must be restored before the script runs.
# The empty dict is replaced by the list that os.listdir returns.
AllNotes = {}
AllNotes = os.listdir("C:/Users/nerry-san/Desktop/EECE 502/MedicalNotes")
# NOTE(review): neither this file nor the per-note files opened below are
# ever closed.
fileStopPunctuations = open('C:/Users/nerry-san/Desktop/EECE 502/stopPunctuations.txt')
stopPunctuations = nltk.word_tokenize(fileStopPunctuations.read())
# Only the first 10 directory entries are processed here.
for x in range (0, 10):
fileToRead = open('C:/Users/nerry-san/Desktop/EECE 502/MedicalNotes/%s'%(AllNotes[x]))
case1 = fileToRead.read()
text = nltk.WordPunctTokenizer().tokenize(case1.lower())
# Drop every token that appears in the stop/punctuation list.
final_text = []
for index in range(len(text)):
word = text[index]
if (word not in stopPunctuations):
final_text.append(word)
# Count each (w1, w2) pair where w2 lies within windowSize tokens of w1.
for index in range (len(final_text)):
w1 = final_text[index]
if(isAscii(w1)):
for index2 in range(-windowSize, windowSize+1):
if (index2 != 0):
if ( index + index2 ) in range (0, len(final_text)):
w2 = final_text[index + index2]
if(isAscii(w2)):
totalWordCount += 1
if (w1 not in wordCount):
wordCount[w1] = {}
wordCount[w1]['wCount'] = 0
# The first sighting of a pair raises KeyError and creates its entry.
try:
wordCount[w1][w2]['count'] += 1
wordCount[w1]['wCount'] += 1
except KeyError:
wordCount[w1][w2] = {'count':1}
wordCount[w1]['wCount'] += 1
for word in wordCount:
probabilities[word]={}
probabilities[word]['wordProb'] = float (wordCount[word]['wCount'])/ totalWordCount
# relationTable gets one entry per observed pair, so it grows with the number
# of distinct pairs in the corpus.
for word in wordCount:
relationTable[word] = {}
for word2 in wordCount[word]:
if ( word2 != 'wCount'):
pairProb = float(wordCount[word][word2]['count'])/(wordCount[word]['wCount'])
relationTable[word][word2] = {}
relationTable[word][word2]['pairPMI'] = math.log(float(pairProb)/(probabilities[word]['wordProb'] * probabilities[word2]['wordProb']),2)
l = []
for word in relationTable:
l.append(word)
# Every word is compared with every other word: sim() is called once per
# ordered pair, i.e. a number of times quadratic in the vocabulary size.
for index in range (0, len(l)):
word = l[index]
simValues = []
for index2 in range (0, len(l)):
word2 = l[index2]
if(word!= word2):
simVal = sim(word,word2)
if(simVal > 0):
simValues.append([word2, simVal])
simValues.sort(key= operator.itemgetter(1), reverse = True)
Every time you open a file, use the "with" statement. This will ensure the file is closed when the loop finishes (or rather, when the with block is exited).