Nested for loop using dict - python

I am doing a Coursera python exercise and having trouble writing my code.
The question is as following:
Write a program to read through the mbox-short.txt and figure out who has sent the greatest number of mail messages. The program looks for 'From ' lines and takes the second word of those lines as the person who sent the mail.
The program creates a Python dictionary that maps the sender's mail address to a count of the number of times they appear in the file. After the dictionary is produced, the program reads through the dictionary using a maximum loop to find the most prolific committer.
The sample text file is at this link:
http://www.pythonlearn.com/code/mbox-short.txt
And the expected output should be:
cwen@iupui.edu 5
This is my code:
name = raw_input("Enter file:")
if len(name) < 1 : name = "mbox-short.txt"
handle = open(name)
count = dict()
for line in handle:
word = line.split()
if line.startswith('From '):
email = word[1]
for sender in email:
if sender not in count:
count[sender] = count.get(sender, 0) + 1
bigcount = None
bigname = None
for name,count in count.items():
if bigname is None or count > bigcount:
bigname = name
bigcount = count
print bigname, bigcount
The output I have is:
. 1
I think there is something wrong in "for sender in email" part, but couldn't figure out how it results in the undesired output.

The following loop is not appropriate in this situation because you are basically iterating over all the characters of the email address.
for sender in email:
...
That is why you are getting a character . when you print the email address with the largest count. You can easily see the effects once you print the count at the end of the loop.
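The following check is also redundant, because the dictionary's get method already supplies the default value 0 when the key is missing. It is in fact harmful here: with the check in place, an entry is only updated the first time it is seen, so no count can ever exceed 1.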
if sender not in count:
...
So, the final corrected code should be something like this.
name = raw_input("Enter file:")
if len(name) < 1:
name = "mbox-short.txt"
handle = open(name)
count = dict()
for line in handle:
word = line.split()
if line.startswith('From '):
count[word[1]] = count.get(word[1], 0) + 1
largest = 0
email = ''
for k in count:
if count[k] > largest:
largest = count[k]
email = k
print largest, email

# Tally the second word of every 'From ' line, then print the address with
# the highest tally (None None when no such line exists).
fname = input("Enter The File Name")
fhandle = open(fname, 'r')
sender = {}
for line in fhandle:
    if not line.startswith("From "):
        continue
    address = line.split()[1]
    sender[address] = sender.get(address, 0) + 1

max_key = max_val = None
for key in sender:
    value = sender[key]
    # Keep the current maximum unless this count is strictly larger.
    if max_val is not None and value <= max_val:
        continue
    max_key, max_val = key, value
print(max_key, max_val)

name = raw_input("Enter file:")
if len(name) < 1 : name = "mbox-short.txt"
handle = open(name)
words = list()
counts = dict()
for line in handle:
words = line.split()
if words == []: continue
if words[0] != 'From': continue
counts[words[1]] = counts.get(words[1],0) + 1
#print counts
maxval = None
maxkey = None
for kee, val in counts.items():
if maxval == None: maxval = val
if maxval < val:
maxval = val
maxkey = kee
print maxkey, maxval

name = raw_input("Enter file:")
if len(name) < 1 : name = "mbox-short.txt"
fl = open(name)
#fl=open('C:\Users\Algoritm\Documents\Python Coursera\mbox-short.txt')
lst=list()
count=dict()
#scan the file and create a list
for lines_in_the_file in fl:
xx=lines_in_the_file.rstrip().split()
if not lines_in_the_file.startswith('From '): continue #if in the line keep it
word=lines_in_the_file.split()
#print word[1]
xx=word[1]
#for index in xx: #find repeted words in the list Word
lst.append(xx)
#print lst
lis=lst
for x in lis:
count[x]=count.get(x,0)+1
#print count
bigcount=None
bigwords=None
for x, y in count.items():
if bigcount is None or y>bigcount:
bigwords=x
bigcount=y
print bigwords, bigcount

name = input("Enter the file name:")
handle = open(name)
new = dict()
#count = 0
for line in handle:
word = line.split()
if line.startswith("From "):
new[word[1]] = new.get(word[1],0) + 1
largest = 0
email = None
for k,v in new.items():
if email is None or v > largest:
largest = v
email = k
print (email,largest)

fname=input('enter the file name: ')
d=dict()
try:
fhand=open(fname,'r')
except:
print('file not found')
exit()
for line in fhand:
if line.startswith("From:"):
srt=line.find(' ')
sl=line[srt:-1]
if sl not in d:
d[sl]=1
else:
d[sl]+=1
print(d)
largest= 0
email=''
for key in d:
if d[key] > largest:
largest=d[key]
email=key
print(email,': ',largest)

I am taking the same Coursera Python course. Since I am new to it, I am sharing my code for the assignment. For me, the key step was to first skip the unwanted lines with "if not line.startswith(...)", and only then split the line.
# Count senders on the 'From ' lines and print the highest count with its
# address.
counts = dict()
fname = input('Enter file: ')
if len(fname) < 1:
    # Pressing Enter selects the sample file.
    fname = 'mbox-short.txt'
try:
    fhand = open(fname)
except OSError:
    # Only a file that cannot be opened is an error; typing a name is not.
    print('Error')
    quit()
with fhand:
    for line in fhand:
        if not line.startswith('From '):
            continue
        words = line.split()
        counts[words[1]] = counts.get(words[1], 0) + 1

key = None
num = 0
for k, v in counts.items():
    if key is None or v > num:
        num = v
        key = k
print(num, key)

name = input("Enter file:")
if len(name) < 1:
    name = "mbox-short.txt"

# Collect the sender (second word) of every 'From ' envelope line.
# The file is iterated directly: calling handle.read() first would leave the
# file position at the end, so the loop would never see a single line.
words = list()
with open(name) as handle:
    for line in handle:
        if not line.startswith("From "):
            continue
        line = line.split()
        words.append(line[1])

counts = dict()
for word in words:
    counts[word] = counts.get(word, 0) + 1

maxval = None
maxkey = None
for key, val in counts.items():
    # Comparing an int with None raises TypeError on Python 3, so the first
    # entry is accepted explicitly.
    if maxval is None or val > maxval:
        maxval = val
        maxkey = key
print(maxkey, maxval)

counts = dict()
name = input("Enter file:")
if len(name) < 1 : name = "mbox-short.txt"
fhand = open(name)
for line in fhand:
line = line.rstrip()
if not line.startswith('From ') : continue
words = line.split()
counts[words[1]]=counts.get(words[1],0)+1
st = 0
for k in counts:
if counts[k] > st :
st = counts[k]
addy = k
print (addy, st)

Related

Python - Write a new row for each list data under same header into csv

I have a text file, 'student.txt'. Some keys have multiple values. I only want data that is tied to the name, and the sibling & hobby values below that name.
'student.txt'
ignore me
name-> Alice
name-> Sam
sibling-> Kate,
unwanted
sibling-> Luke,
hobby_1-> football
hobby_2-> games
name-> Ramsay
hobby_1-> dance
unwanted data
hobby_2-> swimming
hobby_3-> jogging
ignore data
Code I've done:
file = open("student.txt", "r")
with open("student.csv", "w") as writer:
main_dict = {}
student_dict = {"Siblings": "N/A", "Hobbies": "N/A"}
sibling_list = []
hobby_list = []
flag = True
writer.write ('name,siblings,hobbies\n')
header = 'Name,Siblings,Hobbies'.split(',')
sib_str = ''
hob_str =''
for eachline in file:
try:
key, value = eachline.split("-> ")
value = value.strip(",\n")
if flag:
if key == "name":
print (key,value)
if len(sibling_list) > 0:
main_dict[name]["Siblings"] = sib_str
#print (main_dict)
if len(hobby_list) > 0:
main_dict[name]["Hobbies"] = hob_str
sibling_list = []
hobby_list = []
name = value
main_dict[name] = student_dict.copy()
main_dict[name]["Name"] = name
elif key == "sibling":
sibling_list.append(value)
sib_str= ' '.join(sibling_list).replace(' ', '\n')
elif key.startswith("hobby"):
hobby_list.append(value)
hob_str = ' '.join(hobby_list)
if len(sibling_list) > 0:
main_dict[name]["Siblings"] = sib_str
if len(hobby_list) > 0:
main_dict[name]["Hobbies"] = hob_str
if 'name' in eachline:
if 'name' in eachline:
flag = True
else:
flag = False
except:
pass
for eachname in main_dict.keys():
for eachkey in header:
writer.write(str(main_dict[eachname][eachkey]))
writer.write (',')
if 'Hobbies' in eachkey:
writer.write ('\n')
CSV Output from Code above:
Expected CSV Output:
P.S.: I can't figure out how to do without the try/pass. Some lines (those without '->') are unwanted, and eachline.split("-> ") fails on them. I would appreciate help with this too.
Thanks so much!
The code below gives the csv file which you can import in your Excel and it will be in exact format you are expecting.
You can use something like
if "->" not in line:
continue
To skip lines that don't contain "->" values, see in the code below:
import csv


def cell_value(values, i):
    """Return the text for row *i* of one student's sibling or hobby column.

    Rows inside the list show the value, the first row past the end shows
    "N/A", and any later row is left empty.
    """
    if i < len(values):
        return values[i]
    if i == len(values):
        return "N/A"
    return ""


# Parse the "key-> value" records of student.txt into one entry per student.
students = {}
name = None
with open("student.txt", "r") as file:
    for line in file:
        # Lines without "->" carry no data and are skipped.
        if "->" not in line:
            continue
        line = line.strip(",\n")
        line = line.replace(" ", "")
        # Split on the first arrow only, so a value containing "->" is kept.
        key, value = line.split("->", 1)
        if key == "name":
            name = value
            students[name] = {"siblings": [], "hobbies": []}
        elif name is None:
            # A sibling/hobby line before any name has no student to go to.
            continue
        elif "sibling" in key:
            students[name]["siblings"].append(value)
        elif "hobby" in key:
            students[name]["hobbies"].append(value)

# Build one CSV row per sibling/hobby index; the name appears only on the
# student's first row.
csvlines = []
for name, info in students.items():
    siblings = info["siblings"]
    hobbies = info["hobbies"]
    maxlength = max(len(siblings), len(hobbies))
    if maxlength == 0:
        csvlines.append([name, "N/A", "N/A"])
        continue
    for i in range(maxlength):
        # Both columns use the same rule, each measured against its own list.
        siblingvalue = cell_value(siblings, i)
        hobbyvalue = cell_value(hobbies, i)
        csvlines.append([name if i == 0 else "", siblingvalue, hobbyvalue])
print(csvlines)

fields = ["name", "siblings", "hobbies"]
# newline='' lets the csv module control line endings itself, which avoids
# blank rows between records on Windows.
with open("students.csv", 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile)
    # Header row first, then the data rows.
    csvwriter.writerow(fields)
    csvwriter.writerows(csvlines)

Write a program to read through the mbox-short.txt and figure out the distribution by hour of the day for each of the messages

10.2 Write a program to read through the mbox-short.txt and figure out the distribution by hour of the day for each of the messages. You can pull the hour out from the 'From ' line by finding the time and then splitting the string a second time using a colon.
From stephen.marquard@uct.ac.za Sat Jan 5 09:14:16 2008
Once you have accumulated the counts for each hour, print out the counts, sorted by hour as shown below.
My Code:
fname = input("Enter file:")
fhandle = open(fname)
dic={}
for line in fhandle:
if not line.startswith("From "):
continue
else:
line=line.split()
line=line[5] # accesing the list using index and splitting it
line=line[0:2]
for bline in line:
dic[bline]=dic.get(bline,0)+1 # Using this line we created a dictionary having keys and values
#Now it's time to access the dictionary and sort in some way.
lst=[]
for k1,v1 in dic.items(): # dictionary er key value pair access korar jonno items method use kora hoyechhe
lst.append((k1,v1)) # dictionary er keys and corresponding values ke lst te append korlam
lst.sort() #lst take sort korlam. sorting is done through key
#print(lst)
for k1,v1 in lst: # we are able to access this list using key value pair as it was basically a dictionary before, It is just appended
print(k1,v1)
#print(dic)
#print(dic)
Desired Output:
04 3
06 1
07 1
09 2
10 3
11 6
14 1
15 2
16 4
17 2
18 1
19 1
My Output:
enter image description here
I don't understand what's going wrong.
Working code. I have broken the code down into the simplest form I could, so that it is easy for you to understand.
# Count messages per hour of the day from the 'From ' envelope lines.
fname = input('enter the file name : ')
try:
    fopen = open(fname, 'r')
except OSError:
    # Stop here: carrying on without an open file would only fail later
    # with a NameError on fopen.
    print('wrong file name !!!')
    raise SystemExit(1)

d = dict()
with fopen:
    for line in fopen:
        stline = line.strip()
        # 'From ' (with the space) matches the envelope line and excludes
        # the 'From:' header.
        if not stline.startswith('From '):
            continue
        spline = stline.split()
        if len(spline) < 6:
            # No time field on this line.
            continue
        # The sixth word is the time; the hour is the part before the
        # first colon.
        hour = spline[5].split(':')[0]
        d[hour] = d.get(hour, 0) + 1

for k, v in sorted(d.items()):
    print(k, v)
name = input("Enter file:")
if len(name) < 1 : name = "mbox-short.txt"
handle = open(name)
emailcount = dict()
for line in handle:
if not line.startswith("From "): continue
line = line.split()
line = line[1]
emailcount[line] = emailcount.get(line, 0) +1
bigcount = None
bigword = None
for word,count in emailcount.items():
if bigcount == None or count > bigcount:
bigcount = count
bigword = word
print(bigword, bigcount)
name = input("Enter file:")
if len(name) < 1 : name = "mbox-short.txt"
handle = open(name)
counts = {}
for line in handle:
word = line.split()
if len(word) < 3 or word[0] != "From" : continue
full_hour = word[5]
hour = full_hour.split(":")
hour = str(hour[:1])
hour = hour[2:4]
if hour in counts :
counts[hour] = 1 + counts[hour]
else :
counts.update({hour:1})
lst = list()
for k, v in counts.items():
new_tup = (k, v)
lst.append(new_tup)
lst = sorted(lst)
for k, v in lst:
print(k,v)
counts=dict()
fill=open("mbox-short.txt")
for line in fill :
if line.startswith("From "):
x=line.split()
b=x[5]
y=b.split(":")
f=y[0]
counts[f]=counts.get(f,0)+1
l=list()
for k,v in counts.items():
l.append((k,v))
l.sort()
for k,v in l:
print(k,v)
I listened carefully to the online lessons, and this is my code, written using only what I learned in class. I think it will be easy for you to understand.
fn = input('Please enter file: ')
if len(fn) < 1: fn = 'mbox-short.txt'
hand = open(fn)
di = dict()
for line in hand:
ls = line.strip()
wds = line.split()
if 'From' in wds and len(wds) > 2:
hours = ls.split()
hour = hours[-2].split(':')
ch = hour[0]
di[ch] = di.get(ch, 0) + 1
tmp = list()
for h,t in di.items():
newt = (h,t)
tmp.append(newt)
tmp = sorted(tmp)
for h,t in tmp:
print(h,t)
name = input("Enter file:")
if len(name) < 1:
name = "mbox-short.txt"
handle = open(name)
counts = dict()
for line in handle:
line = line.strip()
if not line.startswith("From ") : continue
line = line.split()
hr = line[5].split(":")
hr = hr[0:1]
for piece in hr:
counts[piece] = counts.get(piece,0) + 1
lst = list()
for k,v in counts.items():
lst.append((k,v))
lst = sorted(lst)
for k,v in lst:
print(k,v)
fname = input("Enter file:")
dic = {}
with open(fname) as fhandle:
    for line in fhandle:
        if not line.startswith('From '):
            continue
        line = line.split()
        line = line[5]  # the sixth word is the time, e.g. 09:14:16
        line = line.split(':')
        bline = line[0]  # the hour is the part before the first colon
        # Build a dictionary that maps each hour to its number of messages.
        dic[bline] = dic.get(bline, 0) + 1
# Now it's time to access the dictionary and sort in some way.
lst = []
for k1, v1 in dic.items():  # items() yields the dictionary's key-value pairs
    lst.append((k1, v1))  # append each key with its value to lst
lst.sort()  # tuples sort by their first element, i.e. by hour
for k1, v1 in lst:  # lst holds the (key, value) pairs copied from dic
    print(k1, v1)

How do I extract floating point values from .txt file to use for calculation in python?

count = 0
fname = input("Enter file name: ")
fh = open(fname)
for line in fh:
if line.startswith("X-DSPAM-Confidence:") :
print(line)
count = count + 1
print(count)
There is a file with 27 lines like X-DSPAM-Confidence : 0.xxxxx, I need to extract the numerical value from each of them to be used for calculations.
Try to use split(':'):
Code:
count = 0
fname = input("Enter file name: ")
fh = open(fname)
for line in fh:
if line.startswith("X-DSPAM-Confidence:") :
print(line)
value = line.split(':')[-1] # will split line into 'X-DSPAM-Confidence' and 'value'
# if you have ',' at the end of the line, simply do this:
value = value.strip(',')
value = float(value)
print(value)
count = count + 1
print(count)
As long as the format is exactly as you described it, you can use the code below:
float(line.split(':')[1])
If there's more variation in the text than what you described, you might need to try regex.
You can use str.rfind(':') to get the position of : and then do a string slice to get the value.
# Print every X-DSPAM-Confidence line together with its numeric value, then
# the number of such lines.
count = 0
fname = input("Enter file name: ")
with open(fname) as fh:
    for line in fh:
        if line.startswith("X-DSPAM-Confidence:"):
            print(line)
            # rfind(':') is the index of the last colon itself, so the slice
            # starts one past it; float() ignores surrounding whitespace.
            value = float(line[line.rfind(':') + 1:])
            print(value)
            count = count + 1
print(count)
fname = input("Enter file name: ")
fh = open(fname)
count = 0
pos = 0
ans = None
total = 0
for line in fh:
if not line.startswith("X-DSPAM-Confidence:") :
continue
else :
count = count + 1
pos = line.find(':')
ans = line[pos+1 : ]
total = total + float(ans)
avg = total/count
# Average the values of all X-DSPAM-Confidence lines in a file.
fname = input("Enter file name: ")
val = 0
count = 0
with open(fname) as fh:
    for line in fh:
        if not line.startswith("X-DSPAM-Confidence:"):
            continue
        count = count + 1
        # Take everything after the header's colon. Searching for the first
        # '0' character instead would mis-slice a value such as 1.0000.
        val = val + float(line[line.find(':') + 1:])
if count:
    print("Average spam confidence:", val / count)
else:
    # Avoid dividing by zero when the file has no matching line.
    print("No X-DSPAM-Confidence lines found")
# Average the values of all X-DSPAM-Confidence lines in a file.
fname = input("Enter file name:")
count = 0
s = 0
with open(fname) as fh:
    for line in fh:
        if not line.startswith("X-DSPAM-Confidence:"):
            continue
        count = count + 1
        # The value starts right after the header's colon. Searching for the
        # first '0' character instead would mis-slice a value such as 1.0000.
        pos = line.find(':')
        floatingP = float(line[pos + 1:])
        s += floatingP
if count:
    print(s / count)
else:
    # Avoid dividing by zero when the file has no matching line.
    print("No X-DSPAM-Confidence lines found")

Invalid Output for Coursera Python Assignment

I'm doing the Coursera Python for Everybody stream and I'm stuck on Assignment 10.2. I'm getting invalid output for it. Here is what the assignment asks:
Write a program to read through the mbox-short.txt and figure out the
distribution by hour of the day for each of the messages. You can pull
the hour out from the 'From ' line by finding the time and then
splitting the string a second time using a colon.
From stephen.marquard@uct.ac.za Sat Jan 5 09:14:16 2008
Once you have accumulated the counts for each hour, print out the
counts, sorted by hour as shown below.
Here is my code:
name = raw_input("Enter file:")
if len(name) < 1 : name = "mbox-short.txt"
handle = open(name)
counts = dict()
lst = list()
for line in handle:
line = line.rstrip()
if not line.startswith('From '):
continue
words = line.split()
words = words[5]
words = words.split(":")
for word in counts:
counts[word] = counts.get(word, 0) + 1
lst = list()
for key, val in counts.items():
lst.append((key, val))
lst.sort()
print lst
Let me know what I'm doing wrong. Any advice or hint is appreciated.
I think you are iterating through the wrong thing in the inner loop: it should be for word in words, not for word in counts.
name = raw_input("Enter file:")
if len(name) < 1 : name = "mbox-short.txt"
handle = open(name)
hours = dict()
for line in handle:
if line.startswith("From "):
hour = line.split()[5].split(':')[0]
hours[hour] = hours.get(hour, 0) + 1
for key, value in sorted(hours.items(), None):
print key, value
# Count messages per hour from the 'From ' lines and print the counts in
# ascending order of hour.
name = input("Enter file:")
if not name:
    name = "mbox-short.txt"
handle = open(name)
counts = {}
for line in handle:
    if line.startswith("From "):
        fields = line.split()
        # The sixth field is the time; keep the text before its first colon.
        hour = fields[5].partition(':')[0]
        counts[hour] = counts.get(hour, 0) + 1
pairs = list(counts.items())
pairs.sort()
for hour, total in pairs:
    print(hour, total)
counts = dict()
for line in handle:
if line.startswith ('From '):
words = line.split()
hour=words[5].split(':')
counts[hour[0]]= counts.get(hour[0],0)+1
lst=list()
for key, val in counts.items():
lst.append((key,val))
lst=sorted(lst)
for a,b in lst:
print (a,b)
# Count messages per hour from the 'From ' lines and print them by hour.
name = input("Enter file:")
if len(name) < 1:
    name = "mbox-short.txt"

counts = dict()
with open(name) as handle:
    for line in handle:
        if line.startswith("From "):
            time = line.split()[5].split(":")
            counts[time[0]] = counts.get(time[0], 0) + 1

# The sorted pairs get their own names, so neither the builtin list nor the
# counts dictionary is rebound.
for hour, total in sorted(counts.items()):
    print(hour, total)
file = open('words.txt')
dic = dict()
lst = list()
for line in file :
line = line.rstrip()
if not line.startswith('From '):
continue
words = line.split()
words= words[5].split(':')
words = words[0]
dic[words] = dic.get(words,0)+1
for k,v in dic.items():
lst.append((k,v))
lst.sort()
for k,v in lst:
print(k,v)
name = input("Enter file:")
if len(name) < 1 : name = "mbox-short.txt"
handle = open(name)
g={}
for line in handle :
if not line.startswith('From'): continue
werds=line.split()[5:6]
for werd in werds :
we=werd.split(':')[0]
g[we]=g.get(we,0)+1
lst=list()
for v,k in g.items() :
new=(v,k)
lst.append(new)
lst=sorted(lst)
for v,k in lst :
print(v,k)
name = input("Enter file:")
if len(name) < 1 : name = "mbox-short.txt"
handle = open(name)
one = dict()
for line1 in handle:
if line1.startswith("From "):
lst1 = line1.split()
lst2 = lst1[5].split(":")
word = lst2[0]
one[word] = one.get(word,0) + 1
lst3 = list()
for k,v in one.items():
tup = (k,v)
lst3.append(tup)
lst3 = sorted(lst3,)
for k,v in lst3:
print(k,v)
# Count messages per hour from the 'From ' lines and print them by hour.
name = input("Enter file:")
if len(name) < 1:
    name = "mbox-short.txt"

h = dict()
with open(name) as handle:
    for line in handle:
        if line.startswith('From '):
            hour = line.split()[5].split(':')[0]
            h[hour] = h.get(hour, 0) + 1

# On Python 3, sorted() takes only the iterable positionally; passing None as
# a second positional argument raises TypeError.
for k, v in sorted(h.items()):
    print(k, v)
This worked for me:-
Opening the file
name = input("Enter file:")
if len(name) < 1 : name = "mbox-short.txt"
handle = open(name)
lst = list()
counts = dict()
Split each 'From ' line into words, take the sixth word (the time), split it on ':', and append the first part (the hour) to the list.
for lines in handle:
if not lines.startswith('From '): continue
words = lines.split()
words = words[5]
words = words.split(':')
lst.append(words[0])
Now count how many times each hour occurs in the list.
for i in lst:
counts[i] = counts.get(i,0) + 1
Finally sorting them by key(here time)
for k, v in sorted(counts.items()):
print(k,v)
name = input("Enter file: ")
if len(name) < 1:
name = "mbox-short.txt"
handle = open(name)
hours = dict()
for line in handle:
# Skipping lines we don't need
if not line.startswith("From "):
continue
words = line.split()
# Finding text in the line that we need
time = words[5]
time = time.split(":")
hour_time = time[0]
# Adding to the dictionary and checking if it already there
hours[hour_time] = hours.get(hour_time, 0) + 1
#Sorting dictionary using sorted() method
hours_sorted = sorted(hours.items())
for key, value in sorted(hours.items()):
print(key, value)
name = raw_input("Enter file:")
if len(name) < 1 : name = "mbox-short.txt"
handle = open(name)
hours = dict()
for line in handle:
if line.startswith("From "):
hour = line.split()[5].split(':')[0]
hours[hour] = hours.get(hour, 0) + 1
for key, value in sorted(hours.items(), None):
print key, value

compare an exact word with the txt file

I am trying to find exact word matches in my file, along with their line numbers.
For example, when I search for abc10, it also reports every line containing a longer word such as abc102 or abc103.
How can I restrict my code so that it prints only the lines containing exactly the word I asked for?
here is my code!
lineNo = 0
linesFound = []
inFile= open('rxmop.txt', 'r')
sKeyword = input("enter word ")
done = False
while not done :
pos = inFile.tell()
sLine = inFile.readline()
if sLine == "" :
done = True
break
if (sLine.find( sKeyword ) != -1):
print ("Found at line: "+str(lineNo))
tTuple = lineNo, pos
linesFound.append( tTuple )
lineNo = lineNo + 1
done = False
while not done :
command = int( input("Enter the line you want to view: ") )
if command == -1 :
done = True
break
for tT in linesFound :
if command == tT[0] :
inFile.seek( tT[1] )
lLine = inFile.readline()
print ("The line at position " + str(tT[1]) + "is: " + lLine)
"like when i search for abc10 it gives me all the possible answers e.g abc102 abc103 etc"
Split each record into words and compare against whole words only, instead of searching for a substring.
to_find = "RXOTG-10"
list_of_possibles = ["RXOTG-10 QTA5777 HYB SY G12",
"RXOTG-100 QTA9278 HYB SY G12"]
for rec in list_of_possibles:
words_list=rec.strip().split()
if to_find in words_list:
print "found", rec
else:
print " NOT found", rec

Categories

Resources