I'm doing the Coursera Python for Everybody stream and I'm stuck on Assignment 10.2. I'm getting invalid output for it. Here is what the assignment asks:
Write a program to read through the mbox-short.txt and figure out the
distribution by hour of the day for each of the messages. You can pull
the hour out from the 'From ' line by finding the time and then
splitting the string a second time using a colon.
From stephen.marquard@uct.ac.za Sat Jan 5 09:14:16 2008
Once you have accumulated the counts for each hour, print out the
counts, sorted by hour as shown below.
Here is my code:
# Question code as posted (Python 2: raw_input and the print statement).
# Intent: count the 'From ' lines per hour and print the counts sorted by hour.
name = raw_input("Enter file:")
if len(name) < 1 : name = "mbox-short.txt"
handle = open(name)
counts = dict()
lst = list()
for line in handle:
line = line.rstrip()
if not line.startswith('From '):
continue
words = line.split()
# Sixth whitespace-separated field of the line.
words = words[5]
# Split that field on ':'; words is now a list of its pieces.
words = words.split(":")
# NOTE(review): this loop iterates over counts, not over words.
for word in counts:
counts[word] = counts.get(word, 0) + 1
lst = list()
# Copy the (key, value) pairs into a list so they can be sorted.
for key, val in counts.items():
lst.append((key, val))
lst.sort()
print lst
Let me know what I'm doing wrong. Any advice or hint is appreciated.
I think you are iterating through the wrong thing in the inner loop: it should be for word in words, not for word in counts.
# Count the messages per hour of day in an mbox file (Python 3).
name = input("Enter file:")
if len(name) < 1:
    name = "mbox-short.txt"

hours = dict()
# The with-block closes the file even if a malformed line raises.
with open(name) as handle:
    for line in handle:
        # Only envelope lines ("From " with a trailing space) are counted.
        if line.startswith("From "):
            # Field 5 is the time; the piece before its first ':' is the hour.
            hour = line.split()[5].split(':')[0]
            hours[hour] = hours.get(hour, 0) + 1

# sorted() accepts only the iterable positionally in Python 3.
for key, value in sorted(hours.items()):
    print(key, value)
# Tally 'From ' envelope lines by hour and print the tallies in hour order.
name = input("Enter file:")
if not name:
    name = "mbox-short.txt"
handle = open(name)
counts = {}
for line in handle:
    if line.startswith("From "):
        fields = line.split()
        # fields[5] is the clock time; the text before its first ':' is the hour.
        stamp = fields[5]
        hh = stamp.split(':')[0]
        counts[hh] = counts.get(hh, 0) + 1
by_hour = list(counts.items())
by_hour.sort()
for k, v in by_hour:
    print (k,v)
# Per-hour tally of 'From ' lines; expects `handle` to be an already-open file.
counts = {}
for line in handle:
    if not line.startswith ('From '):
        continue
    clock = line.split()[5]
    hh = clock.split(':')[0]
    counts[hh] = counts.get(hh, 0) + 1
# Tuples sort by their first element (the hour) first.
lst = sorted((key, val) for key, val in counts.items())
for a, b in lst:
    print (a,b)
# Count messages per hour of day and print one "hour count" pair per line.
name = input("Enter file:")
if len(name) < 1:
    name = "mbox-short.txt"
handle = open(name)
counts = dict()
for line in handle:
    if line.startswith("From "):
        # Field 5 is the time; its first ':'-separated piece is the hour.
        time = line.split()[5].split(":")
        counts[time[0]] = counts.get(time[0], 0) + 1
handle.close()

# Named `pairs` rather than `list` so the builtin is not shadowed.
pairs = sorted(counts.items())
# `count` (singular) keeps the loop from rebinding the `counts` dict.
for hour, count in pairs:
    print (hour, count)
# Hour-of-day histogram of the 'From ' lines found in words.txt.
file = open('words.txt')
dic = {}
for raw in file:
    # Trailing whitespace is removed before the prefix test.
    if raw.rstrip().startswith('From '):
        hh = raw.split()[5].split(':')[0]
        dic[hh] = dic.get(hh, 0) + 1
lst = sorted(dic.items())
for k, v in lst:
    print(k,v)
# Histogram of message hours taken from the 'From ' envelope lines.
name = input("Enter file:")
if len(name) < 1:
    name = "mbox-short.txt"
handle = open(name)
g = {}
for line in handle:
    # The trailing space keeps "From:" header lines out of the match.
    if not line.startswith('From '):
        continue
    fields = line.split()
    if len(fields) < 6:
        # No time field on this line; nothing to count.
        continue
    we = fields[5].split(':')[0]
    g[we] = g.get(we, 0) + 1
handle.close()

# items() yields (hour, count) pairs; sorting orders them by hour.
for hour, total in sorted(g.items()):
    print(hour, total)
# Build a dict of hour -> message count from the 'From ' lines, then print it
# ordered by hour.
name = input("Enter file:")
if not name:
    name = "mbox-short.txt"
handle = open(name)
one = {}
for line1 in handle:
    if not line1.startswith("From "):
        continue
    # Sixth field is the time; keep what precedes its first ':'.
    word = line1.split()[5].split(":")[0]
    one[word] = one.get(word, 0) + 1
lst3 = list(one.items())
lst3.sort()
for k, v in lst3:
    print(k,v)
# Print how many messages were sent in each hour of the day.
name = input("Enter file:")
if len(name) < 1:
    name = "mbox-short.txt"
h = dict()
with open(name) as handle:
    for line in handle:
        if line.startswith('From '):
            # Sixth field is the time; the part before its first ':' is the hour.
            l = line.split()[5].split(':')[0]
            h[l] = h.get(l, 0) + 1

# sorted() takes only the iterable positionally in Python 3; passing None as
# a second positional argument raises TypeError.
for k, v in sorted(h.items()):
    print(k, v)
This worked for me:
# Open the file (an empty answer falls back to mbox-short.txt).
name = input("Enter file:")
if len(name) < 1:
    name = "mbox-short.txt"
counts = dict()

# Split each 'From ' line into words, take the sixth word (the time), keep the
# piece before its first ':' (the hour) and count it straight away.
with open(name) as handle:
    for lines in handle:
        if not lines.startswith('From '):
            continue
        hour = lines.split()[5].split(':')[0]
        counts[hour] = counts.get(hour, 0) + 1

# Finally, print the counts sorted by key (here, the hour).
for k, v in sorted(counts.items()):
    print(k, v)
# Count the messages per hour of day and print them in hour order.
name = input("Enter file: ")
if len(name) < 1:
    name = "mbox-short.txt"
hours = dict()
with open(name) as handle:
    for line in handle:
        # Skip lines we don't need.
        if not line.startswith("From "):
            continue
        words = line.split()
        # The sixth word is the time; its first ':'-separated piece is the hour.
        hour_time = words[5].split(":")[0]
        # get() supplies 0 the first time an hour is seen.
        hours[hour_time] = hours.get(hour_time, 0) + 1

# Sort once and reuse the result for printing.
hours_sorted = sorted(hours.items())
for key, value in hours_sorted:
    print(key, value)
# Hour-of-day distribution of the messages in an mbox file (Python 3).
name = input("Enter file:")
if len(name) < 1:
    name = "mbox-short.txt"

hours = dict()
with open(name) as handle:
    for line in handle:
        if not line.startswith("From "):
            continue
        # Field 5 is the time; the text before its first ':' is the hour.
        hour = line.split()[5].split(':')[0]
        hours[hour] = hours.get(hour, 0) + 1

# Python 3's sorted() has no positional comparison argument, so only the
# iterable is passed.
for key, value in sorted(hours.items()):
    print(key, value)
Related
The text file looks like
data/File_10265.data:
Apple:2kg
Apple:3kg
Banana:1kg
Banana:4kg
Some string1
data/File_10276.data:
Apple:6kg
Apple:5kg
Apple:3kg
Banana:2kg
Banana:4kg
Banana:2kg
Banana:4kg
Extra line
data/File_10278.data:
Apple:3kg
Banana:2kg
Banana:4kg
Banana:2kg
Banana:7kg
Some words
The code is as follows:
# Question code as posted: tries to build a per-file table of Apple/Banana
# line counts from Samplefruit.txt.
import re
import pandas as pd
f = open("Samplefruit.txt", "r")
lines = f.readlines()
# Running counters; no statement below resets them.
Apple_count=0
Banana_count=0
# File_count is assigned here and not used again in this snippet.
File_count=0
Filename_list=[]
Apple_list=[]
Banana_list=[]
for line in lines:
# Capture the text after "data/" up to the '.' that is followed by "data".
match1=re.findall('data/(?P<File>[^\/]+(?=\..*data))',line)
if match1:
Filename_list.append(match1[0])
print('Match found:',match1)
if line.startswith("Apple"):
Apple_count+=1
elif line.startswith("Banana"):
Banana_count+=1
# NOTE(review): the nesting of these two appends was lost with the
# indentation — confirm where they were meant to run.
Apple_list.append(Apple_count)
Banana_list.append(Banana_count)
# One column per list; pandas requires the three lists to have equal length.
df=pd.DataFrame({'Filename': Filename_list,'Apple':
Apple_list,'Banana':
Banana_list})
The desired output:
Filename: |Apple |Banana
File_10265|2 |2
File_10276|3 |4
File_10278|1 |4
Maybe there is a more efficient way to do this but here's one solution:
# Write every distinct line of filetest.txt, followed by how often it occurs,
# to the console and (appended) to file.txt.
with open('filetest.txt') as f:
    lines = f.readlines()

# One pass over the lines; a dict keeps first-seen order, so the output order
# matches the order in which each distinct line first appears.
counts = {}
for line in lines:
    counts[line] = counts.get(line, 0) + 1

# Open the output once instead of once per distinct line.
with open('file.txt', 'a') as f1:
    for line, n in counts.items():
        # `line` still ends with its newline, so the count lands after it.
        entry = line + str(n)
        print(entry)
        f1.write(entry)
You simply open the file, read all lines into a list, then get rid of any duplicates. Then you loop through the list (now with the duplicates removed), and use the .count (docs) function to get the number of occurrences of each unique item in the list.
Try this,
import itertools
import re

# `text` must already hold the whole input as one string.
# Matches a header such as "data/File_10265.data:"; the dot is escaped so that
# it only matches a literal '.'.
pattern = re.compile(r"data/File_\d+\.data:")
lines = text.split("\n")
# Consecutive lines are grouped by "is not a header"; each True group holds
# the lines listed under one header.
files = itertools.groupby(lines, lambda line: pattern.search(line) is None)
for k, content in files:
    if k:
        content = list(content)
        # Count each distinct non-empty line of the group.
        counts = {word: content.count(word) for word in set(content) if word != ""}
        print(counts)
Output -
{'Banana:': 2, 'Apple:': 2}
{'Banana:': 4, 'Apple:': 3}
{'Banana:': 4, 'Apple:': 1}
NOTE: New changes have been made to the code as per the changes in the question.
Try this:
import re
text = {}
def unit_cal(val1, val2):
    """Add two quantities that carry a unit suffix, e.g. "3kg" + "2kg" -> "5kg".

    val1: quantity string such as "3kg"; its unit is used for the result.
    val2: running total in the same format, or False when there is none yet.

    Returns the sum as a string followed by val1's unit (no suffix when val1
    contains no letters). Drop the unit from the return value to get only
    the number.

    Raises ValueError when a quantity contains no digits.
    """
    q1 = re.findall("[0-9]+", val1)
    if not q1:
        raise ValueError("no quantity found in %r" % (val1,))
    ans = int(q1[0])

    # Only a string is a real running total; the False sentinel means
    # "nothing accumulated yet".
    if isinstance(val2, str):
        q2 = re.findall("[0-9]+", val2)
        if not q2:
            raise ValueError("no quantity found in %r" % (val2,))
        ans += int(q2[0])

    unit = re.findall("[a-zA-Z]+", val1)
    # A bare number has no unit; return it without a suffix instead of failing.
    return str(ans) + (unit[0] if unit else "")
# Sum the quantities of each item under every "data..." header of item.txt,
# print the report and save it to new_items.txt.
current = None  # totals of the header being read; None before the first one
with open("item.txt", "r") as f1:
    for line in f1:
        if "data" in line:
            # The raw header line (newline included) is the report key.
            current = {}
            text[line] = current
            continue
        parts = line.strip().split(":")
        # Lines that are not "item:quantity" (blank lines, free text such as
        # "Some string1") and items seen before any header are skipped.
        if len(parts) < 2 or current is None:
            continue
        item, amount = parts[0], parts[1]
        # False tells unit_cal that this item has no running total yet.
        current[item] = unit_cal(amount, current.get(item, False))

# Collect the pieces and join once instead of growing a string step by step.
pieces = []
for main_key, totals in text.items():
    pieces.append(main_key + "\n")
    for sub_key, total in totals.items():
        pieces.append(sub_key + " : " + str(total) + "\n\n")
final_text = "".join(pieces)

print(final_text)  # final output is displayed on the console
with open("new_items.txt", "w") as f2:
    f2.write(final_text)  # the output is also written to a new file
Output:
data/File_10265.data:
Apple : 5kg
Banana : 5kg
data/File_10276.data:
Apple : 14kg
Banana : 12kg
data/File_10278.data:
Apple : 3kg
Banana : 15kg
Here, I have posted an answer. Thanks, @Mani, @CarySwoveland, @Zero, and @M B for your support. The code is as follows:
import pandas as pd

# Count the Apple and Banana lines listed under each "data/<name>.data:"
# header of Samplefruit.txt and show the result as one DataFrame row per file.
text = {}
Filename = None  # no header seen yet
with open(r"Samplefruit.txt", "r") as file:
    for line in file:
        if "data" in line:
            # "data/File_10265.data:" -> "File_10265"
            Filename = line.split('/')[-1].split('.')[0]
            # Counters restart for every file header.
            Apple_count = 0
            Banana_count = 0
            print('----------------')
            print(Filename)
        elif Filename is not None and ("Apple" in line or "Banana" in line):
            if line.startswith("Apple"):
                Apple_count += 1
            elif line.startswith("Banana"):
                Banana_count += 1
            print('Apple:', Apple_count)
            print('Banana:', Banana_count)
            # Refresh this file's row after every fruit line, so the last
            # write holds the final counts.
            text[Filename] = {'Apple': Apple_count, 'Banana': Banana_count}

df = pd.DataFrame(
    {
        "Filename": list(text),
        "Apple": [x['Apple'] for x in text.values()],
        "Banana": [x['Banana'] for x in text.values()],
    }
)
print(df)
10.2 Write a program to read through the mbox-short.txt and figure out the distribution by hour of the day for each of the messages. You can pull the hour out from the 'From ' line by finding the time and then splitting the string a second time using a colon.
From stephen.marquard@uct.ac.za Sat Jan 5 09:14:16 2008
Once you have accumulated the counts for each hour, print out the counts, sorted by hour as shown below.
My Code:
# Question code as posted: intended to count 'From ' lines per hour of the day.
fname = input("Enter file:")
fhandle = open(fname)
dic={}
for line in fhandle:
if not line.startswith("From "):
continue
else:
line=line.split()
line=line[5] # access the list by index: field 5 of the split line
# Keep the first two characters of that field.
line=line[0:2]
# NOTE(review): line is a string here, so this loop visits it one character
# at a time.
for bline in line:
dic[bline]=dic.get(bline,0)+1 # this line builds the dictionary of keys and counts
# Now it's time to access the dictionary and sort it in some way.
lst=[]
for k1,v1 in dic.items(): # items() is used to access the dictionary's key-value pairs
lst.append((k1,v1)) # append each key and its corresponding value to lst
lst.sort() # sort lst; the tuples are ordered by key
#print(lst)
for k1,v1 in lst: # lst holds the (key, value) pairs copied from the dictionary
print(k1,v1)
#print(dic)
#print(dic)
Desired Output:
04 3
06 1
07 1
09 2
10 3
11 6
14 1
15 2
16 4
17 2
18 1
19 1
My Output:
enter image description here
I don't understand what's going wrong.
Working code. I have broken the code down into as simple a form as I can, so it will be easy for you to understand.
# Count messages per hour of the day from the 'From ' lines of a mailbox file.
d = dict()
fname = input('enter the file name : ')
try:
    fopen = open(fname, 'r')
except OSError:
    print('wrong file name !!!')
    # Stop here: without an open file the loop below cannot run.
    raise SystemExit(1)

for line in fopen:
    stline = line.strip()
    # The trailing space excludes 'From:' header lines.
    if not stline.startswith('From '):
        continue
    # Field 5 is the time; the piece before its first ':' is the hour.
    time = stline.split()[5]
    hour = time.split(':')[0]
    d[hour] = d.get(hour, 0) + 1
fopen.close()

# items() yields (hour, count) pairs; sorting orders them by hour.
lst = sorted(d.items())
for k, v in lst:
    print(k, v)
# Find the address that appears most often as the second word of 'From ' lines.
name = input("Enter file:")
if not name:
    name = "mbox-short.txt"
handle = open(name)
emailcount = {}
for line in handle:
    if line.startswith("From "):
        sender = line.split()[1]
        emailcount[sender] = emailcount.get(sender, 0) + 1
bigword, bigcount = None, None
for word in emailcount:
    count = emailcount[word]
    # Only a strictly larger count replaces the current leader.
    if bigcount is not None and count <= bigcount:
        continue
    bigword, bigcount = word, count
print(bigword, bigcount)
# Count the messages per hour of day and print the counts in hour order.
name = input("Enter file:")
if len(name) < 1:
    name = "mbox-short.txt"
handle = open(name)
counts = {}
for line in handle:
    word = line.split()
    # At least six fields are needed to reach the time at index 5.
    if len(word) < 6 or word[0] != "From":
        continue
    # Take the hour directly; slicing the text of str(list) only works
    # while the hour is exactly two characters long.
    hour = word[5].split(":")[0]
    counts[hour] = counts.get(hour, 0) + 1
handle.close()

for k, v in sorted(counts.items()):
    print(k, v)
# Hour histogram for mbox-short.txt, printed in ascending hour order.
counts = {}
fill = open("mbox-short.txt")
for line in fill:
    if not line.startswith("From "):
        continue
    # Sixth field is the time; keep the part before its first ':'.
    hh = line.split()[5].split(":")[0]
    if hh in counts:
        counts[hh] += 1
    else:
        counts[hh] = 1
l = sorted(counts.items())
for k, v in l:
    print(k,v)
I listened carefully to the online lessons, and this is my code, based on what I learned in class. I think it will be easy for you to understand.
# Count messages per hour of the day and print the counts sorted by hour.
fn = input('Please enter file: ')
if len(fn) < 1:
    fn = 'mbox-short.txt'
hand = open(fn)
di = dict()
for line in hand:
    wds = line.split()
    # Require 'From' as the FIRST word; a membership test anywhere in the
    # line would also count text that merely contains the word.
    if len(wds) > 2 and wds[0] == 'From':
        # The time is the second-to-last field of the line.
        ch = wds[-2].split(':')[0]
        di[ch] = di.get(ch, 0) + 1
hand.close()

for h, t in sorted(di.items()):
    print(h, t)
# Count 'From ' lines per hour; surrounding whitespace is removed before the
# prefix test.
name = input("Enter file:")
if not name:
    name = "mbox-short.txt"
handle = open(name)
counts = {}
for line in handle:
    stripped = line.strip()
    if stripped.startswith("From "):
        fields = stripped.split()
        # First ':'-separated piece of the sixth field.
        piece = fields[5].split(":")[0]
        counts[piece] = counts.get(piece, 0) + 1
lst = list(counts.items())
lst.sort()
for k, v in lst:
    print(k,v)
# Count the messages per hour of the day and print the counts sorted by hour.
fname = input("Enter file:")
fhandle = open(fname)
dic = {}
for line in fhandle:
    if not line.startswith('From '):
        continue
    # Index 5 of the split line is the time field.
    time = line.split()[5]
    # The piece before the first ':' is the hour.
    bline = time.split(':')[0]
    # Build a dictionary mapping each hour to its number of messages.
    dic[bline] = dic.get(bline, 0) + 1
fhandle.close()

# items() gives the (hour, count) pairs; sorting tuples orders them by hour.
lst = sorted(dic.items())
for k1, v1 in lst:
    print(k1, v1)
# Question code as posted: prints every line that starts with
# "X-DSPAM-Confidence:" and counts those lines.
count = 0
fname = input("Enter file name: ")
fh = open(fname)
for line in fh:
if line.startswith("X-DSPAM-Confidence:") :
# The whole line is printed; no number is extracted from it yet.
print(line)
count = count + 1
print(count)
There is a file with 27 lines like X-DSPAM-Confidence : 0.xxxxx, I need to extract the numerical value from each of them to be used for calculations.
Try to use split(':'):
Code:
# Print each X-DSPAM-Confidence line with its numeric value, then the number
# of such lines.
count = 0
fname = input("Enter file name: ")
fh = open(fname)
for line in fh:
    if line.startswith("X-DSPAM-Confidence:"):
        print(line)
        # Everything after the last ':' is the value.
        value = line.split(':')[-1]
        # Remove the newline first; otherwise a trailing ',' is not the last
        # character and would be left in place.
        value = value.strip().rstrip(',')
        value = float(value)
        print(value)
        count = count + 1
fh.close()
print(count)
As long as the format is exactly as you described it, you can use the code below:
float(line.split(':')[1])
If there's more variation in the text than what you described, you might need to try regex.
You can use str.rfind(':') to get the position of : and then do a string slice to get the value.
# Print each X-DSPAM-Confidence line with the text after its last ':', then
# the number of such lines.
count = 0
fname = input("Enter file name: ")
fh = open(fname)
for line in fh:
    if line.startswith("X-DSPAM-Confidence:"):
        print(line)
        # rfind gives the index of the last ':'; start one past it so the
        # colon itself is not part of the value, and drop the whitespace.
        value = line[line.rfind(':') + 1:].strip()
        print(value)
        count = count + 1
fh.close()
print(count)
# Average the values of all X-DSPAM-Confidence lines in a file.
fname = input("Enter file name: ")
fh = open(fname)
count = 0
total = 0
for line in fh:
    if not line.startswith("X-DSPAM-Confidence:"):
        continue
    count = count + 1
    # The value is the text after the first ':'; float() ignores the
    # surrounding whitespace.
    pos = line.find(':')
    ans = line[pos + 1:]
    total = total + float(ans)
fh.close()

# Avoid dividing by zero when the file has no matching line.
if count:
    avg = total / count
    print("Average spam confidence:", avg)
else:
    print("No X-DSPAM-Confidence lines found")
# Average the values of all X-DSPAM-Confidence lines in a file.
fname = input("Enter file name: ")
fh = open(fname)
val = 0
count = 0
for line in fh:
    if line.startswith("X-DSPAM-Confidence:"):
        count = count + 1
        # Parse the text after the colon; searching for the first '0' would
        # misread any value whose first digit is not zero.
        val = val + float(line[line.find(':') + 1:])
fh.close()

# Avoid dividing by zero when the file has no matching line.
if count:
    print("Average spam confidence:", val / count)
else:
    print("No X-DSPAM-Confidence lines found")
# Print the mean of the values found on X-DSPAM-Confidence lines.
fname = input("Enter file name:")
fh = open(fname)
count = 0
s = 0
for line in fh:
    if not line.startswith("X-DSPAM-Confidence:"):
        continue
    count = count + 1
    # Take the text after the colon rather than from the first '0', so that
    # values not starting with zero are read correctly.
    pos = line.find(':')
    floatingP = float(line[pos + 1:])
    s += floatingP
fh.close()

# Avoid dividing by zero when the file has no matching line.
if count:
    print(s / count)
else:
    print("No X-DSPAM-Confidence lines found")
I am doing a Coursera python exercise and having trouble writing my code.
The question is as following:
Write a program to read through the mbox-short.txt and figure out who has sent the greatest number of mail messages. The program looks for 'From ' lines and takes the second word of those lines as the person who sent the mail.
The program creates a Python dictionary that maps the sender's mail address to a count of the number of times they appear in the file. After the dictionary is produced, the program reads through the dictionary using a maximum loop to find the most prolific committer.
The sample text file is in this line:
http://www.pythonlearn.com/code/mbox-short.txt
And the expected output should be:
cwen@iupui.edu 5
This is my code:
# Question code as posted (Python 2): intended to find the most frequent
# sender among the 'From ' lines.
name = raw_input("Enter file:")
if len(name) < 1 : name = "mbox-short.txt"
handle = open(name)
count = dict()
for line in handle:
word = line.split()
if line.startswith('From '):
# Second word of the line.
email = word[1]
# NOTE(review): email is a string, so this loop visits it character by
# character.
for sender in email:
# With this membership test, an entry is only ever set once.
if sender not in count:
count[sender] = count.get(sender, 0) + 1
bigcount = None
bigname = None
# The loop variables reuse the names `name` and `count`, rebinding both.
for name,count in count.items():
if bigname is None or count > bigcount:
bigname = name
bigcount = count
print bigname, bigcount
The output I have is:
. 1
I think there is something wrong in "for sender in email" part, but couldn't figure out how it results in the undesired output.
The following loop is not appropriate in this situation because you are basically iterating over all the characters of the email address.
for sender in email:
...
That is why you are getting a character . when you print the email address with the largest count. You can easily see the effects once you print the count at the end of the loop.
Following checking is also redundant as you are implicitly checking it when you are getting the dictionary value with get method.
if sender not in count:
...
So, the final corrected code should be something like this.
# Find the address that sent the most messages (Python 3).
name = input("Enter file:")
if len(name) < 1:
    name = "mbox-short.txt"

count = dict()
with open(name) as handle:
    for line in handle:
        if line.startswith('From '):
            # The second word of a 'From ' line is the sender's address.
            sender = line.split()[1]
            count[sender] = count.get(sender, 0) + 1

largest = 0
email = ''
for k in count:
    if count[k] > largest:
        largest = count[k]
        email = k
# Address first, then its count.
print(email, largest)
# Report the most frequent sender (second word of each 'From ' line) and how
# many times it occurs.
fname = input("Enter The File Name")
fhandle = open(fname,'r')
sender = {}
for line in fhandle:
    if not line.startswith("From "):
        continue
    address = line.split()[1]
    sender[address] = sender.get(address, 0) + 1
max_key = max_val = None
if sender:
    # max() returns the first key holding the largest count.
    max_key = max(sender, key=sender.get)
    max_val = sender[max_key]
print(max_key,max_val)
# Find the address that sent the most messages (Python 3).
name = input("Enter file:")
if len(name) < 1:
    name = "mbox-short.txt"

counts = dict()
with open(name) as handle:
    for line in handle:
        words = line.split()
        # Skip blank lines and anything that is not a 'From' envelope line
        # with an address after it.
        if len(words) < 2 or words[0] != 'From':
            continue
        counts[words[1]] = counts.get(words[1], 0) + 1

maxval = None
maxkey = None
for kee, val in counts.items():
    # Key and value are set together, so the first sender is remembered
    # even when nobody later exceeds its count.
    if maxval is None or val > maxval:
        maxval = val
        maxkey = kee
print(maxkey, maxval)
# Find the address that sent the most messages (Python 3).
name = input("Enter file:")
if len(name) < 1:
    name = "mbox-short.txt"

count = dict()
with open(name) as fl:
    # Scan the file and count each sender as it is found.
    for lines_in_the_file in fl:
        if not lines_in_the_file.startswith('From '):
            continue
        # The second word of a 'From ' line is the sender's address.
        xx = lines_in_the_file.split()[1]
        count[xx] = count.get(xx, 0) + 1

bigcount = None
bigwords = None
for x, y in count.items():
    if bigcount is None or y > bigcount:
        bigwords = x
        bigcount = y
print(bigwords, bigcount)
# Count the senders named on 'From ' lines and print the most frequent one.
name = input("Enter the file name:")
handle = open(name)
new = {}
for line in handle:
    if not line.startswith("From "):
        continue
    address = line.split()[1]
    new[address] = new.get(address, 0) + 1
email, largest = None, 0
for k in new:
    # The first sender always becomes the leader; afterwards only a strictly
    # larger count replaces it.
    if email is None or new[k] > largest:
        email, largest = k, new[k]
print (email,largest)
# Count the addresses on "From:" header lines and print the most frequent one.
fname = input('enter the file name: ')
d = dict()
try:
    fhand = open(fname, 'r')
except OSError:
    # Only a failure to open the file is reported as "not found".
    print('file not found')
    exit()

for line in fhand:
    if line.startswith("From:"):
        # Text after the "From:" tag, without surrounding blanks or newline,
        # so the key does not start with a space.
        sl = line[len("From:"):].strip()
        d[sl] = d.get(sl, 0) + 1
fhand.close()
print(d)

largest = 0
email = ''
for key in d:
    if d[key] > largest:
        largest = d[key]
        email = key
print(email, ': ', largest)
I am taking the same Coursera Python course. Since I am new at it, I am sharing my code for the Assignment. To me the key part was first to use if not line, then split it.
# Find the address that sent the most messages.
counts = dict()
fname = input('Enter file: ')
# Empty input falls back to the sample file; any other name is used as typed.
if len(fname) < 1:
    fname = 'mbox-short.txt'
fhand = open(fname)
for line in fhand:
    if not line.startswith('From '):
        continue
    # The second word of a 'From ' line is the sender's address.
    words = line.split()
    counts[words[1]] = counts.get(words[1], 0) + 1
fhand.close()

key = None
num = 0
for k, v in counts.items():
    if key is None or v > num:
        num = v
        key = k
# Address first, then its count.
print(key, num)
# Find the address that sent the most messages.
name = input("Enter file:")
if len(name) < 1:
    name = "mbox-short.txt"

words = list()
with open(name) as handle:
    # Iterate the file directly: reading it in full beforehand would leave
    # nothing for the loop to see.
    for line in handle:
        # Envelope lines start with "From " (with a space); their second
        # word is the sender's address.
        if not line.startswith("From "):
            continue
        words.append(line.split()[1])

counts = dict()
for word in words:
    counts[word] = counts.get(word, 0) + 1

maxval = None
maxkey = None
for key, val in counts.items():
    # None cannot be compared with an int in Python 3, so the first item
    # seeds the maximum.
    if maxval is None or val > maxval:
        maxval = val
        maxkey = key
print(maxkey, maxval)
# Find the address that sent the most messages.
counts = dict()
name = input("Enter file:")
if len(name) < 1:
    name = "mbox-short.txt"
fhand = open(name)
for line in fhand:
    line = line.rstrip()
    if not line.startswith('From '):
        continue
    # The second word of a 'From ' line is the sender's address.
    words = line.split()
    counts[words[1]] = counts.get(words[1], 0) + 1
fhand.close()

st = 0
addy = None  # stays None when the file has no 'From ' lines
for k in counts:
    if counts[k] > st:
        st = counts[k]
        addy = k
print (addy, st)
I have a list like this
GroupID,Number
yellow,1
yellow,2
tan,0
blue,1
black,2
black,3
What I want is this
GroupID,Number
yellow,3
tan, 0
blue,1
black,5
So I want to add the numbers associated with each groupID.
This is what I got, but have difficulty with the result statement:
# Question code as posted: groups the second CSV column by the first and wants
# to write one total per group. The summing statement is still a placeholder.
from collections import defaultdict
d = defaultdict(list)
f = open("metal_modules.csv","r")
sheet = f.readlines()
#print sheet
# sheet[1:] skips the first line of the file.
for line in sheet[1:]:
#print line
spl = line.strip().split(",")
#print spl[1]
name = spl[0]
# The appended values are strings (pieces of the split line), not numbers.
d[name].append(spl[1])
outfile = open("out.txt","w")
result = ""
for v in d.values():
# Placeholder left by the asker: the right-hand side is missing.
result = #here I need to sum the number in column two for each key in the dictionary#
#print result
outfile.write(result)
f.close()
outfile.close()
Keep it simple:
# Build one "group, total" line per key of d (a mapping of group -> list of
# number strings).
result = "".join(
    # The numbers were read from text, so convert each one before summing.
    "%s, %s\n" % (group, sum(int(n) for n in numbers))
    for group, numbers in d.items()
)
You could try the below if the order won't be an important issue for you.
from collections import defaultdict

# Group the second column of 'infile' by the first, then write the header line
# and one "group,total" line per group to 'outfile'.
d = defaultdict(list)
with open('infile') as f:
    h = f.readline()
    for row in f:
        s = row.rstrip().split(',')
        d[s[0]].append(s[1])
with open('outfile', 'w') as w:
    w.write(h)
    for group, numbers in d.items():
        total = sum(int(n) for n in numbers)
        w.write(group + "," + str(total) + "\n")
Take a look at the following:
# Sum the Number column of metal_modules.csv per GroupID and write the totals
# to out.txt under the same header.
with open("metal_modules.csv","r") as f:
    sheet = f.readlines()
counter = {}
for line in sheet[1:]:
    k, v = line.split(",")
    # int() tolerates the newline left on v.
    counter[k] = counter.get(k, 0) + int(v)
rows = ["GroupID,Number\n"]
rows.extend("%s,%s\n" % (item, total) for item, total in counter.items())
with open("out.txt","w") as outfile:
    # strip() removes the newline after the last row.
    outfile.write("".join(rows).strip())