last matched string is being skipped in python - python

Team,
The last matched string in my log file, ml31, is skipped during evaluation, so the resulting dictionary is missing its entry. Any hints?
ml1
/core
/home
ml2
/var
/home
/lib
cpuml3
/home
/root
/raid
ml31
/home
/root
/raid
import os

# Group the whitespace-separated words of the log file under the most recent
# word that contains ``key_str``.
homedir = os.environ.get('HOME')
print(homedir)

key_str = "ml"
val_list = []
key = ''
my_dict = {}

with open(homedir + '/backup/file2dict.result') as file2dict:
    for raw_line in file2dict:
        for token in raw_line.split():
            if key_str not in token:
                # Ordinary value: collect it for the key currently open.
                val_list.append(token)
                continue
            if key:
                # A new key closes the previous group, so store that group.
                my_dict[key] = val_list
                print(my_dict)
                val_list = []
            key = token

# NOTE: a group is stored only when the *next* key is seen, so the values
# collected for the final key are still in ``val_list`` at this point.
print(my_dict)
output
{'ml1': ['/core', '/home'], 'ml2': ['/var', '/home', '/lib'], 'cpuml3': ['/home', '/root', '/raid']}
expected
{'ml1': ['/core', '/home'], 'ml2': ['/var', '/home', '/lib'], 'cpuml3': ['/home', '/root', '/raid'], 'ml31': ['/home', '/root', '/raid'] }

You assign the list to the key in the dict (my_dict[key] = val_list) only when you reach a new key, so the assignment never happens for the last key. You need to do it once more after the loop ends:
# Relies on homedir, key_str, key, val_list and my_dict being initialised as
# in the question's script.
with open(homedir + '/backup/file2dict.result') as file2dict:
    for line in file2dict:
        for a_word in line.split():
            if key_str in a_word:
                if key:
                    # A new key closes the previous group: store it first.
                    my_dict[key] = val_list
                    val_list = []
                key = a_word
            else:
                val_list.append(a_word)

# The last group has no following key to trigger its storage, so store it
# here. Skip this when no key was ever seen, to avoid creating a '' entry.
if key:
    my_dict[key] = val_list
You can improve it with collections.defaultdict
from collections import defaultdict
from pathlib import Path

# ``homedir`` is expected to be defined already (the question's script reads
# it from the HOME environment variable).
key = ''
key_str = "ml"
my_dict = defaultdict(list)  # missing keys start out as an empty list
content = Path(homedir + '/backup/file2dict.result').read_text().splitlines()
for word in content:
    if key_str in word:
        # Every following value belongs to this key until the next key line.
        key = word
    elif word:  # ignore empty lines
        my_dict[key].append(word)

Related

Write a program to read through the mbox-short.txt and figure out the distribution by hour of the day for each of the messages

10.2 Write a program to read through the mbox-short.txt and figure out the distribution by hour of the day for each of the messages. You can pull the hour out from the 'From ' line by finding the time and then splitting the string a second time using a colon.
From stephen.marquard@uct.ac.za Sat Jan 5 09:14:16 2008
Once you have accumulated the counts for each hour, print out the counts, sorted by hour as shown below.
My Code:
fname = input("Enter file:")
fhandle = open(fname)
dic = {}
for line in fhandle:
    if line.startswith("From "):
        # Index 5 of the split line is the time field; keep its first two
        # characters.
        hour = line.split()[5][0:2]
        # NOTE: iterating over a string yields single characters, so every
        # character of the hour is counted on its own.
        for bline in hour:
            dic[bline] = dic.get(bline, 0) + 1

# items() gives the (key, value) pairs of the dictionary; sorting the pairs
# orders them by key.
lst = sorted(dic.items())
for k1, v1 in lst:
    print(k1, v1)
Desired Output:
04 3
06 1
07 1
09 2
10 3
11 6
14 1
15 2
16 4
17 2
18 1
19 1
My Output:
enter image description here
I don't understand what's going wrong.
Working code. I broke it down into the simplest form I could, so it should be easy for you to understand.
# Count "From " lines per hour and print the counts sorted by hour.
d = dict()
fname = input('enter the file name : ')
try:
    fopen = open(fname, 'r')
except OSError:
    # Without a file there is nothing to count; stop here instead of
    # failing later with a NameError on ``fopen``.
    print('wrong file name !!!')
    raise SystemExit(1)

with fopen:
    for line in fopen:
        stline = line.strip()
        # Only "From " lines are counted; this also skips the "From:" header
        # lines.
        if not stline.startswith('From '):
            continue
        # Index 5 of the split line is the time; the part before the first
        # ':' is the hour.
        hour = stline.split()[5].split(':')[0]
        d[hour] = d.get(hour, 0) + 1

lst = sorted(d.items())
for k, v in lst:
    print(k, v)
# Report the sender address that appears on the most "From " lines.
name = input("Enter file:") or "mbox-short.txt"
handle = open(name)

emailcount = {}
for line in handle:
    if line.startswith("From "):
        sender = line.split()[1]
        emailcount[sender] = emailcount.get(sender, 0) + 1

bigword = None
bigcount = None
for word, count in emailcount.items():
    # Strictly greater, so the first address to reach the top count is kept.
    if bigcount is None or count > bigcount:
        bigword, bigcount = word, count
print(bigword, bigcount)
# Count "From" lines per hour and print the counts sorted by hour.
name = input("Enter file:")
if len(name) < 1:
    name = "mbox-short.txt"

counts = {}
with open(name) as handle:
    for line in handle:
        word = line.split()
        # Index 5 is read below, so at least six fields are required.
        if len(word) < 6 or word[0] != "From":
            continue
        # Take the hour directly from the split time. Slicing the string
        # form of a list only works when the hour has exactly two characters.
        hour = word[5].split(":")[0]
        counts[hour] = counts.get(hour, 0) + 1

lst = sorted(counts.items())
for k, v in lst:
    print(k, v)
# Tally the hour of every "From " line in mbox-short.txt, then print the
# tallies ordered by hour.
counts = {}
fill = open("mbox-short.txt")
for line in fill:
    if not line.startswith("From "):
        continue
    f = line.split()[5].split(":")[0]
    counts[f] = counts.get(f, 0) + 1

l = sorted(counts.items())
for k, v in l:
    print(k, v)
I followed the online lessons carefully; this is my code, based on what I learned in class. I think it will be easy for you to understand.
# Count messages per hour from the "From" lines and print them sorted.
fn = input('Please enter file: ')
if len(fn) < 1:
    fn = 'mbox-short.txt'

di = dict()
with open(fn) as hand:
    for line in hand:
        wds = line.split()
        # Require "From" as the first word; a plain membership test would
        # also accept lines that merely contain the word somewhere.
        if len(wds) > 2 and wds[0] == 'From':
            # The time is the second-to-last field; the hour precedes ':'.
            ch = wds[-2].split(':')[0]
            di[ch] = di.get(ch, 0) + 1

tmp = sorted(di.items())
for h, t in tmp:
    print(h, t)
# Hour-of-day histogram of the "From " lines in the chosen file.
name = input("Enter file:")
if not name:
    name = "mbox-short.txt"
handle = open(name)

counts = {}
for line in handle:
    line = line.strip()
    if line.startswith("From "):
        piece = line.split()[5].split(":")[0]
        counts[piece] = counts.get(piece, 0) + 1

lst = sorted(counts.items())
for k, v in lst:
    print(k, v)
# Count messages per hour from the "From " lines and print them sorted.
fname = input("Enter file:")
dic = {}
with open(fname) as fhandle:
    for line in fhandle:
        if not line.startswith('From '):
            continue
        # Index 5 of the split line is the time; the text before the first
        # ':' is the hour.
        bline = line.split()[5].split(':')[0]
        # Build a dictionary that maps each hour to its number of messages.
        dic[bline] = dic.get(bline, 0) + 1

# items() gives the dictionary's (key, value) pairs; sorting those pairs
# orders them by key, i.e. by hour.
lst = sorted(dic.items())
for k1, v1 in lst:
    print(k1, v1)

Adding comma separated items on separate lines to a dict in Python

I am using python 2.7, and using python dicts.
I have my output like this:
goods: apples, oranges
trunk_names: trunk01, trunk02, trunk03,trunk04,
trunk05,trunk06, trunk07,trunk08,
trunk09,trunk10, trunk11,trunk12
My code:
# Map the text before the first ':' of each line to the text after it.
d = {}
for raw in output.split("\n"):
    if ":" in raw:
        # NOTE: lines without a colon are skipped entirely, which includes
        # the continuation lines of a multi-line value.
        key, value = raw.strip().split(":", 1)
        d[key] = value
Expected key and its value:
trunk_names: trunk01, trunk02, trunk03,trunk04,trunk05,trunk06, trunk07,trunk08,trunk09,trunk10, trunk11,trunk12
Actual key and values being output:
trunk_names: trunk01, trunk02, trunk03,trunk04,
from collections import defaultdict

output = '''
goods: apples, oranges
trunk_names: trunk01, trunk02, trunk03,trunk04,
trunk05,trunk06, trunk07,trunk08,
trunk09,trunk10, trunk11,trunk12
'''

# Collect the comma-separated items of each "key: ..." entry; a line without
# a colon continues the entry of the most recent key.
d = defaultdict(list)
current_key = None
for line in output.split('\n'):
    if ":" in line:
        # Split on the first colon only, so a value may itself contain ':'.
        current_key, values = line.split(':', 1)
        current_key = current_key.strip()
    else:
        values = line
    if current_key is None:
        # Nothing to attach this line to yet (e.g. the leading blank line).
        continue
    d[current_key] += [
        value.strip()
        for value in values.split(',')
        if value.strip()  # drop the empty piece left by a trailing comma
    ]
print(d)
gives:
defaultdict(<type 'list'>, {'trunk_names': ['trunk01', 'trunk02', 'trunk03', 'trunk04', 'trunk05', 'trunk06', 'trunk07', 'trunk08', 'trunk09', 'trunk10', 'trunk11', 'trunk12'], 'goods': ['apples', 'oranges']})
How stable is your structure? If it is very stable and the data quality is high, then you can simplify by testing whether line.endswith(','):
In []:
import pprint

# Join lines that end with ',' onto the following line(s) before splitting.
# ``output`` is the text to parse, defined elsewhere.
d = {}
f = iter(output.split('\n'))
for line in f:
    if ':' not in line:
        # Blank or stray lines cannot be unpacked into key and value.
        continue
    key, line = map(str.strip, line.split(':', 1))
    while line.endswith(','):
        # A trailing comma means the value continues on the next line.
        try:
            line += next(f).strip()
        except StopIteration:
            break  # input ended right after a trailing comma
    d[key] = [i.strip() for i in line.split(',') if i.strip()]
pprint.pprint(d)
Out[]:
{'goods': ['apples', 'oranges'],
'trunk_names': ['trunk01',
'trunk02',
'trunk03',
'trunk04',
'trunk05',
'trunk06',
'trunk07',
'trunk08',
'trunk09',
'trunk10',
'trunk11',
'trunk12']}

Invalid Output for Coursera Python Assignment

I'm doing the Coursera Python for Everybody stream and I'm stuck on Assignment 10.2. I'm getting invalid output for it. Here is what the assignment asks:
Write a program to read through the mbox-short.txt and figure out the
distribution by hour of the day for each of the messages. You can pull
the hour out from the 'From ' line by finding the time and then
splitting the string a second time using a colon.
From stephen.marquard@uct.ac.za Sat Jan 5 09:14:16 2008
Once you have accumulated the counts for each hour, print out the
counts, sorted by hour as shown below.
Here is my code:
name = raw_input("Enter file:")
if len(name) < 1 : name = "mbox-short.txt"
handle = open(name)
counts = dict()
lst = list()
for line in handle:
line = line.rstrip()
if not line.startswith('From '):
continue
words = line.split()
words = words[5]
words = words.split(":")
for word in counts:
counts[word] = counts.get(word, 0) + 1
lst = list()
for key, val in counts.items():
lst.append((key, val))
lst.sort()
print lst
Let me know what I'm doing wrong. Any advice or hint is appreciated.
I think you are iterating through the wrong thing in the inner loop: it should be for word in words, not for word in counts.
name = raw_input("Enter file:")
if len(name) < 1 : name = "mbox-short.txt"
handle = open(name)
hours = dict()
for line in handle:
if line.startswith("From "):
hour = line.split()[5].split(':')[0]
hours[hour] = hours.get(hour, 0) + 1
for key, value in sorted(hours.items(), None):
print key, value
# Count "From " lines by hour and list the hours in ascending order.
name = input("Enter file:") or "mbox-short.txt"
handle = open(name)

counts = {}
for line in handle:
    if line.startswith("From "):
        hour = line.split()[5].split(':')[0]
        counts[hour] = counts.get(hour, 0) + 1

for k in sorted(counts):
    print(k, counts[k])
# ``handle`` is an already opened file, created outside this snippet.
counts = {}
for line in handle:
    if not line.startswith('From '):
        continue
    hour = line.split()[5].split(':')
    counts[hour[0]] = counts.get(hour[0], 0) + 1

# Pair each hour with its count and print the pairs ordered by hour.
lst = sorted(counts.items())
for a, b in lst:
    print(a, b)
# Count "From " lines per hour and print the counts sorted by hour.
name = input("Enter file:")
if len(name) < 1:
    name = "mbox-short.txt"

counts = dict()
with open(name) as handle:
    for line in handle:
        if line.startswith("From "):
            hour = line.split()[5].split(":")[0]
            counts[hour] = counts.get(hour, 0) + 1

# Use distinct loop names here: the builtin ``list`` stays usable, and
# ``counts`` is not rebound to an int while it is being reported.
for hour, count in sorted(counts.items()):
    print(hour, count)
# Hour-of-day histogram of the "From " lines in words.txt.
file = open('words.txt')
dic = {}
for line in file:
    line = line.rstrip()
    if line.startswith('From '):
        hour = line.split()[5].split(':')[0]
        dic[hour] = dic.get(hour, 0) + 1

lst = sorted(dic.items())
for k, v in lst:
    print(k, v)
# Count "From " lines per hour and print the counts sorted by hour.
name = input("Enter file:")
if len(name) < 1:
    name = "mbox-short.txt"

g = {}
with open(name) as handle:
    for line in handle:
        # Match "From " with the trailing space, so "From:" header lines are
        # rejected here instead of relying on the slice below being empty.
        if not line.startswith('From '):
            continue
        # Slicing [5:6] yields an empty list for a line with too few fields,
        # so such a line is skipped rather than raising IndexError.
        for werd in line.split()[5:6]:
            we = werd.split(':')[0]
            g[we] = g.get(we, 0) + 1

lst = sorted(g.items())
for hour, count in lst:
    print(hour, count)
# Tally messages by hour from the "From " lines, then print in hour order.
name = input("Enter file:")
if not name:
    name = "mbox-short.txt"
handle = open(name)

one = {}
for line1 in handle:
    if not line1.startswith("From "):
        continue
    word = line1.split()[5].split(":")[0]
    one[word] = one.get(word, 0) + 1

lst3 = sorted(one.items())
for k, v in lst3:
    print(k, v)
# Count "From " lines per hour and print the counts sorted by hour.
name = input("Enter file:")
if len(name) < 1:
    name = "mbox-short.txt"

h = dict()
with open(name) as handle:
    for line in handle:
        if line.startswith('From '):
            hour = line.split()[5].split(':')[0]
            h[hour] = h.get(hour, 0) + 1

# Python 3's sorted() accepts only the iterable positionally; passing a
# second positional argument (None) raises TypeError there.
for k, v in sorted(h.items()):
    print(k, v)
This worked for me:-
# Open the file (mbox-short.txt when nothing is entered).
name = input("Enter file:")
if len(name) < 1:
    name = "mbox-short.txt"
handle = open(name)
counts = dict()

# Split each "From " line into words, take the word at index 5 (the time),
# split it on ':' and collect the first piece (the hour) in a list.
lst = [
    lines.split()[5].split(':')[0]
    for lines in handle
    if lines.startswith('From ')
]

# Count how many times each hour occurs.
for i in lst:
    counts[i] = counts.get(i, 0) + 1

# Finally, print the counts sorted by key (here, the hour).
for k, v in sorted(counts.items()):
    print(k, v)
# Count "From " lines per hour and print the counts sorted by hour.
name = input("Enter file: ")
if len(name) < 1:
    name = "mbox-short.txt"

hours = dict()
with open(name) as handle:
    for line in handle:
        # Skip lines we don't need.
        if not line.startswith("From "):
            continue
        # The time is at index 5; the hour is the part before the first ':'.
        hour_time = line.split()[5].split(":")[0]
        # get() supplies 0 the first time an hour is seen.
        hours[hour_time] = hours.get(hour_time, 0) + 1

# Sort once and reuse the result instead of sorting a second time.
hours_sorted = sorted(hours.items())
for key, value in hours_sorted:
    print(key, value)
name = raw_input("Enter file:")
if len(name) < 1 : name = "mbox-short.txt"
handle = open(name)
hours = dict()
for line in handle:
if line.startswith("From "):
hour = line.split()[5].split(':')[0]
hours[hour] = hours.get(hour, 0) + 1
for key, value in sorted(hours.items(), None):
print key, value

make dictionary from list in python

I have a list of path like ['aaa/aaa.csv', 'aaa/bbb.csv', 'aaa/ccc.csv'].
How can it be converted to a dictionary like {'aaa': ['aaa.csv', 'bbb.csv', 'ccc.csv']}, and so on, where paths that share the same first folder are grouped under that folder's key?
I tried this code, but got confused what to do next.
# ``list`` here is the asker's own list of path strings.
list_split = [i.split('/') for i in list]
dic = {}
# Copy every split path into a second list.
list_temp = [item for item in list_split]
if len(list_temp) >= 2:
    for itemm in list_temp:
        pass  # unfinished: nothing is done with the items yet
# Group file names under the folder that precedes the first '/'.
dic = {}
lst = ['aaa/aaa.csv', 'aaa/bbb.csv', 'aaa/ccc.csv']
for item in lst:
    # partition() splits at the first '/': folder before it, the rest after.
    key, _, val = item.partition('/')
    # setdefault() creates the list on first use; dict.has_key() no longer
    # exists in Python 3.
    dic.setdefault(key, []).append(val)
>>> dic
{'aaa': ['aaa.csv', 'bbb.csv', 'ccc.csv']}
# Two extra items are included to show that separate folders get their own
# keys.
original_list = ['aaa/aaa.csv', 'aaa/bbb.csv', 'aaa/ccc.csv', 'x/1.csv', 'y/2.csv']
dic = {}
for item in original_list:
    # First path component is the key; the remaining components, rejoined
    # with '/', form the value.
    folder, *rest = item.split('/')
    dic.setdefault(folder, []).append('/'.join(rest))
You can try this:
# Group file names under their first folder.
L = ['aaa/aaa.csv', 'aaa/bbb.csv', 'aaa/ccc.csv']
# maxsplit=1 always yields exactly (folder, rest), so nested paths such as
# 'a/b/c.csv' still unpack into two names below.
list_split = [tuple(i.split('/', 1)) for i in L]
newdict = {}
for (key, item) in list_split:
    if key not in newdict:
        newdict[key] = []
    newdict[key].append(item)
print(newdict)
Output:
{'aaa': ['aaa.csv', 'bbb.csv', 'ccc.csv']}
You can also use a defaultdict for this:
from collections import defaultdict

paths = ['aaa/aaa.csv', 'aaa/bbb.csv', 'aaa/ccc.csv', 'bbb/ccc.csv', 'aaa/bbb/ccc.csv']
# Named ``grouped`` so the builtin ``dict`` is not shadowed.
grouped = defaultdict(list)
for path in paths:
    # Everything before the last '/' is the key; the file name is the value.
    folder, _, filename = path.rpartition('/')
    grouped[folder].append(filename)
print(grouped.items())
print(grouped['aaa'])
This will also work for nested directories by putting the full path as the key.
# ``path_list`` is the list of path strings, defined elsewhere.
path_dict = {}
for path in path_list:
    if '/' in path:
        folder, _, rest = path.partition('/')
        # Append instead of assigning, so earlier files of the same folder
        # are kept rather than overwritten by the latest one.
        path_dict.setdefault(folder, []).append(rest)

Adding to a dictionary in python instead of appending

My raw data is:
abc 123
abc 456
def 789
def 101112
I want to put this into a dictionary where the first column is the key and the second column is the value. In the dictionary I currently have:
{'abc': ['123', '456'], 'def': ['789', '101112']}
instead of appending the values I want to add them to the original value so that it looks like:
{'abc': ['579'], 'def': ['101901']}
My current code is:
# Collect the second column of each row under its first-column key.
d = defaultdict(list)
infile = open('test.csv', 'r')
lines = infile.readlines()[2:-1]  # leaves out the first two lines and the last
for item in lines:
    fields = item.split(' ')
    key, value = (part.strip() for part in fields)
    d[key].append(value)
from collections import defaultdict

# Keep one running total per key, stored as a one-element list of str.
d = defaultdict(list)
with open('test.csv', 'r') as infile:
    lines = infile.readlines()[2:-1]
for item in lines:
    key, value = [a.strip() for a in item.split(' ')]
    if key in d:
        # Both operands must be ints: ``value`` is read from the file as a
        # string, and adding it to an int raises TypeError.
        d[key][0] = str(int(d[key][0]) + int(value))
    else:
        d[key].append(value)
from collections import defaultdict

# Keep one running total per key, stored as a one-element list of str.
d = defaultdict(list)
with open('test.csv', 'r') as infile:
    for line in infile.readlines()[2:-1]:
        key, value = [a.strip() for a in line.split(' ')]
        # A key that has not been seen yet has an empty list: start from 0.
        running = int(d[key][0]) if d[key] else 0
        d[key] = [str(running + int(value))]
Here is version using default python dict:
# Sum the second column per first-column key, using a plain dict. Each total
# is stored as a one-element list of str.
d = dict()
with open('test.csv', 'r') as infile:
    lines = infile.readlines()[2:-1]
for line in lines:
    k, v = [i.strip() for i in line.split(' ')]
    if k in d:
        d[k] = [str(int(d[k][0]) + int(v))]
    else:
        d[k] = [v]
# The call form works on Python 3 and prints the same on Python 2.
print(d)

Categories

Resources