Team,
my last string ml31 that is in log file is being skipped from getting evaluated and final resultant dictionary is missing its entry. any hint?
ml1
/core
/home
ml2
/var
/home
/lib
cpuml3
/home
/root
/raid
ml31
/home
/root
/raid
import os
homedir=os.environ.get('HOME')
print(homedir)
key_str = "ml"
val_list = []
key = ''
my_dict = {}
with open(homedir + '/backup/file2dict.result') as file2dict:
for line in file2dict:
words = line.split()
for aWord in words:
if key_str in aWord:
if key:
my_dict[key] = val_list
print(my_dict)
val_list = []
key = aWord
else:
key = aWord
else:
val_list.append(aWord)
print(my_dict)
output
{'ml1': ['/core', '/home'], 'ml2': ['/var', '/home', '/lib'], 'cpuml3': ['/home', '/root', '/raid']}
expected
{'ml1': ['/core', '/home'], 'ml2': ['/var', '/home', '/lib'], 'cpuml3': ['/home', '/root', '/raid'], 'ml31': ['/home', '/root', '/raid'] }
You assign the list to the key in the dict my_dict[key] = val_list when you reach a new key, so it doesn't come up for the last one, you need to add it at the end too
with open(homedir + '/backup/file2dict.result') as file2dict:
for line in file2dict:
words = line.split()
for aWord in words:
if key_str in aWord:
if key:
my_dict[key] = val_list
val_list = []
key = aWord
else:
key = aWord
else:
val_list.append(aWord)
my_dict[key] = val_list
You can improve it with collections.defaultdict
key = ''
key_str = "ml"
my_dict = defaultdict(list)
# from pathlib import Path
content = Path(homedir + '/backup/file2dict.result').read_text().splitlines()
for word in content:
if key_str in word:
key = word
elif word: # ensure not use empty lines
my_dict[key].append(word)
Related
10.2 Write a program to read through the mbox-short.txt and figure out the distribution by hour of the day for each of the messages. You can pull the hour out from the 'From ' line by finding the time and then splitting the string a second time using a colon.
From stephen.marquard#uct.ac.za Sat Jan 5 09:14:16 2008
Once you have accumulated the counts for each hour, print out the counts, sorted by hour as shown below.
My Code:
fname = input("Enter file:")
fhandle = open(fname)
dic={}
for line in fhandle:
if not line.startswith("From "):
continue
else:
line=line.split()
line=line[5] # accesing the list using index and splitting it
line=line[0:2]
for bline in line:
dic[bline]=dic.get(bline,0)+1 # Using this line we created a dictionary having keys and values
#Now it's time to access the dictionary and sort in some way.
lst=[]
for k1,v1 in dic.items(): # dictionary er key value pair access korar jonno items method use kora hoyechhe
lst.append((k1,v1)) # dictionary er keys and corresponding values ke lst te append korlam
lst.sort() #lst take sort korlam. sorting is done through key
#print(lst)
for k1,v1 in lst: # we are able to access this list using key value pair as it was basically a dictionary before, It is just appended
print(k1,v1)
#print(dic)
#print(dic)
Desired Output:
04 3
06 1
07 1
09 2
10 3
11 6
14 1
15 2
16 4
17 2
18 1
19 1
My Output:
enter image description here
I don't understand what's going wrong.
Working Code. Break down the code in to simple form as much i can. So it will be easy to understand for you.
d = dict()
lst = list()
fname = input('enter the file name : ')
try:
fopen = open(fname,'r')
except:
print('wrong file name !!!')
for line in fopen:
stline = line.strip()
if stline.startswith('From:'):
continue
elif stline.startswith('From'):
spline = stline.split()
time = spline[5]
tsplit = time.split(':')
t1 = tsplit[0].split()
for t in t1:
if t not in d:
d[t] = 1
else:
d[t] = d[t] + 1
for k,v in d.items():
lst.append((k,v))
lst = sorted(lst)
for k,v in lst:
print(k,v)
name = input("Enter file:")
if len(name) < 1 : name = "mbox-short.txt"
handle = open(name)
emailcount = dict()
for line in handle:
if not line.startswith("From "): continue
line = line.split()
line = line[1]
emailcount[line] = emailcount.get(line, 0) +1
bigcount = None
bigword = None
for word,count in emailcount.items():
if bigcount == None or count > bigcount:
bigcount = count
bigword = word
print(bigword, bigcount)
name = input("Enter file:")
if len(name) < 1 : name = "mbox-short.txt"
handle = open(name)
counts = {}
for line in handle:
word = line.split()
if len(word) < 3 or word[0] != "From" : continue
full_hour = word[5]
hour = full_hour.split(":")
hour = str(hour[:1])
hour = hour[2:4]
if hour in counts :
counts[hour] = 1 + counts[hour]
else :
counts.update({hour:1})
lst = list()
for k, v in counts.items():
new_tup = (k, v)
lst.append(new_tup)
lst = sorted(lst)
for k, v in lst:
print(k,v)
counts=dict()
fill=open("mbox-short.txt")
for line in fill :
if line.startswith("From "):
x=line.split()
b=x[5]
y=b.split(":")
f=y[0]
counts[f]=counts.get(f,0)+1
l=list()
for k,v in counts.items():
l.append((k,v))
l.sort()
for k,v in l:
print(k,v)
I listen carefully in the online lessons, This is my code by what i learned in class. I think it will be easy for you to understand.
fn = input('Please enter file: ')
if len(fn) < 1: fn = 'mbox-short.txt'
hand = open(fn)
di = dict()
for line in hand:
ls = line.strip()
wds = line.split()
if 'From' in wds and len(wds) > 2:
hours = ls.split()
hour = hours[-2].split(':')
ch = hour[0]
di[ch] = di.get(ch, 0) + 1
tmp = list()
for h,t in di.items():
newt = (h,t)
tmp.append(newt)
tmp = sorted(tmp)
for h,t in tmp:
print(h,t)
name = input("Enter file:")
if len(name) < 1:
name = "mbox-short.txt"
handle = open(name)
counts = dict()
for line in handle:
line = line.strip()
if not line.startswith("From ") : continue
line = line.split()
hr = line[5].split(":")
hr = hr[0:1]
for piece in hr:
counts[piece] = counts.get(piece,0) + 1
lst = list()
for k,v in counts.items():
lst.append((k,v))
lst = sorted(lst)
for k,v in lst:
print(k,v)
fname = input("Enter file:")
fhandle = open(fname)
dic={}
for line in fhandle:
if not line.startswith('From '):
continue
else:
line=line.split()
line=line[5] # accesing the list using index and splitting it
line=line.split(':')
bline=line[0]
#for bline in line:
#print(bline)
dic[bline]=dic.get(bline,0)+1 # Using this line we created a
dictionary having keys and values
#Now it's time to access the dictionary and sort in some way.
lst=[]
for k1,v1 in dic.items(): # dictionary er key value pair access korar jonno items method use kora hoyechhe
lst.append((k1,v1)) # dictionary er keys and corresponding values ke lst te append korlam
lst.sort() #lst take sort korlam. sorting is done through key
#print(lst)
for k1,v1 in lst: # we are able to access this list using key value pair as it was basically a dictionary before, It is just appended
print(k1,v1)
#print(dic)
#print(dic)
I am using python 2.7, and using python dicts.
I have my output like this:
goods: apples, oranges
trunk_names: trunk01, trunk02, trunk03,trunk04,
trunk05,trunk06, trunk07,trunk08,
trunk09,trunk10, trunk11,trunk12
My code:
d = {}
for line in output.split("\n"):
if ":" not in line:
continue
key, value = line.strip().split(":", 1)
d[key] = value
Expected key and its value:
trunk_names: trunk01, trunk02, trunk03,trunk04,trunk05,trunk06, trunk07,trunk08,trunk09,trunk10, trunk11,trunk12
Actual key and values being output:
trunk_names: trunk01, trunk02, trunk03,trunk04,
from collections import defaultdict
output = '''
goods: apples, oranges
trunk_names: trunk01, trunk02, trunk03,trunk04,
trunk05,trunk06, trunk07,trunk08,
trunk09,trunk10, trunk11,trunk12
'''
d = defaultdict(list)
current_key = None
for line in output.split('\n')[1:]:
if ":" in line:
current_key = line.split(':')[0].strip()
values = line.split(':')[1]
else:
values = line
d[current_key] += [
value.strip()
for value in values.split(',')
if value.strip()
]
print(d)
gives:
defaultdict(<type 'list'>, {'trunk_names': ['trunk01', 'trunk02', 'trunk03', 'trunk04', 'trunk05', 'trunk06', 'trunk07', 'trunk08', 'trunk09', 'trunk10', 'trunk11', 'trunk12'], 'goods': ['apples', 'oranges']})
How stable is your structure, if it is very stable and the data quality is high then your can simplify by testing if the line.endswith(','):
In []:
d = {}
f = iter(output.split('\n'))
for line in f:
key, line = map(str.strip, line.split(':', 1))
while line.endswith(','):
line += next(f)
d[key] = [i.strip() for i in line.split(',')]
pprint.pprint(d)
Out[]:
{'goods': ['apples', 'oranges'],
'trunk_names': ['trunk01',
'trunk02',
'trunk03',
'trunk04',
'trunk05',
'trunk06',
'trunk07',
'trunk08',
'trunk09',
'trunk10',
'trunk11',
'trunk12']}
I'm doing the Coursera Python for Everybody stream and I'm stuck on Assignment 10.2. I'm getting invalid output for it. Here is what the assignment asks:
Write a program to read through the mbox-short.txt and figure out the
distribution by hour of the day for each of the messages. You can pull
the hour out from the 'From ' line by finding the time and then
splitting the string a second time using a colon.
From stephen.marquard#uct.ac.za Sat Jan 5 09:14:16 2008
Once you have accumulated the counts for each hour, print out the
counts, sorted by hour as shown below.
Here is my code:
name = raw_input("Enter file:")
if len(name) < 1 : name = "mbox-short.txt"
handle = open(name)
counts = dict()
lst = list()
for line in handle:
line = line.rstrip()
if not line.startswith('From '):
continue
words = line.split()
words = words[5]
words = words.split(":")
for word in counts:
counts[word] = counts.get(word, 0) + 1
lst = list()
for key, val in counts.items():
lst.append((key, val))
lst.sort()
print lst
Let me know what I'm doing wrong. Any advice or hint is appreciated.
I think you are iterating through the wrong thing in the inner loop: it should be for word in words, not for word in counts.
name = raw_input("Enter file:")
if len(name) < 1 : name = "mbox-short.txt"
handle = open(name)
hours = dict()
for line in handle:
if line.startswith("From "):
hour = line.split()[5].split(':')[0]
hours[hour] = hours.get(hour, 0) + 1
for key, value in sorted(hours.items(), None):
print key, value
name = input("Enter file:")
if len(name) < 1 : name = "mbox-short.txt"
handle = open(name)
counts = {}
for line in handle:
if not line.startswith("From "):continue
time = line.split()
time = time[5]
hour = time.split(':')
hour = hour[0]
counts[hour] = counts.get(hour, 0) + 1
for k, v in sorted(counts.items()):
print (k,v)
counts = dict()
for line in handle:
if line.startswith ('From '):
words = line.split()
hour=words[5].split(':')
counts[hour[0]]= counts.get(hour[0],0)+1
lst=list()
for key, val in counts.items():
lst.append((key,val))
lst=sorted(lst)
for a,b in lst:
print (a,b)
name =input("Enter file:")
if len(name) < 1:
name = "mbox-short.txt"
handle = open(name)
counts = dict()
for line in handle:
if line.startswith("From "):
time = line.split()[5].split(":")
counts [time[0]] = counts.get(time[0], 0) + 1
#print sorted( [ (v,k) for k,v in counts.items()] )
list = list()
for key, value in counts.items():
list.append( (key,value) )
list.sort()
for hour, counts in list:
print (hour, counts)
file = open('words.txt')
dic = dict()
lst = list()
for line in file :
line = line.rstrip()
if not line.startswith('From '):
continue
words = line.split()
words= words[5].split(':')
words = words[0]
dic[words] = dic.get(words,0)+1
for k,v in dic.items():
lst.append((k,v))
lst.sort()
for k,v in lst:
print(k,v)
name = input("Enter file:")
if len(name) < 1 : name = "mbox-short.txt"
handle = open(name)
g={}
for line in handle :
if not line.startswith('From'): continue
werds=line.split()[5:6]
for werd in werds :
we=werd.split(':')[0]
g[we]=g.get(we,0)+1
lst=list()
for v,k in g.items() :
new=(v,k)
lst.append(new)
lst=sorted(lst)
for v,k in lst :
print(v,k)
name = input("Enter file:")
if len(name) < 1 : name = "mbox-short.txt"
handle = open(name)
one = dict()
for line1 in handle:
if line1.startswith("From "):
lst1 = line1.split()
lst2 = lst1[5].split(":")
word = lst2[0]
one[word] = one.get(word,0) + 1
lst3 = list()
for k,v in one.items():
tup = (k,v)
lst3.append(tup)
lst3 = sorted(lst3,)
for k,v in lst3:
print(k,v)
name = input("Enter file:")
if len(name) < 1 : name = "mbox-short.txt"
handle = open(name)
h = dict()
for line in handle:
if line.startswith('From '):
l = line.split()[5].split(':')[0]
h[l] = h.get(l, 0) +1
for k,v in sorted(h.items(), None):
print(k,v)
This worked for me:-
Opening the file
name = input("Enter file:")
if len(name) < 1 : name = "mbox-short.txt"
handle = open(name)
lst = list()
counts = dict()
Splitting each line of the file to get words, then word at 6th place, then in that first letter of that 6th word splitted by ':', appending that into the list
for lines in handle:
if not lines.startswith('From '): continue
words = lines.split()
words = words[5]
words = words.split(':')
lst.append(words[0])
Now counting the letters occurred at different no. of times
for i in lst:
counts[i] = counts.get(i,0) + 1
Finally sorting them by key(here time)
for k, v in sorted(counts.items()):
print(k,v)
name = input("Enter file: ")
if len(name) < 1:
name = "mbox-short.txt"
handle = open(name)
hours = dict()
for line in handle:
# Skipping lines we don't need
if not line.startswith("From "):
continue
words = line.split()
# Finding text in the line that we need
time = words[5]
time = time.split(":")
hour_time = time[0]
# Adding to the dictionary and checking if it already there
hours[hour_time] = hours.get(hour_time, 0) + 1
#Sorting dictionary using sorted() method
hours_sorted = sorted(hours.items())
for key, value in sorted(hours.items()):
print(key, value)
name = raw_input("Enter file:")
if len(name) < 1 : name = "mbox-short.txt"
handle = open(name)
hours = dict()
for line in handle:
if line.startswith("From "):
hour = line.split()[5].split(':')[0]
hours[hour] = hours.get(hour, 0) + 1
for key, value in sorted(hours.items(), None):
print key, value
I have a list of path like ['aaa/aaa.csv', 'aaa/bbb.csv', 'aaa/ccc.csv'].
How can it be converted to dictionary like {'aaa':['aaa.csv', 'bbb.csv', 'ccc.csv'] and so on with first folder in path is equal to others?
I tried this code, but got confused what to do next.
list_split = [i.split('/') for i in list]
dic = {}
list_temp = []
for item in list_split:
list_temp.append(item)
if len(list_temp) < 2:
pass
else:
for itemm in list_temp:
pass
dic = {}
lst = ['aaa/aaa.csv', 'aaa/bbb.csv', 'aaa/ccc.csv']
for item in lst:
slash = item.find('/')
key = item[:slash]
val = item[slash+1:]
if dic.has_key(key):
dic[key].append(val)
else:
dic[key] = [val]
>>> dic
{'aaa': ['aaa.csv', 'bbb.csv', 'ccc.csv']}
original_list = ['aaa/aaa.csv', 'aaa/bbb.csv', 'aaa/ccc.csv', 'x/1.csv', 'y/2.csv'] # i added a couple more items to (hopefully) prove it works
dic = {}
for item in original_list:
path = item.split('/')
if path[0] not in dic:
dic[path[0]] = []
dic[path[0]].append('/'.join(path[1:]))
You can try this:
>>> L = ['aaa/aaa.csv', 'aaa/bbb.csv', 'aaa/ccc.csv']
>>> list_split = [tuple(i.split('/')) for i in L]
>>> newdict = {}
>>> for (key,item) in list_split:
if key not in newdict:
newdict[key]=[]
newdict[key].append(item)
Output:
{'aaa': ['aaa.csv', 'bbb.csv', 'ccc.csv']}
You can also use a defaultdict for this:
from collections import defaultdict
paths = ['aaa/aaa.csv', 'aaa/bbb.csv', 'aaa/ccc.csv', 'bbb/ccc.csv', 'aaa/bbb/ccc.csv']
dict = defaultdict(list)
for *key, value in [x.split('/') for x in paths]:
dict['/'.join(key)].append(value)
print(dict.items())
print(dict['aaa'])
This will also work for nested directories by putting the full path as the key.
path_dict = {}
for path in path_list:
if '/' in path:
path_dict[path.split('/')[0]] = path.split('/')[1:]
My raw data is:
abc 123
abc 456
def 789
def 101112
I want to put this into a dictionary where the first column is the key and the second column is the value. In the dictionary I currently have:
{'abc': ['123', '456'], 'def': ['789', '101112']}
instead of appending the values I want to add them to the original value so that it looks like:
{'abc': ['579'], 'def': ['101901']}
My current code is:
d = defaultdict(list)
infile = open('test.csv','r')
lines = infile.readlines()[2:-1]
for item in lines:
key, value = [a.strip() for a in item.split(' ')]
d[key].append(value)
d = defaultdict(list)
infile = open('test.csv','r')
lines = infile.readlines()[2:-1]
for item in lines:
key, value = [a.strip() for a in item.split(' ')]
if key in d:
d[key][0] = str(int(d[key][0]) + value)
else:
d[key].append(str(value))
d = defaultdict(list)
with open('test.csv', 'r') as infile:
for line in infile.readlines()[2:-1]:
key, value = [a.strip() for a in item.split(' ')]
d[key] = str(int(d[key] + value))
Here is version using default python dict:
d = dict()
infile = open('test.csv', 'r')
lines = infile.readlines()[2:-1]
for line in lines:
k, v = [i.strip() for i in line.split(' ')]
if k in d:
d[k] = [str(int(d[k][0]) + int(v))]
else:
d[k] = [v]
print d