Adding comma separated items on separate lines to a dict in Python

Adding comma separated items on separate lines to a dict in Python - python

I am using python 2.7, and using python dicts.
I have my output like this:
goods: apples, oranges
trunk_names: trunk01, trunk02, trunk03,trunk04,
trunk05,trunk06, trunk07,trunk08,
trunk09,trunk10, trunk11,trunk12
My code:
d = {}
for line in output.split("\n"):
if ":" not in line:
continue
key, value = line.strip().split(":", 1)
d[key] = value
Expected key and its value:
trunk_names: trunk01, trunk02, trunk03,trunk04,trunk05,trunk06, trunk07,trunk08,trunk09,trunk10, trunk11,trunk12
Actual key and values being output:
trunk_names: trunk01, trunk02, trunk03,trunk04,

from collections import defaultdict

output = '''
goods: apples, oranges
trunk_names: trunk01, trunk02, trunk03,trunk04,
trunk05,trunk06, trunk07,trunk08,
trunk09,trunk10, trunk11,trunk12
'''

# Map each "key:" label to the list of comma-separated items that follow it,
# including items on continuation lines that carry no label of their own.
d = defaultdict(list)
current_key = None
for line in output.splitlines():
    head, colon, tail = line.partition(':')
    if colon:
        # partition() splits on the first colon only, so a value that itself
        # contains ':' is kept whole.
        current_key = head.strip()
        values = tail
    else:
        # No label on this line: it continues the previous key's value.
        values = line
    if current_key is None:
        # Nothing to attach the line to yet (e.g. the leading blank line).
        continue
    d[current_key] += [
        value.strip()
        for value in values.split(',')
        if value.strip()  # drop the empty pieces left by trailing commas
    ]
print(d)
gives:
defaultdict(<type 'list'>, {'trunk_names': ['trunk01', 'trunk02', 'trunk03', 'trunk04', 'trunk05', 'trunk06', 'trunk07', 'trunk08', 'trunk09', 'trunk10', 'trunk11', 'trunk12'], 'goods': ['apples', 'oranges']})

How stable is your structure? If it is very stable and the data quality is high, then you can simplify by testing whether the line ends with a comma, i.e. line.endswith(','):
In []:
import pprint

# Parse "key: a, b," records in which a trailing comma means the value
# continues on the next line.
d = {}
f = iter(output.split('\n'))
for line in f:
    if ':' not in line:
        # Blank or label-less line: there is no key to unpack.
        continue
    key, line = map(str.strip, line.split(':', 1))
    while line.endswith(','):
        # next() with a default avoids StopIteration when the input ends
        # right after a trailing comma.
        continuation = next(f, None)
        if continuation is None:
            break
        line += continuation.strip()
    # Skip empty pieces so a dangling comma does not yield a '' item.
    d[key] = [i.strip() for i in line.split(',') if i.strip()]
pprint.pprint(d)
Out[]:
{'goods': ['apples', 'oranges'],
'trunk_names': ['trunk01',
'trunk02',
'trunk03',
'trunk04',
'trunk05',
'trunk06',
'trunk07',
'trunk08',
'trunk09',
'trunk10',
'trunk11',
'trunk12']}

Related

last matched string is being skipped in python

Team,
The last key in my log file, ml31, is being skipped during evaluation, so the resulting dictionary is missing its entry. Any hints?
ml1
/core
/home
ml2
/var
/home
/lib
cpuml3
/home
/root
/raid
ml31
/home
/root
/raid
import os
homedir=os.environ.get('HOME')
print(homedir)
key_str = "ml"
val_list = []
key = ''
my_dict = {}
with open(homedir + '/backup/file2dict.result') as file2dict:
for line in file2dict:
words = line.split()
for aWord in words:
if key_str in aWord:
if key:
my_dict[key] = val_list
print(my_dict)
val_list = []
key = aWord
else:
key = aWord
else:
val_list.append(aWord)
print(my_dict)
output
{'ml1': ['/core', '/home'], 'ml2': ['/var', '/home', '/lib'], 'cpuml3': ['/home', '/root', '/raid']}
expected
{'ml1': ['/core', '/home'], 'ml2': ['/var', '/home', '/lib'], 'cpuml3': ['/home', '/root', '/raid'], 'ml31': ['/home', '/root', '/raid'] }

You assign the list to the key in the dict (my_dict[key] = val_list) only when you reach a new key, so the assignment never happens for the last key. You need to do it once more after the loop ends:
# Group every word that follows a key word (one containing key_str) under
# that key.
with open(homedir + '/backup/file2dict.result') as file2dict:
    for line in file2dict:
        for aWord in line.split():
            if key_str in aWord:
                if key:
                    # A new key starts: store what was collected for the
                    # previous one and begin a fresh list.
                    my_dict[key] = val_list
                    val_list = []
                key = aWord
            else:
                val_list.append(aWord)
# The last key is never followed by another key, so store it after the loop.
# Skip this when no key was found at all, otherwise an empty file would add
# an entry under the empty string.
if key:
    my_dict[key] = val_list
You can improve it with collections.defaultdict
from collections import defaultdict
from pathlib import Path

key = ''
key_str = "ml"
my_dict = defaultdict(list)
content = Path(homedir + '/backup/file2dict.result').read_text().splitlines()
for word in content:
    word = word.strip()
    if key_str in word:
        key = word
        # Register the key right away so a key with no values still appears.
        my_dict.setdefault(key, [])
    elif word and key:
        # Ignore empty lines, and lines that appear before the first key.
        my_dict[key].append(word)

Write a program to read through the mbox-short.txt and figure out the distribution by hour of the day for each of the messages

10.2 Write a program to read through the mbox-short.txt and figure out the distribution by hour of the day for each of the messages. You can pull the hour out from the 'From ' line by finding the time and then splitting the string a second time using a colon.
From stephen.marquard@uct.ac.za Sat Jan 5 09:14:16 2008
Once you have accumulated the counts for each hour, print out the counts, sorted by hour as shown below.
My Code:
fname = input("Enter file:")
fhandle = open(fname)
dic={}
for line in fhandle:
if not line.startswith("From "):
continue
else:
line=line.split()
line=line[5] # accesing the list using index and splitting it
line=line[0:2]
for bline in line:
dic[bline]=dic.get(bline,0)+1 # Using this line we created a dictionary having keys and values
#Now it's time to access the dictionary and sort in some way.
lst=[]
for k1,v1 in dic.items(): # dictionary er key value pair access korar jonno items method use kora hoyechhe
lst.append((k1,v1)) # dictionary er keys and corresponding values ke lst te append korlam
lst.sort() #lst take sort korlam. sorting is done through key
#print(lst)
for k1,v1 in lst: # we are able to access this list using key value pair as it was basically a dictionary before, It is just appended
print(k1,v1)
#print(dic)
#print(dic)
Desired Output:
04 3
06 1
07 1
09 2
10 3
11 6
14 1
15 2
16 4
17 2
18 1
19 1
My Output:
enter image description here
I don't understand what's going wrong.

Working code. I have broken the code down into as simple a form as I can, so it should be easy for you to understand.
# Count messages per hour of day from the "From " lines of a mailbox file.
d = dict()
fname = input('enter the file name : ')
try:
    fopen = open(fname, 'r')
except OSError:
    print('wrong file name !!!')
    # Stop here: without a file handle the loop below cannot run.
    raise SystemExit(1)
with fopen:
    for line in fopen:
        spline = line.split()
        # Only lines whose first word is exactly "From" (not the "From:"
        # header) carry the timestamp, and it is the sixth field.
        if len(spline) < 6 or spline[0] != 'From':
            continue
        hour = spline[5].split(':')[0]
        d[hour] = d.get(hour, 0) + 1
# Sorting the (hour, count) pairs orders the output by hour.
lst = sorted(d.items())
for k, v in lst:
    print(k, v)

# Find the address that appears most often on the "From " lines.
name = input("Enter file:")
if len(name) < 1:
    name = "mbox-short.txt"
emailcount = dict()
with open(name) as handle:
    for line in handle:
        if not line.startswith("From "):
            continue
        fields = line.split()
        if len(fields) < 2:
            # "From " with nothing after it: there is no address to count.
            continue
        sender = fields[1]
        emailcount[sender] = emailcount.get(sender, 0) + 1
bigcount = None
bigword = None
for word, count in emailcount.items():
    # Strict '>' keeps the first address seen when counts tie.
    if bigcount is None or count > bigcount:
        bigcount = count
        bigword = word
print(bigword, bigcount)

# Count messages per hour of day from the "From " lines of a mailbox file.
name = input("Enter file:")
if len(name) < 1:
    name = "mbox-short.txt"
counts = {}
with open(name) as handle:
    for line in handle:
        word = line.split()
        # word[5] is read below, so at least six fields are required.
        if len(word) < 6 or word[0] != "From":
            continue
        # Take the part before the first colon of HH:MM:SS directly, rather
        # than slicing the string form of a list.
        hour = word[5].split(":")[0]
        counts[hour] = counts.get(hour, 0) + 1
lst = sorted(counts.items())
for k, v in lst:
    print(k, v)

# Count messages per hour of day from the "From " lines of mbox-short.txt.
counts = dict()
with open("mbox-short.txt") as fill:
    for line in fill:
        if not line.startswith("From "):
            continue
        x = line.split()
        if len(x) < 6:
            # Too short to contain the time field read below.
            continue
        f = x[5].split(":")[0]  # hour part of HH:MM:SS
        counts[f] = counts.get(f, 0) + 1
l = sorted(counts.items())
for k, v in l:
    print(k, v)

I listened carefully to the online lessons; this is my code, based on what I learned in class. I think it will be easy for you to understand.
# Count messages per hour of day from the "From " lines of a mailbox file.
fn = input('Please enter file: ')
if len(fn) < 1:
    fn = 'mbox-short.txt'
di = dict()
with open(fn) as hand:
    for line in hand:
        wds = line.split()
        # Require "From" as the FIRST word; testing for it anywhere in the
        # line would also pick up ordinary text that mentions "From".
        if len(wds) < 6 or wds[0] != 'From':
            continue
        ch = wds[5].split(':')[0]  # hour part of HH:MM:SS
        di[ch] = di.get(ch, 0) + 1
tmp = sorted(di.items())
for h, t in tmp:
    print(h, t)

# Count messages per hour of day from the "From " lines of a mailbox file.
name = input("Enter file:")
if len(name) < 1:
    name = "mbox-short.txt"
counts = dict()
with open(name) as handle:
    for line in handle:
        line = line.strip()
        if not line.startswith("From "):
            continue
        fields = line.split()
        if len(fields) < 6:
            # Too short to contain the time field read below.
            continue
        hr = fields[5].split(":")[0]  # hour part of HH:MM:SS
        counts[hr] = counts.get(hr, 0) + 1
lst = sorted(counts.items())
for k, v in lst:
    print(k, v)

# Count messages per hour of day from the "From " lines of a mailbox file.
fname = input("Enter file:")
dic = {}
with open(fname) as fhandle:
    for line in fhandle:
        if not line.startswith('From '):
            continue
        fields = line.split()
        if len(fields) < 6:
            # Too short to contain the time field read below.
            continue
        # The sixth field is the HH:MM:SS timestamp; keep only the hour.
        bline = fields[5].split(':')[0]
        # Build a dictionary mapping each hour to its number of occurrences.
        dic[bline] = dic.get(bline, 0) + 1
# Turn the dictionary into a list of (hour, count) pairs, sorted by hour.
lst = sorted(dic.items())
for k1, v1 in lst:
    print(k1, v1)

text file into a nested python dictionary more than one variable

I have a text file that is formatted as follows:
[one]
A = color
B = Petals
C = Junk
[two]
Z = 10
A = freq
corner = yes
[three]
D = code
status = 45
I'm trying to read this file into a nested dictionary so that it looks like this:
{'one':{'A':'color','B':'Petals','C':'Junk'},
'two':{'Z':'10','A':'freq','corner':'yes'},
'three':{'D':'code','status':'45'}}
I tried
import re
ini_sections = []
ini_dict = {}
x = 0
with open(path,'r') as f:
for line in f:
re_found = re.findall('\[(.*?)\]',line)
re_found = ''.join(re_found)
ini_sections.append(re_found)
try:
if re_found:
next_line = next(f)
while re.findall('=',next_line):
key,value = next_line.rstrip('\n').split('=')
ini_dict.update({ini_sections[x]:{key.strip():value.strip()}})
next_line = next(f)
x +=1
except StopIteration:
print("EOF!")
Output:
for key, value in ini_dict.items():
print(key, value)
>>>one {'C':'Junk'}
two {'corner':'yes'}
three {'status':'45'}
But only the last items remain in the dictionary. Not sure why that is.

You don't need a separate ini_sections (you create them but you don't even iterate over each of them while printing). Create one ini_dict, which will contain the three main keys one, two, and three, each one with a value of a dict. The current dict's name will be re_found; update only that inside your loop.
Your original code, changed here and there:
import re
import pprint

# A section header is a line consisting of "[name]".  Anchoring the pattern
# keeps brackets inside a value from being mistaken for a header.
section_re = re.compile(r'^\s*\[(.*?)\]\s*$')

# Read the file in a single pass.  Pulling lines with next() inside the loop
# would consume the following "[section]" line whenever two sections are
# not separated by a blank line, and that section would be lost.
ini_dict = {}
current = None  # dict of the section currently being filled
with open('test.cfg', 'r') as f:
    for line in f:
        header = section_re.match(line)
        if header:
            current = ini_dict.setdefault(header.group(1), {})
        elif '=' in line and current is not None:
            # Split on the first '=' only, so values may contain '='.
            key, value = line.split('=', 1)
            current[key.strip()] = value.strip()
# Printed once the whole file has been read.
print("EOF!")
pp = pprint.PrettyPrinter()
pp.pprint(ini_dict)
Result (indentation comes courtesy of prettyprint):
EOF!
{'one': {'A': 'color', 'B': 'Petals', 'C': 'Junk'},
'three': {'D': 'code', 'status': '45'},
'two': {'A': 'freq', 'Z': '10', 'corner': 'yes'}}

count numbers associated with category in list

I have a list like this
GroupID,Number
yellow,1
yellow,2
tan,0
blue,1
black,2
black,3
What I want is this
GroupID,Number
yellow,3
tan, 0
blue,1
black,5
So I want to add the numbers associated with each groupID.
This is what I got, but have difficulty with the result statement:
from collections import defaultdict
d = defaultdict(list)
f = open("metal_modules.csv","r")
sheet = f.readlines()
#print sheet
for line in sheet[1:]:
#print line
spl = line.strip().split(",")
#print spl[1]
name = spl[0]
d[name].append(spl[1])
outfile = open("out.txt","w")
result = ""
for v in d.values():
result = #here I need to sum the number in column two for each key in the dictionary#
#print result
outfile.write(result)
f.close()
outfile.close()

keep it simple
# Build one "group, total" line per group.  The values collected in d are
# the strings read from the CSV, so convert them to int before summing.
result = "".join(
    "%s, %s\n" % (group, sum(int(n) for n in d[group]))
    for group in d
)

You could try the below if the order won't be an important issue for you.
from collections import defaultdict

# Collect the numbers of each group, then write one summed row per group.
d = defaultdict(list)
with open('infile') as f:
    h = f.readline()  # header row, copied to the output unchanged
    m = f.readlines()
for i in m:
    s = i.rstrip().split(',')
    if len(s) < 2:
        # Blank or malformed row: there is no number column to read.
        continue
    d[s[0]].append(s[1])
with open('outfile', 'w') as w:
    w.write(h)
    for group, numbers in d.items():
        w.write(group + "," + str(sum(map(int, numbers))) + "\n")

Take a look at the following:
# Sum the second column per group and write the totals to out.txt.
with open("metal_modules.csv", "r") as f:
    sheet = f.readlines()
counter = {}
for line in sheet[1:]:  # sheet[0] is the header row
    if not line.strip():
        # A blank line cannot be unpacked into (group, number).
        continue
    k, v = line.split(",")
    counter[k] = counter.get(k, 0) + int(v)
with open("out.txt", "w") as outfile:
    result = "GroupID,Number\n" + "".join(
        "%s,%s\n" % (item, counter[item]) for item in counter
    )
    # strip() drops the final newline, so the file ends on the last row.
    outfile.write(result.strip())

Adding to a dictionary in python instead of appending

My raw data is:
abc 123
abc 456
def 789
def 101112
I want to put this into a dictionary where the first column is the key and the second column is the value. In the dictionary I currently have:
{'abc': ['123', '456'], 'def': ['789', '101112']}
instead of appending the values I want to add them to the original value so that it looks like:
{'abc': ['579'], 'def': ['101901']}
My current code is:
d = defaultdict(list)
infile = open('test.csv','r')
lines = infile.readlines()[2:-1]
for item in lines:
key, value = [a.strip() for a in item.split(' ')]
d[key].append(value)

from collections import defaultdict

# Keep a single running total per key, stored as a one-element list of str.
d = defaultdict(list)
with open('test.csv', 'r') as infile:
    # Slice kept from the question: it drops the first two lines and the
    # last line -- presumably header/footer rows; confirm against the file.
    lines = infile.readlines()[2:-1]
for item in lines:
    fields = item.split()
    if len(fields) != 2:
        # Not a "key value" row.
        continue
    key, value = fields
    if key in d:
        # Both operands must be int; adding an int to the raw string fails.
        d[key][0] = str(int(d[key][0]) + int(value))
    else:
        d[key].append(value)

from collections import defaultdict

# Keep a single running total per key, stored as a one-element list of str.
d = defaultdict(list)
with open('test.csv', 'r') as infile:
    # Slice kept from the question: it drops the first two lines and the
    # last line -- presumably header/footer rows; confirm against the file.
    for line in infile.readlines()[2:-1]:
        fields = line.split()
        if len(fields) != 2:
            # Not a "key value" row.
            continue
        key, value = fields
        # A key seen for the first time yields an empty list from the
        # defaultdict, so its running total starts at 0.
        total = int(d[key][0]) if d[key] else 0
        d[key] = [str(total + int(value))]

Here is version using default python dict:
# Keep a single running total per key, stored as a one-element list of str.
d = dict()
with open('test.csv', 'r') as infile:
    # Slice kept from the question: it drops the first two lines and the
    # last line -- presumably header/footer rows; confirm against the file.
    lines = infile.readlines()[2:-1]
for line in lines:
    fields = line.split()
    if len(fields) != 2:
        # Not a "key value" row.
        continue
    k, v = fields
    if k in d:
        d[k] = [str(int(d[k][0]) + int(v))]
    else:
        d[k] = [v]
# The function-call form of print works on both Python 2 and Python 3.
print(d)

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Adding comma separated items on separate lines to a dict in Python - python

Related

last matched string is being skipped in python

Write a program to read through the mbox-short.txt and figure out the distribution by hour of the day for each of the messages

text file into a nested python dictionary more than one variable

count numbers associated with category in list

Adding to a dictionary in python instead of appending

Categories

Resources