My raw data is:
abc 123
abc 456
def 789
def 101112
I want to put this into a dictionary where the first column is the key and the second column is the value. In the dictionary I currently have:
{'abc': ['123', '456'], 'def': ['789', '101112']}
Instead of appending the values, I want to add each new value to the existing one, so that the result looks like:
{'abc': ['579'], 'def': ['101901']}
My current code is:
# Group every value from the file under its key:
# first column -> list of second-column strings.
d = defaultdict(list)
# Read inside a context manager so the file handle is closed as soon as
# the lines have been loaded.
with open('test.csv', 'r') as infile:
    # The slice drops the first two lines and the last line of the file.
    lines = infile.readlines()[2:-1]
for item in lines:
    # Each remaining line must hold exactly two space-separated fields.
    key, value = [a.strip() for a in item.split(' ')]
    d[key].append(value)
# Keep a single running total per key, stored as a one-element list holding
# the total as a string, e.g. {'abc': ['579']}.
d = defaultdict(list)
with open('test.csv', 'r') as infile:
    # The slice drops the first two lines and the last line of the file.
    lines = infile.readlines()[2:-1]
for item in lines:
    key, value = [a.strip() for a in item.split(' ')]
    if key in d:
        # Both operands are strings read from the file, so convert each to
        # int before adding and store the sum back as a string.
        d[key][0] = str(int(d[key][0]) + int(value))
    else:
        # First occurrence of this key: start the total with its value.
        d[key].append(value)
# Keep one running total per key, stored as a string; a key that has not
# been seen yet starts from '0' so the first addition needs no special case.
d = defaultdict(lambda: '0')
with open('test.csv', 'r') as infile:
    # The slice drops the first two lines and the last line of the file.
    for line in infile.readlines()[2:-1]:
        key, value = [a.strip() for a in line.split(' ')]
        # Convert both the stored total and the new value before adding.
        d[key] = str(int(d[key]) + int(value))
Here is a version using a plain Python dict:
# Sum the second column per key using a plain dict; each total is kept as a
# one-element list holding the sum as a string.
d = dict()
with open('test.csv', 'r') as infile:
    # The slice drops the first two lines and the last line of the file.
    lines = infile.readlines()[2:-1]
for line in lines:
    k, v = [i.strip() for i in line.split(' ')]
    if k in d:
        d[k] = [str(int(d[k][0]) + int(v))]
    else:
        d[k] = [v]
# Function-call form works on both Python 2 and Python 3.
print(d)
Related
10.2 Write a program to read through the mbox-short.txt and figure out the distribution by hour of the day for each of the messages. You can pull the hour out from the 'From ' line by finding the time and then splitting the string a second time using a colon.
From stephen.marquard#uct.ac.za Sat Jan 5 09:14:16 2008
Once you have accumulated the counts for each hour, print out the counts, sorted by hour as shown below.
My Code:
# Tallies values taken from the lines that start with "From " and prints the tallies in sorted order.
fname = input("Enter file:")
fhandle = open(fname)
dic={}
for line in fhandle:
if not line.startswith("From "):
continue
else:
line=line.split()
line=line[5] # take the sixth whitespace-separated field of the line
line=line[0:2] # keep only its first two characters
# NOTE(review): line is a two-character string at this point, so the loop below visits each character separately and bline is one character, not the whole two-character value.
for bline in line:
dic[bline]=dic.get(bline,0)+1 # increment the count stored under bline, starting from 0
# Now it's time to access the dictionary and sort in some way.
lst=[]
for k1,v1 in dic.items(): # the items() method is used to access the dictionary's key/value pairs
lst.append((k1,v1)) # append each key and its corresponding value to lst
lst.sort() # sort lst; the tuples are ordered by key first
#print(lst)
for k1,v1 in lst: # lst holds (key, value) tuples, so each entry unpacks into k1 and v1
print(k1,v1)
#print(dic)
#print(dic)
Desired Output:
04 3
06 1
07 1
09 2
10 3
11 6
14 1
15 2
16 4
17 2
18 1
19 1
My Output:
enter image description here
I don't understand what's going wrong.
Working code. I have broken it down into as simple a form as I can, so that it is easy for you to understand.
# Count messages per hour of the day from the "From ..." lines of a mailbox
# file and print the counts sorted by hour.
d = dict()
fname = input('enter the file name : ')
try:
    fopen = open(fname, 'r')
except OSError:
    # Without a readable file there is nothing to count; stop here instead
    # of continuing with an undefined file handle.
    print('wrong file name !!!')
    raise SystemExit(1)
with fopen:
    for line in fopen:
        spline = line.split()
        # Only lines whose first token is exactly "From" are used, which
        # skips the "From:" header lines; shorter lines have no time field.
        if len(spline) < 6 or spline[0] != 'From':
            continue
        # The sixth field is the time; the part before the first colon is
        # the hour.
        t = spline[5].split(':')[0]
        d[t] = d.get(t, 0) + 1
# Sort the (hour, count) pairs by hour before printing.
lst = sorted(d.items())
for k, v in lst:
    print(k, v)
# Find the sender that appears on the most "From " lines and print it with
# its count.
name = input("Enter file:")
if len(name) < 1:
    name = "mbox-short.txt"  # default used when the user just presses Enter
emailcount = dict()
with open(name) as handle:
    for line in handle:
        if not line.startswith("From "):
            continue
        # The second whitespace-separated field is taken as the sender.
        sender = line.split()[1]
        emailcount[sender] = emailcount.get(sender, 0) + 1
# Track the largest count seen so far; both stay None if nothing was counted.
bigcount = None
bigword = None
for word, count in emailcount.items():
    if bigcount is None or count > bigcount:
        bigcount = count
        bigword = word
print(bigword, bigcount)
# Count messages per hour of the day from the "From ..." lines of a mailbox
# file and print the counts sorted by hour.
name = input("Enter file:")
if len(name) < 1:
    name = "mbox-short.txt"  # default used when the user just presses Enter
counts = {}
with open(name) as handle:
    for line in handle:
        word = line.split()
        # The time is the sixth field, so shorter lines cannot be used.
        if len(word) < 6 or word[0] != "From":
            continue
        # Take the text before the first colon directly; this also works
        # when the hour is not exactly two characters long.
        hour = word[5].split(":")[0]
        counts[hour] = counts.get(hour, 0) + 1
# Sort the (hour, count) pairs by hour before printing.
lst = sorted(counts.items())
for k, v in lst:
    print(k, v)
# Count the "From " lines of mbox-short.txt per hour and print the counts
# sorted by hour.
counts = dict()
with open("mbox-short.txt") as fill:
    for line in fill:
        if not line.startswith("From "):
            continue
        x = line.split()
        if len(x) < 6:
            continue  # no sixth (time) field on this line
        # Hour = text before the first colon of the time field.
        f = x[5].split(":")[0]
        counts[f] = counts.get(f, 0) + 1
# Sort the (hour, count) pairs by hour before printing.
l = sorted(counts.items())
for k, v in l:
    print(k, v)
I followed the online lessons carefully; this is my code, based on what I learned in class. I think it will be easy for you to understand.
# Count messages per hour of the day from the "From ..." lines of a mailbox
# file and print the counts sorted by hour.
fn = input('Please enter file: ')
if len(fn) < 1:
    fn = 'mbox-short.txt'  # default used when the user just presses Enter
di = dict()
with open(fn) as hand:
    for line in hand:
        wds = line.split()
        # Test the first token rather than membership anywhere in the line,
        # so body text that merely contains the word "From" is not counted.
        if len(wds) > 2 and wds[0] == 'From':
            # The time is the second-to-last field; keep the part before
            # the first colon as the hour.
            ch = wds[-2].split(':')[0]
            di[ch] = di.get(ch, 0) + 1
# Sort the (hour, count) pairs by hour before printing.
tmp = sorted(di.items())
for h, t in tmp:
    print(h, t)
# Count messages per hour of the day from the "From " lines of a mailbox
# file and print the counts sorted by hour.
name = input("Enter file:")
if len(name) < 1:
    name = "mbox-short.txt"  # default used when the user just presses Enter
counts = dict()
with open(name) as handle:
    for line in handle:
        line = line.strip()
        if not line.startswith("From "):
            continue
        fields = line.split()
        if len(fields) < 6:
            continue  # no sixth (time) field on this line
        # Hour = text before the first colon of the time field.
        piece = fields[5].split(":")[0]
        counts[piece] = counts.get(piece, 0) + 1
# Sort the (hour, count) pairs by hour before printing.
lst = sorted(counts.items())
for k, v in lst:
    print(k, v)
# Count messages per hour of the day from the "From " lines of a mailbox
# file and print the counts sorted by hour.
fname = input("Enter file:")
dic = {}
with open(fname) as fhandle:
    for line in fhandle:
        if not line.startswith('From '):
            continue
        # The sixth whitespace-separated field is the time; the part before
        # the first colon is the hour.
        bline = line.split()[5].split(':')[0]
        # Create the entry on first sight, otherwise increment its count.
        dic[bline] = dic.get(bline, 0) + 1
# Collect the dictionary's key/value pairs as tuples and sort them; the
# tuples are ordered by key (the hour) first.
lst = sorted(dic.items())
for k1, v1 in lst:
    print(k1, v1)
I have a text file that is formatted as follows:
[one]
A = color
B = Petals
C = Junk
[two]
Z = 10
A = freq
corner = yes
[three]
D = code
status = 45
I'm trying to read this file into a nested dictionary so that it looks like this:
{'one':{'A':'color','B':'Petals','C':'Junk'},
'two':{'Z':'10','A':'freq','corner':'yes'},
'three':{'D':'code','status':'45'}}
I tried
import re
# Attempt to read "[section]" headers and the "key = value" lines that follow them into ini_dict.
ini_sections = []
ini_dict = {}
x = 0
with open(path,'r') as f:
for line in f:
re_found = re.findall('\[(.*?)\]',line) # text found between '[' and ']' on this line
re_found = ''.join(re_found)
ini_sections.append(re_found)
try:
if re_found:
next_line = next(f)
while re.findall('=',next_line): # keep going while the next line contains '='
key,value = next_line.rstrip('\n').split('=')
# NOTE(review): update() replaces the whole value stored under ini_sections[x] with a new one-item dict, so keys stored earlier for the same section are discarded.
ini_dict.update({ini_sections[x]:{key.strip():value.strip()}})
next_line = next(f)
x +=1
except StopIteration:
print("EOF!")
Output:
for key, value in ini_dict.items():
print(key, value)
>>>one {'C':'Junk'}
two {'corner':'yes'}
three {'status':'45'}
But only the last items remain in the dictionary. Not sure why that is.
You don't need a separate ini_sections (you create them but you don't even iterate over each of them while printing). Create one ini_dict, which will contain the three main keys one, two, and three, each one with a value of a dict. The current dict's name will be re_found; update only that inside your loop.
Your original code, changed here and there:
import re
import pprint
# Parse an INI-style file into a nested dict: {section: {key: value}}.
# Every line is examined exactly once, so a "[section]" header is recognised
# even when it directly follows a "key = value" line with no blank line
# in between.
section_re = re.compile(r'\[(.*)\]$')
ini_dict = {}
current = None  # dict of the section being filled; None before any header
with open('test.cfg', 'r') as f:
    for line in f:
        stripped = line.strip()
        header = section_re.match(stripped)
        if header:
            # Start (or reopen) the section named between the brackets.
            current = ini_dict.setdefault(header.group(1), {})
        elif '=' in stripped and current is not None:
            # Split on the first '=' only so values may contain '='.
            key, value = stripped.split('=', 1)
            current[key.strip()] = value.strip()
# Report that the end of the file was reached.
print("EOF!")
pp = pprint.PrettyPrinter()
pp.pprint(ini_dict)
Result (indentation comes courtesy of prettyprint):
EOF!
{'one': {'A': 'color', 'B': 'Petals', 'C': 'Junk'},
'three': {'D': 'code', 'status': '45'},
'two': {'A': 'freq', 'Z': '10', 'corner': 'yes'}}
I have a text file of format like this
10:45 a b c
x 0 1 2
y 4 5 6
z 7 8 9
I want to make x the key and 0, 1, 2 its value as a list, and the same for y and z.
# Question's attempt at collecting lines into L1 and building a dict from them.
# NOTE(review): f, time, L1 and i are not defined in this snippet — presumably set up earlier; confirm.
while(1):
line = f.readline()
if time in line:
print (line)
L1.append(line)
for count,line in enumerate(f):
if (i < 3):
L1.append(line)
print ("Line{} : {}".format(count,line.strip()))
i=i+1
#print(L1)
for k in range(1): # range(1) yields only k = 0
print(L1[k])
test1 = L1[k]
a1 = test1.split()
print (a1[1])
# NOTE(review): the result is named dict, which shadows the built-in, and a1[1] is used as the comprehension's loop target, so it is reassigned to each item obtained by iterating L1[k] (presumably each character of that line — confirm).
dict = {a1[1]: L1[k] for a1[1] in L1[k]}
print (dict)
for k in range(1,3): # range(1,3) yields k = 1 and k = 2
#print("hey")
print (L1[k]) # will list the single row
test = L1[k]
#print(test)
a = test.split()
print (a[0])
# NOTE(review): same pattern as above, with a[0] as the loop target.
dict = {a[0]:L1[k] for a[0] in L1[k]}
print (dict)
Any idea what i am doing wrong here?
P.S. - I am new to python
You could try this:
# Map the first field of every row to the list of its remaining fields.
# `f` is the already-open file object supplied by the surrounding code.
my_dict = {}
# readlines() is all lowercase; the [1:] slice drops the header line.
lines = f.readlines()[1:]
for line in lines:
    # split() with no argument splits on any whitespace and discards the
    # trailing newline, so the last value stays clean.
    line_list = line.split()
    if not line_list:
        continue  # ignore blank lines
    key = line_list.pop(0)
    my_dict[key] = line_list
This will accomplish what it is I think you need (assuming your text file is stored in the same directory and you replace 'test.txt' with your filename):
# Read every whitespace-separated token of the file into one flat list.
with open('test.txt', 'r') as values:
    contents = values.read().split()

new_dict = {}
# Start at index 4 and advance four tokens at a time: each step takes one
# token as the key and the three tokens after it as the value list.
i = 4
while i < len(contents) - 3:
    new_dict[contents[i]] = contents[i + 1:i + 4]
    i += 4
print(new_dict)
or this, if you want the values as integers:
# Read every whitespace-separated token of the file into one flat list.
with open('test.txt', 'r') as values:
    contents = values.read().split()

new_dict = {}
# Start at index 4 and advance four tokens at a time: each step takes one
# token as the key and the three tokens after it, converted to int, as the
# value list.
i = 4
while i < len(contents) - 3:
    new_dict[contents[i]] = [int(token) for token in contents[i + 1:i + 4]]
    i += 4
print(new_dict)
Try this
import string
# After the line that starts with "10:45", map the first field of every
# four-field row to the list of its other three fields.
start_found = False
result_dict = {}
# Open the file in a context manager so it is closed when the loop ends.
with open("stack1_input.txt", mode='r') as infile:
    for line in infile:
        if line.startswith("10:45"):
            start_found = True
            continue
        if start_found:
            values = line.split()
            # Rows with any other number of fields are ignored.
            if len(values) == 4:
                result_dict[values[0]] = values[1:]
print(result_dict)
I am using python 2.7, and using python dicts.
I have my output like this:
goods: apples, oranges
trunk_names: trunk01, trunk02, trunk03,trunk04,
trunk05,trunk06, trunk07,trunk08,
trunk09,trunk10, trunk11,trunk12
My code:
# Build d from the "key: value" lines of output (output is defined outside this snippet).
d = {}
for line in output.split("\n"):
# NOTE(review): lines without ":" are skipped entirely, so text on such lines is never added to any key.
if ":" not in line:
continue
key, value = line.strip().split(":", 1) # split on the first ":" only
d[key] = value
Expected key and its value:
trunk_names: trunk01, trunk02, trunk03,trunk04,trunk05,trunk06, trunk07,trunk08,trunk09,trunk10, trunk11,trunk12
Actual key and values being output:
trunk_names: trunk01, trunk02, trunk03,trunk04,
from collections import defaultdict
output = '''
goods: apples, oranges
trunk_names: trunk01, trunk02, trunk03,trunk04,
trunk05,trunk06, trunk07,trunk08,
trunk09,trunk10, trunk11,trunk12
'''
# Map each key to the list of its comma-separated values; a line without a
# colon is treated as a continuation of the most recent key.
d = defaultdict(list)
current_key = None
for line in output.splitlines():
    if not line.strip():
        continue  # blank lines carry neither a key nor values
    if ":" in line:
        # partition() splits at the first colon only, so the line is
        # scanned once and a value containing ':' is kept whole.
        key_part, _, values = line.partition(':')
        current_key = key_part.strip()
    else:
        values = line
    # Drop the empty pieces produced by trailing commas.
    d[current_key].extend(
        piece.strip()
        for piece in values.split(',')
        if piece.strip()
    )
print(d)
gives:
defaultdict(<type 'list'>, {'trunk_names': ['trunk01', 'trunk02', 'trunk03', 'trunk04', 'trunk05', 'trunk06', 'trunk07', 'trunk08', 'trunk09', 'trunk10', 'trunk11', 'trunk12'], 'goods': ['apples', 'oranges']})
How stable is your structure? If it is very stable and the data quality is high, then you can simplify by testing whether the line ends with a comma (line.endswith(',')):
In []:
import pprint

# Map each key to the list of its comma-separated values. A value line
# that ends with ',' is joined with the following line(s) before being
# split. `output` is the multi-line string supplied by the surrounding code.
d = {}
f = iter(output.split('\n'))
for line in f:
    if ':' not in line:
        continue  # blank or stray lines have no key to unpack
    key, line = map(str.strip, line.split(':', 1))
    while line.endswith(','):
        continuation = next(f, None)
        if continuation is None:
            break  # input ended right after a trailing comma
        line += continuation.strip()
    # Skip the empty piece left behind by a dangling trailing comma.
    d[key] = [i.strip() for i in line.split(',') if i.strip()]
pprint.pprint(d)
Out[]:
{'goods': ['apples', 'oranges'],
'trunk_names': ['trunk01',
'trunk02',
'trunk03',
'trunk04',
'trunk05',
'trunk06',
'trunk07',
'trunk08',
'trunk09',
'trunk10',
'trunk11',
'trunk12']}
I have a list like this
GroupID,Number
yellow,1
yellow,2
tan,0
blue,1
black,2
black,3
What I want is this
GroupID,Number
yellow,3
tan, 0
blue,1
black,5
So I want to add the numbers associated with each groupID.
This is what I got, but have difficulty with the result statement:
from collections import defaultdict
# Group the second CSV column by the first column, then write a result string to out.txt.
d = defaultdict(list)
f = open("metal_modules.csv","r")
sheet = f.readlines()
#print sheet
for line in sheet[1:]: # sheet[1:] skips the first line of the file
#print line
spl = line.strip().split(",")
#print spl[1]
name = spl[0]
d[name].append(spl[1]) # the values are stored as the strings produced by split()
outfile = open("out.txt","w")
result = ""
for v in d.values():
# NOTE(review): the right-hand side below is still a placeholder; the values in d are strings, so they need converting (e.g. with int()) before they can be summed.
result = #here I need to sum the number in column two for each key in the dictionary#
#print result
outfile.write(result)
f.close()
outfile.close()
keep it simple
# Build one "group, total" line per key of d. d is expected to map each
# group to a list of numbers or numeric strings, so every item is converted
# with int() before summing (sum() cannot add strings).
result = "".join(
    "%s, %s\n" % (group, sum(int(n) for n in d[group]))
    for group in d
)
You could try the below if the order won't be an important issue for you.
from collections import defaultdict
# Sum the second column per value of the first column and write the totals
# to 'outfile' below the original header line.
with open('infile') as f:
    d = defaultdict(list)
    h = f.readline()  # header line, copied to the output unchanged
    m = f.readlines()
for i in m:
    s = i.rstrip().split(',')
    if len(s) < 2:
        continue  # skip blank or one-field lines instead of raising IndexError
    d[s[0]].append(s[1])
with open('outfile', 'w') as w:
    w.write(h)
    for i in d.items():
        # i is a (group, [numeric strings]) pair; convert, then sum.
        w.write(i[0] + "," + str(sum(map(int, i[1]))) + "\n")
Take a look at the following:
# Sum the Number column per GroupID and write the totals to out.txt.
with open("metal_modules.csv", "r") as f:
    sheet = f.readlines()
counter = {}
for line in sheet[1:]:  # sheet[0] is the header row
    if not line.strip():
        continue  # a blank line cannot be unpacked into key and value
    k, v = line.split(",")
    # int() tolerates the surrounding whitespace and the trailing newline.
    counter[k] = counter.get(k, 0) + int(v)
with open("out.txt", "w") as outfile:
    rows = ["GroupID,Number"]
    rows.extend("%s,%s" % (item, counter[item]) for item in counter)
    # Joining with newlines leaves no trailing newline after the last row.
    result = "\n".join(rows)
    outfile.write(result)