count numbers associated with category in list - python

I have a list like this
GroupID,Number
yellow,1
yellow,2
tan,0
blue,1
black,2
black,3
What I want is this
GroupID,Number
yellow,3
tan, 0
blue,1
black,5
So I want to add the numbers associated with each groupID.
This is what I got, but have difficulty with the result statement:
from collections import defaultdict
d = defaultdict(list)
f = open("metal_modules.csv","r")
sheet = f.readlines()
#print sheet
for line in sheet[1:]:
#print line
spl = line.strip().split(",")
#print spl[1]
name = spl[0]
d[name].append(spl[1])
outfile = open("out.txt","w")
result = ""
for v in d.values():
result = #here I need to sum the number in column two for each key in the dictionary#
#print result
outfile.write(result)
f.close()
outfile.close()

keep it simple
# Build one "group, total" line per GroupID.  The values collected in ``d``
# are the raw strings read from the CSV, so each one has to be converted to
# int before summing (summing the strings directly raises TypeError).
result = ""
for group in d:
    result += "%s, %s\n" % (group, sum(int(n) for n in d[group]))

You could try the below if the order won't be an important issue for you.
from collections import defaultdict

# Sum the second column per value of the first column and write the result
# as "GroupID,total" rows underneath the original header line.
totals = defaultdict(int)
with open('infile') as f:
    header = f.readline()  # copied through to the output unchanged
    for row in f:
        fields = row.rstrip().split(',')
        if len(fields) < 2:
            continue  # blank or malformed line (e.g. trailing newline at EOF)
        totals[fields[0]] += int(fields[1])

with open('outfile', 'w') as w:
    w.write(header)
    for group, total in totals.items():
        w.write(group + "," + str(total) + "\n")

Take a look at the following:
# Sum the Number column of metal_modules.csv per GroupID and write the
# totals to out.txt.
counter = {}
with open("metal_modules.csv", "r") as f:
    next(f, None)  # skip the header row
    for line in f:
        if not line.strip():
            continue  # tolerate blank lines such as a trailing newline at EOF
        k, v = line.split(",")
        counter[k] = counter.get(k, 0) + int(v)

with open("out.txt", "w") as outfile:
    # Header first, then one "GroupID,total" row per group; joining with
    # "\n" leaves no trailing newline at the end of the file.
    rows = ["GroupID,Number"]
    rows.extend("%s,%s" % (item, total) for item, total in counter.items())
    outfile.write("\n".join(rows))

Related

Python Convert nested key value file to csv file

I've below file in txt format
<START>
<SUBSTART
COLA=123;
COLB=123;
COLc=ABC;
COLc=BCD;
COLD=DEF;
<SUBEND
<SUBSTART
COLA=456;
COLB=456;
COLc=def;
COLc=def;
COLD=xyz;
<SUBEND
<SUBSTART
COLA=789;
COLB=789;
COLc=ghi;
COLc=ghi;
COLD=xyz;
<SUBEND>
<END>
Expected output,
COLA,COLB,COLc,COLc,COLD
123,123,ABC,BCD,DEF
456,456,def,def,xyz
789,789,ghi,ghi,xyz
how could I implement it in this python?
I've tried using a dictionary, but since the file has repeated keys, that does not work.
You need to write a small parser for your custom format.
Here is a very naive and simple example (updated to handle an arbitrary number of duplicates):
from collections import Counter

import pandas as pd

# Minimal parser for the <SUBSTART ... <SUBEND format: every run of
# consecutive KEY=VALUE; lines becomes one row (a dict).  A key repeated
# inside a row gets a numeric suffix (COLc, COLc_1, COLc_2, ...).
out = []
add = False  # True while we are inside a run of KEY=VALUE lines
for line in text.split('\n'):  # here you could read from file instead
    entry = line.strip()
    # Recognise data lines by their content rather than by leading
    # whitespace, so the parser also works on input that is not indented.
    if '=' in entry and not entry.startswith('<'):
        if not add:  # first data line of a new record
            out.append({})
            c = Counter()  # counter for duplicates
            add = True
        # Split on the first '=' only, so values may contain '=' themselves.
        k, _, v = entry.rstrip(';').partition('=')
        c[k] += 1
        if c[k] > 1:  # found a duplicated column, adding suffix
            k += f'_{c[k]-1}'
        out[-1][k] = v
    else:
        add = False  # any tag or blank line ends the current record
df = pd.DataFrame(out)
input:
text = '''<SUBSTART
COLA=A;
COLB=B;
COLc=C;
COLc=D;
COLc=E;
COLD=F;
<SUBEND
<SUBSTART
COLA=G;
COLB=H;
COLc=I;
COLc=J;
COLc=K;
COLD=L;
<SUBSTART
COLA=M;
COLB=N;
COLc=O;
COLc=P;
COLc=Q;
COLD=R;
<SUBEND>
<END>'''
output:
COLA COLB COLc COLc_1 COLc_2 COLD
0 A B C D E F
1 G H I J K L
2 M N O P Q R
if not startswith("<"), save
if startswith('<SUBEND'), add new newline
import json


def main():
    """Parse ``example.txt`` and print its records as indented JSON.

    Every line that does not start with ``<`` is read as ``KEY=VALUE;`` and
    stored as a one-item dict, so repeated keys inside a record are kept.
    A ``<SUBEND`` line closes the current record.  The printed value is a
    list of records, each record being a list of ``{key: value}`` dicts.
    """
    datas = []
    with open('example.txt', 'r') as f:
        sub_datas = []
        for line in f:
            entry = line.strip()
            if entry.startswith('<SUBEND'):
                datas.append(sub_datas)
                sub_datas = []
            elif entry and not entry.startswith('<'):
                # Remove only a real trailing ';' and split on the first
                # '=' so that values may themselves contain '='.
                key, sep, value = entry.rstrip(';').partition('=')
                if sep:  # skip lines that are not KEY=VALUE
                    sub_datas.append({key: value})
    print(json.dumps(datas, indent=4))


if __name__ == '__main__':
    main()

Write a program to read through the mbox-short.txt and figure out the distribution by hour of the day for each of the messages

10.2 Write a program to read through the mbox-short.txt and figure out the distribution by hour of the day for each of the messages. You can pull the hour out from the 'From ' line by finding the time and then splitting the string a second time using a colon.
From stephen.marquard#uct.ac.za Sat Jan 5 09:14:16 2008
Once you have accumulated the counts for each hour, print out the counts, sorted by hour as shown below.
My Code:
fname = input("Enter file:")
fhandle = open(fname)
dic={}
for line in fhandle:
if not line.startswith("From "):
continue
else:
line=line.split()
line=line[5] # accesing the list using index and splitting it
line=line[0:2]
for bline in line:
dic[bline]=dic.get(bline,0)+1 # Using this line we created a dictionary having keys and values
#Now it's time to access the dictionary and sort in some way.
lst=[]
for k1,v1 in dic.items(): # dictionary er key value pair access korar jonno items method use kora hoyechhe
lst.append((k1,v1)) # dictionary er keys and corresponding values ke lst te append korlam
lst.sort() #lst take sort korlam. sorting is done through key
#print(lst)
for k1,v1 in lst: # we are able to access this list using key value pair as it was basically a dictionary before, It is just appended
print(k1,v1)
#print(dic)
#print(dic)
Desired Output:
04 3
06 1
07 1
09 2
10 3
11 6
14 1
15 2
16 4
17 2
18 1
19 1
My Output:
enter image description here
I don't understand what's going wrong.
Working code. I have broken the code down into as simple a form as I can, so it will be easy for you to understand.
# Count messages per hour of day from the "From " envelope lines of an mbox
# file and print "hour count" pairs sorted by hour.
d = dict()
fname = input('enter the file name : ')
try:
    fopen = open(fname, 'r')
except OSError:
    # Stop here: without an open file nothing below can run.
    print('wrong file name !!!')
    raise SystemExit(1)

with fopen:
    for line in fopen:
        stline = line.strip()
        # Only the "From " envelope lines carry the timestamp; the
        # "From:" header lines do not.
        if not stline.startswith('From '):
            continue
        spline = stline.split()
        time = spline[5]  # sixth field, e.g. "09:14:16"
        hour = time.split(':')[0]
        d[hour] = d.get(hour, 0) + 1

lst = sorted(d.items())
for k, v in lst:
    print(k, v)
# Report the sender address that appears on the most "From " envelope lines,
# together with how many times it appears.
name = input("Enter file:")
if not name:
    name = "mbox-short.txt"
handle = open(name)

emailcount = {}
for line in handle:
    if line.startswith("From "):
        sender = line.split()[1]  # second field is the address
        emailcount[sender] = emailcount.get(sender, 0) + 1

# Linear scan for the largest count; the first address seen wins ties.
bigword = None
bigcount = None
for word, count in emailcount.items():
    if bigcount is None or bigcount < count:
        bigword, bigcount = word, count
print(bigword, bigcount)
# Count messages per hour of day from the "From " envelope lines and print
# "hour count" pairs sorted by hour.
name = input("Enter file:")
if len(name) < 1:
    name = "mbox-short.txt"
handle = open(name)
counts = {}
for line in handle:
    word = line.split()
    # The time is the sixth field, so require at least six fields before
    # indexing word[5].
    if len(word) < 6 or word[0] != "From":
        continue
    # Take the hour straight from the split result instead of slicing the
    # printed form of a list, which breaks for single-digit hours.
    hour = word[5].split(":")[0]
    counts[hour] = counts.get(hour, 0) + 1
lst = sorted(counts.items())
for k, v in lst:
    print(k, v)
# Tally the messages in mbox-short.txt by hour of day and print the tallies
# in ascending hour order.
counts = {}
fill = open("mbox-short.txt")
for line in fill:
    if not line.startswith("From "):
        continue
    timestamp = line.split()[5]  # e.g. "09:14:16"
    f = timestamp.split(":")[0]
    counts[f] = counts.get(f, 0) + 1
l = sorted(counts.items())
for k, v in l:
    print(k, v)
I followed the online lessons carefully; this is my code, based on what I learned in class. I think it will be easy for you to understand.
# Count messages per hour of day and print "hour count" pairs sorted by hour.
fn = input('Please enter file: ')
if len(fn) < 1:
    fn = 'mbox-short.txt'
hand = open(fn)
di = dict()
for line in hand:
    # Match only the envelope lines that begin with "From "; testing for the
    # word "From" anywhere in the line would also pick up unrelated lines.
    if not line.startswith('From '):
        continue
    wds = line.split()
    if len(wds) <= 2:
        continue
    hour = wds[-2].split(':')  # the time is the second-to-last field
    ch = hour[0]
    di[ch] = di.get(ch, 0) + 1
tmp = sorted(di.items())
for h, t in tmp:
    print(h, t)
# Hour-of-day histogram of the "From " envelope lines, printed sorted by hour.
name = input("Enter file:")
if not name:
    name = "mbox-short.txt"
handle = open(name)
counts = {}
for raw in handle:
    stripped = raw.strip()
    if stripped.startswith("From "):
        # Sixth field is the time; the part before the first ':' is the hour.
        hour = stripped.split()[5].split(":")[0]
        counts[hour] = counts.get(hour, 0) + 1
lst = sorted(counts.items())
for k, v in lst:
    print(k, v)
# Count messages per hour of day and print "hour count" pairs sorted by hour.
fname = input("Enter file:")
fhandle = open(fname)
dic = {}
for line in fhandle:
    if not line.startswith('From '):
        continue
    fields = line.split()
    timestamp = fields[5]  # sixth field is the time, e.g. "09:14:16"
    bline = timestamp.split(':')[0]  # keep only the hour
    # Build the dictionary: hour -> number of messages seen in that hour.
    dic[bline] = dic.get(bline, 0) + 1
# Turn the dictionary into a list of (hour, count) pairs so it can be sorted;
# tuples sort by their first element, i.e. by hour.
lst = sorted(dic.items())
for k1, v1 in lst:
    print(k1, v1)

Lists in dictionary

I have a text file of format like this
10:45 a b c
x 0 1 2
y 4 5 6
z 7 8 9
I want to make x as key and 0,1,2 its value in the list.Same with y and z
while(1):
line = f.readline()
if time in line:
print (line)
L1.append(line)
for count,line in enumerate(f):
if (i < 3):
L1.append(line)
print ("Line{} : {}".format(count,line.strip()))
i=i+1
#print(L1)
for k in range(1):
print(L1[k])
test1 = L1[k]
a1 = test1.split()
print (a1[1])
dict = {a1[1]: L1[k] for a1[1] in L1[k]}
print (dict)
for k in range(1,3):
#print("hey")
print (L1[k]) #will list the single row
test = L1[k]
#print(test)
a = test.split()
print (a[0])
dict = {a[0]:L1[k] for a[0] in L1[k]}
print (dict)
Any idea what i am doing wrong here?
P.S. - I am new to python
You could try this:
# Map the first token of every line after the first to the list of the
# remaining tokens on that line.
my_dict = {}
lines = f.readlines()  # the method name is all lowercase
lines.pop(0)  # drop the first line (the "10:45 a b c" row in the question)
for line in lines:
    # split() with no argument handles repeated spaces and removes the
    # trailing newline from the last value.
    line_list = line.split()
    if not line_list:
        continue  # blank line
    key = line_list.pop(0)
    my_dict[key] = line_list
This will accomplish what it is I think you need (assuming your text file is stored in the same directory and you replace 'test.txt' with your filename):
# Read every whitespace-separated token of the file, skip the first four,
# then treat each following group of four tokens as "key v1 v2 v3".
with open('test.txt', 'r') as values:
    contents = values.read().strip().split()
new_dict = {}
for i in range(4, len(contents) - 3, 4):
    key = contents[i]
    new_dict[key] = contents[i + 1:i + 4]
print(new_dict)
or this, if you want the values as integers:
# Same grouping as the string version, but the three values that follow
# each key are converted to integers.
with open('test.txt', 'r') as values:
    contents = values.read().strip().split()
new_dict = {}
for i in range(4, len(contents) - 3, 4):
    key = contents[i]
    new_dict[key] = [int(token) for token in contents[i + 1:i + 4]]
print(new_dict)
Try this
import string

# After the line that starts with "10:45", store every four-token line as
# first token -> list of the other three tokens.
start_found = False
result_dict = {}
with open("stack1_input.txt", mode='r') as infile:  # closed automatically
    for line in infile:
        if line.startswith("10:45"):
            start_found = True
            continue
        if start_found:
            values = line.split()
            if len(values) == 4:
                result_dict[values[0]] = values[1:]
print(result_dict)

Invalid Output for Coursera Python Assignment

I'm doing the Coursera Python for Everybody stream and I'm stuck on Assignment 10.2. I'm getting invalid output for it. Here is what the assignment asks:
Write a program to read through the mbox-short.txt and figure out the
distribution by hour of the day for each of the messages. You can pull
the hour out from the 'From ' line by finding the time and then
splitting the string a second time using a colon.
From stephen.marquard#uct.ac.za Sat Jan 5 09:14:16 2008
Once you have accumulated the counts for each hour, print out the
counts, sorted by hour as shown below.
Here is my code:
name = raw_input("Enter file:")
if len(name) < 1 : name = "mbox-short.txt"
handle = open(name)
counts = dict()
lst = list()
for line in handle:
line = line.rstrip()
if not line.startswith('From '):
continue
words = line.split()
words = words[5]
words = words.split(":")
for word in counts:
counts[word] = counts.get(word, 0) + 1
lst = list()
for key, val in counts.items():
lst.append((key, val))
lst.sort()
print lst
Let me know what I'm doing wrong. Any advice or hint is appreciated.
I think you are iterating through the wrong thing in the inner loop: it should be for word in words, not for word in counts.
name = raw_input("Enter file:")
if len(name) < 1 : name = "mbox-short.txt"
handle = open(name)
hours = dict()
for line in handle:
if line.startswith("From "):
hour = line.split()[5].split(':')[0]
hours[hour] = hours.get(hour, 0) + 1
for key, value in sorted(hours.items(), None):
print key, value
# Hour-of-day histogram of the "From " envelope lines, printed sorted by hour.
name = input("Enter file:")
if not name:
    name = "mbox-short.txt"
handle = open(name)
counts = {}
for line in handle:
    if line.startswith("From "):
        fields = line.split()
        hour = fields[5].split(':')[0]  # sixth field is the time
        counts[hour] = counts.get(hour, 0) + 1
ordered = sorted(counts.items())
for k, v in ordered:
    print(k, v)
# Tally "From " lines by hour and print the tallies sorted by hour.
# ``handle`` is the already-opened file from the earlier part of the script.
counts = {}
for line in handle:
    if not line.startswith('From '):
        continue
    hour = line.split()[5].split(':')
    counts[hour[0]] = counts.get(hour[0], 0) + 1
lst = sorted(counts.items())
for a, b in lst:
    print(a, b)
# Count messages per hour of day and print "hour count" pairs sorted by hour.
name = input("Enter file:")
if len(name) < 1:
    name = "mbox-short.txt"
handle = open(name)
counts = dict()
for line in handle:
    if line.startswith("From "):
        time = line.split()[5].split(":")
        counts[time[0]] = counts.get(time[0], 0) + 1
# Use names that neither shadow the built-in ``list`` nor rebind ``counts``
# while printing.
hour_counts = sorted(counts.items())
for hour, count in hour_counts:
    print(hour, count)
# Hour-of-day histogram of the "From " lines in words.txt, sorted by hour.
file = open('words.txt')
dic = {}
for line in file:
    line = line.rstrip()
    if line.startswith('From '):
        hour = line.split()[5].split(':')[0]
        dic[hour] = dic.get(hour, 0) + 1
lst = sorted(dic.items())
for k, v in lst:
    print(k, v)
# Count messages per hour of day and print "hour count" pairs sorted by hour.
name = input("Enter file:")
if len(name) < 1:
    name = "mbox-short.txt"
handle = open(name)
g = {}
for line in handle:
    # The trailing space matters: it keeps "From:" header lines out.
    if not line.startswith('From '):
        continue
    # Slicing (rather than indexing) gives an empty list for short lines,
    # so a malformed "From " line is skipped instead of raising IndexError.
    for werd in line.split()[5:6]:
        we = werd.split(':')[0]
        g[we] = g.get(we, 0) + 1
lst = sorted(g.items())
for hour, count in lst:
    print(hour, count)
# Hour-of-day histogram of the "From " envelope lines, printed sorted by hour.
name = input("Enter file:")
if not name:
    name = "mbox-short.txt"
handle = open(name)
one = {}
for line1 in handle:
    if not line1.startswith("From "):
        continue
    word = line1.split()[5].split(":")[0]  # hour part of the time field
    one[word] = one.get(word, 0) + 1
lst3 = sorted(one.items())
for k, v in lst3:
    print(k, v)
# Count messages per hour of day and print "hour count" pairs sorted by hour.
name = input("Enter file:")
if len(name) < 1:
    name = "mbox-short.txt"
handle = open(name)
h = dict()
for line in handle:
    if line.startswith('From '):
        l = line.split()[5].split(':')[0]
        h[l] = h.get(l, 0) + 1
# Python 3's sorted() accepts only the iterable positionally; passing a
# second positional argument (the old Python 2 ``cmp`` slot) is a TypeError.
for k, v in sorted(h.items()):
    print(k, v)
This worked for me:-
Opening the file
name = input("Enter file:")
if len(name) < 1 : name = "mbox-short.txt"
handle = open(name)
lst = list()
counts = dict()
Split each line of the file into words, take the word in the 6th position (the time), split it on ':' and append its first part (the hour) to the list
for lines in handle:
if not lines.startswith('From '): continue
words = lines.split()
words = words[5]
words = words.split(':')
lst.append(words[0])
Now count how many times each hour occurs
for i in lst:
counts[i] = counts.get(i,0) + 1
Finally sorting them by key(here time)
for k, v in sorted(counts.items()):
print(k,v)
# Count messages per hour of day and print "hour count" pairs sorted by hour.
name = input("Enter file: ")
if len(name) < 1:
    name = "mbox-short.txt"
handle = open(name)
hours = dict()
for line in handle:
    # Skipping lines we don't need
    if not line.startswith("From "):
        continue
    words = line.split()
    # Finding text in the line that we need
    time = words[5]
    time = time.split(":")
    hour_time = time[0]
    # Adding to the dictionary, starting from 0 if the hour is not there yet
    hours[hour_time] = hours.get(hour_time, 0) + 1
# Sort once and reuse the result instead of sorting a second time
hours_sorted = sorted(hours.items())
for key, value in hours_sorted:
    print(key, value)
name = raw_input("Enter file:")
if len(name) < 1 : name = "mbox-short.txt"
handle = open(name)
hours = dict()
for line in handle:
if line.startswith("From "):
hour = line.split()[5].split(':')[0]
hours[hour] = hours.get(hour, 0) + 1
for key, value in sorted(hours.items(), None):
print key, value

Adding to a dictionary in python instead of appending

My raw data is:
abc 123
abc 456
def 789
def 101112
I want to put this into a dictionary where the first column is the key and the second column is the value. In the dictionary I currently have:
{'abc': ['123', '456'], 'def': ['789', '101112']}
instead of appending the values I want to add them to the original value so that it looks like:
{'abc': ['579'], 'def': ['101901']}
My current code is:
d = defaultdict(list)
infile = open('test.csv','r')
lines = infile.readlines()[2:-1]
for item in lines:
key, value = [a.strip() for a in item.split(' ')]
d[key].append(value)
# Keep a single running total per key, stored as a one-element list holding
# the total as a string, e.g. {'abc': ['579']}.
d = defaultdict(list)
with open('test.csv', 'r') as infile:
    # NOTE(review): the [2:-1] slice is copied from the question; presumably
    # it skips two leading lines and the final line of the file - confirm.
    lines = infile.readlines()[2:-1]
for item in lines:
    key, value = [a.strip() for a in item.split(' ')]
    if key in d:
        # Both operands must be ints; adding the raw string raises TypeError.
        d[key][0] = str(int(d[key][0]) + int(value))
    else:
        d[key].append(str(value))
# Keep a single running total per key, stored as a one-element list holding
# the total as a string, e.g. {'abc': ['579']}.
d = defaultdict(list)
with open('test.csv', 'r') as infile:
    # NOTE(review): the [2:-1] slice is copied from the question; presumably
    # it skips two leading lines and the final line of the file - confirm.
    for line in infile.readlines()[2:-1]:
        key, value = [a.strip() for a in line.split(' ')]
        # A key seen for the first time yields an empty list from the
        # defaultdict, so start its total at 0.
        previous = int(d[key][0]) if d[key] else 0
        d[key] = [str(previous + int(value))]
Here is version using default python dict:
d = dict()
infile = open('test.csv', 'r')
lines = infile.readlines()[2:-1]
for line in lines:
k, v = [i.strip() for i in line.split(' ')]
if k in d:
d[k] = [str(int(d[k][0]) + int(v))]
else:
d[k] = [v]
print d

Categories

Resources