Lists in dictionary - python

I have a text file of format like this
10:45 a b c
x 0 1 2
y 4 5 6
z 7 8 9
I want to make x the key and [0, 1, 2] its value (as a list), and likewise for y and z.
# Asker's attempt; the indentation was lost when the snippet was pasted.
# `f`, `time`, `L1` and `i` are not defined in this excerpt -- presumably an
# open file, the header timestamp, an empty list and a counter; TODO confirm.
while(1):
line = f.readline()
if time in line:
print (line)
L1.append(line)
for count,line in enumerate(f):
if (i < 3):
L1.append(line)
print ("Line{} : {}".format(count,line.strip()))
i=i+1
#print(L1)
for k in range(1):
print(L1[k])
test1 = L1[k]
a1 = test1.split()
print (a1[1])
# NOTE(review): the comprehension's loop target is a1[1] and it iterates over
# L1[k] (presumably a line of text), so each character of the line is assigned
# in turn. The name `dict` also shadows the builtin.
dict = {a1[1]: L1[k] for a1[1] in L1[k]}
print (dict)
for k in range(1,3):
#print("hey")
print (L1[k]) #will list the single row
test = L1[k]
#print(test)
a = test.split()
print (a[0])
# NOTE(review): same pattern as above, with a[0] as the loop target.
dict = {a[0]:L1[k] for a[0] in L1[k]}
print (dict)
Any idea what I am doing wrong here?
P.S. - I am new to python

You could try this:
# Build {first_token: [remaining tokens]} from the data rows of the open
# text file `f` (opened by the caller).
my_dict = {}
lines = f.readlines()  # the method is readlines(); readLines() raises AttributeError
del lines[:1]  # drop the header row; unlike pop(0) this is safe on an empty file
for line in lines:
    # Whitespace split (no argument) also discards the trailing newline that
    # split(' ') would leave glued to the last value.
    line_list = line.split()
    if not line_list:
        continue  # skip blank lines
    key = line_list.pop(0)
    my_dict[key] = line_list

This will accomplish what I think you need (assuming your text file is stored in the same directory and you replace 'test.txt' with your filename):
# Map every row label in test.txt to the three tokens that follow it.
with open('test.txt', 'r') as values:
    contents = values.read().strip().split()
# The scan starts at token 4 (skipping the first four tokens) and then moves
# in steps of four: label, value, value, value.
new_dict = {
    contents[start]: contents[start + 1:start + 4]
    for start in range(4, len(contents) - 3, 4)
}
print(new_dict)
or this, if you want the values as integers:
# Same layout as the string version, but the three values are converted to int.
with open('test.txt', 'r') as values:
    contents = values.read().strip().split()
# Start at token 4 and advance four tokens at a time (label + three numbers).
new_dict = {
    contents[start]: [int(token) for token in contents[start + 1:start + 4]]
    for start in range(4, len(contents) - 3, 4)
}
print(new_dict)

Try this
import string
# Collect the rows that follow the "10:45" header line into {label: [values]}.
start_found = False
result_dict = {}
# The with-block closes the file; the original left the handle open.
with open("stack1_input.txt", mode='r') as source:
    for line in source:
        if line.startswith("10:45"):
            start_found = True
            continue
        if not start_found:
            continue  # still before the header: ignore
        values = line.split()
        # Keep only rows with exactly four tokens (label plus three values).
        if len(values) == 4:
            result_dict[values[0]] = values[1:]
print(result_dict)

Related

Python Convert nested key value file to csv file

I've below file in txt format
<START>
<SUBSTART
COLA=123;
COLB=123;
COLc=ABC;
COLc=BCD;
COLD=DEF;
<SUBEND
<SUBSTART
COLA=456;
COLB=456;
COLc=def;
COLc=def;
COLD=xyz;
<SUBEND
<SUBSTART
COLA=789;
COLB=789;
COLc=ghi;
COLc=ghi;
COLD=xyz;
<SUBEND>
<END>
Expected output,
COLA,COLB,COLc,COLc,COLD
123,123,ABC,BCD,DEF
456,456,def,def,xyz
789,789,ghi,ghi,xyz
How could I implement this in Python?
I've tried using a dictionary, but that does not work because the file has repeated keys.
You need to write a small parser for your custom format.
Here is a very naive and simple example (updated to handle an arbitrary number of duplicates):
from collections import Counter

import pandas as pd  # the snippet used `pd` without importing it

# Parse `text` (defined by the caller) into one dict per record, then build a
# DataFrame with one row per record.
out = []          # one dict per record, in input order
add = False       # True while inside a run of consecutive KEY=VALUE; lines
c = Counter()     # per-record occurrence count of each key
for line in text.splitlines():  # here you could read from file instead
    entry = line.strip()
    # A data line is any non-marker line containing '='. Testing the content
    # rather than a leading space keeps this working whether or not the input
    # is indented.
    if '=' in entry and not entry.startswith('<'):
        if not add:  # first data line after a marker: start a new record
            out.append({})
            c = Counter()  # counter for duplicates, reset for each record
        add = True
        # partition() splits on the first '=' only, so values may contain '='.
        k, _, v = entry.rstrip(';').partition('=')
        k, v = k.strip(), v.strip()
        c[k] += 1
        if c[k] > 1:  # found a duplicated column, adding suffix
            k += f'_{c[k] - 1}'
        out[-1][k] = v
    else:
        add = False  # marker or blank line ends the current record
df = pd.DataFrame(out)
input:
# Sample input: three <SUBSTART records, each repeating the COLc key three times.
# NOTE(review): the second record has no <SUBEND line before the next <SUBSTART.
text = '''<SUBSTART
COLA=A;
COLB=B;
COLc=C;
COLc=D;
COLc=E;
COLD=F;
<SUBEND
<SUBSTART
COLA=G;
COLB=H;
COLc=I;
COLc=J;
COLc=K;
COLD=L;
<SUBSTART
COLA=M;
COLB=N;
COLc=O;
COLc=P;
COLc=Q;
COLD=R;
<SUBEND>
<END>'''
output:
COLA COLB COLc COLc_1 COLc_2 COLD
0 A B C D E F
1 G H I J K L
2 M N O P Q R
If a line does not start with "<", save it as a key/value entry.
If a line starts with '<SUBEND', close the current record and start a new one.
import json
def main():
    """Print the records found in ``example.txt`` as indented JSON.

    Every line that does not start with ``<`` is read as a ``KEY=VALUE;``
    entry and stored as a single-key dict, so repeated keys within a record
    are preserved. A line starting with ``<SUBEND`` closes the current record.
    The output is a list of records, each a list of single-key dicts.
    Entries after the last ``<SUBEND`` marker are not emitted.
    """
    datas = []
    with open('example.txt', 'r') as f:
        sub_datas = []
        for line in f:
            # Strip first so indented markers and entries are recognised.
            entry = line.strip()
            if not entry:
                continue  # blank lines used to raise IndexError
            if not entry.startswith('<'):
                # Remove only a trailing ';' (not an arbitrary last character)
                # and split on the first '=' so values may contain '='.
                key, sep, value = entry.rstrip(';').partition('=')
                if sep:
                    sub_datas.append({key: value})
            elif entry.startswith('<SUBEND'):
                datas.append(sub_datas)
                sub_datas = []
    print(json.dumps(datas, indent=4))


if __name__ == '__main__':
    main()

Enter single digit but the result have the tens digit when use python search value

When I input 7 17, the result is
ifDescr.7
ifDescr.70
ifDescr.17
If I want the result to contain only 7 and 17 when I input 7 17, how do I code it?
ifDescr.7
ifDescr.17
text file
ifDescr.7
ifDescr.70
ifDescr.17
def search_multiple(file_name, list_of_strings):
    """Return ``(needle, line_number, line_text)`` for every substring hit.

    Each line of ``file_name`` is checked against every entry of
    ``list_of_strings`` with the ``in`` operator, so a needle also matches
    longer tokens that contain it. Line numbers start at 1 and the returned
    line text has trailing whitespace removed.
    """
    with open(file_name, 'r') as handle:
        return [
            (needle, number, text.rstrip())
            for number, text in enumerate(handle, start=1)
            for needle in list_of_strings
            if needle in text
        ]
def main():
    """Read two whitespace-separated numbers and print the matching lines of single.txt."""
    path = 'single.txt'
    first, second = input("Input number").split()
    wanted = ['ifDescr.' + first, 'ifDescr.' + second]
    for hit in search_multiple(path, wanted):
        # hit is (search string, line number, line text); only the text is shown.
        print('Line = ', hit[2])


if __name__ == '__main__':
    main()
The reason for this behavior is that you are using `in` to check whether the string occurs in the line. Since ifDescr.70 contains ifDescr.7, that line is matched as well. Try the function below:
def search_multiple(file_name, list_of_strings):
    """Return ``(needle, line_number, line_text)`` for every exact-line match.

    A line matches a needle only when the line, with surrounding whitespace
    removed, equals the needle. Line numbers start at 1; the returned line
    text has only its trailing whitespace removed.
    """
    with open(file_name, 'r') as handle:
        return [
            (needle, number, text.rstrip())
            for number, text in enumerate(handle, start=1)
            for needle in list_of_strings
            if text.strip() == needle
        ]

Write a program to read through the mbox-short.txt and figure out the distribution by hour of the day for each of the messages

10.2 Write a program to read through the mbox-short.txt and figure out the distribution by hour of the day for each of the messages. You can pull the hour out from the 'From ' line by finding the time and then splitting the string a second time using a colon.
From stephen.marquard#uct.ac.za Sat Jan 5 09:14:16 2008
Once you have accumulated the counts for each hour, print out the counts, sorted by hour as shown below.
My Code:
# Asker's attempt at exercise 10.2: tally the time field of every "From " line.
fname = input("Enter file:")
fhandle = open(fname)
dic={}
for line in fhandle:
if not line.startswith("From "):
continue
else:
line=line.split()
line=line[5] # accessing the list using index and splitting it
line=line[0:2]
# NOTE(review): `line` is a string of at most two characters here, so this loop
# visits each character separately and the counts are keyed by single
# characters rather than by the two-digit hour.
for bline in line:
dic[bline]=dic.get(bline,0)+1 # Using this line we created a dictionary having keys and values
#Now it's time to access the dictionary and sort in some way.
lst=[]
for k1,v1 in dic.items(): # the items method is used to access the dictionary's key-value pairs
lst.append((k1,v1)) # append the dictionary's keys and their corresponding values to lst
lst.sort() # sort lst; sorting is done by key
#print(lst)
for k1,v1 in lst: # we are able to access this list using key value pair as it was basically a dictionary before, It is just appended
print(k1,v1)
#print(dic)
#print(dic)
Desired Output:
04 3
06 1
07 1
09 2
10 3
11 6
14 1
15 2
16 4
17 2
18 1
19 1
My Output:
enter image description here
I don't understand what's going wrong.
Working code. I broke it down into as simple a form as I could, so it should be easy for you to understand.
# Count messages per hour of day from the "From " envelope lines of an mbox
# file and print the counts sorted by hour.
d = dict()
fname = input('enter the file name : ')
try:
    fopen = open(fname, 'r')
except OSError:
    # Stop here: the original bare `except:` printed the message and then
    # crashed with a NameError on the never-assigned `fopen`.
    print('wrong file name !!!')
    raise SystemExit(1)
with fopen:  # closes the file once the scan is finished
    for line in fopen:
        stline = line.strip()
        # "From " (with the space) selects envelope lines and excludes the
        # "From:" header lines.
        if not stline.startswith('From '):
            continue
        spline = stline.split()
        if len(spline) < 6:
            continue  # malformed envelope line without a time field
        # Field 5 is HH:MM:SS; the hour is the part before the first colon.
        t = spline[5].split(':')[0]
        d[t] = d.get(t, 0) + 1
lst = sorted(d.items())
for k, v in lst:
    print(k, v)
# Report the address that appears most often on "From " envelope lines.
name = input("Enter file:")
if not name:
    name = "mbox-short.txt"
emailcount = dict()
with open(name) as handle:  # the original never closed the file
    for line in handle:
        if not line.startswith("From "):
            continue
        words = line.split()
        if len(words) < 2:
            continue  # a bare "From " line has no address field
        sender = words[1]
        emailcount[sender] = emailcount.get(sender, 0) + 1
# max() returns the first of several tied senders, matching the original
# strict ">" scan; the default keeps the "None None" output when no
# envelope line was found.
bigword, bigcount = max(
    emailcount.items(),
    key=lambda item: item[1],
    default=(None, None),
)
print(bigword, bigcount)
# Count messages per hour of day and print the counts sorted by hour.
name = input("Enter file:")
if not name:
    name = "mbox-short.txt"
counts = {}
with open(name) as handle:  # the original never closed the file
    for line in handle:
        word = line.split()
        # word[5] (the time) is read below, so at least six fields are
        # required; the original `< 3` check did not protect that index.
        if len(word) < 6 or word[0] != "From":
            continue
        # Take the text before the first colon directly. The original sliced
        # the repr of a list (str(['09'])[2:4]), which breaks for hours that
        # are not exactly two characters long.
        hour = word[5].split(":")[0]
        counts[hour] = counts.get(hour, 0) + 1
lst = sorted(counts.items())
for k, v in lst:
    print(k, v)
# Count "From " envelope lines of mbox-short.txt per hour and print them
# sorted by hour.
counts = dict()
with open("mbox-short.txt") as fill:  # the original never closed the file
    for line in fill:
        if not line.startswith("From "):
            continue
        x = line.split()
        if len(x) < 6:
            continue  # no time field on this line
        # x[5] is HH:MM:SS; keep the hour part.
        f = x[5].split(":")[0]
        counts[f] = counts.get(f, 0) + 1
for k, v in sorted(counts.items()):
    print(k, v)
I listened carefully to the online lessons; this is my code, based on what I learned in class. I think it will be easy for you to understand.
# Count messages per hour of day and print the counts sorted by hour.
fn = input('Please enter file: ')
if not fn:
    fn = 'mbox-short.txt'
di = dict()
with open(fn) as hand:  # the original never closed the file
    for line in hand:
        wds = line.split()
        # Require "From" as the FIRST word. The original tested
        # `'From' in wds`, which also accepted any line that merely contains
        # the word somewhere and then counted an unrelated token as the hour.
        if len(wds) > 2 and wds[0] == 'From':
            # The time is the second-to-last field, as in the original.
            ch = wds[-2].split(':')[0]
            di[ch] = di.get(ch, 0) + 1
tmp = sorted(di.items())
for h, t in tmp:
    print(h, t)
# Count messages per hour of day and print the counts sorted by hour.
name = input("Enter file:")
if not name:
    name = "mbox-short.txt"
counts = dict()
with open(name) as handle:  # the original never closed the file
    for line in handle:
        line = line.strip()
        if not line.startswith("From "):
            continue
        fields = line.split()
        if len(fields) < 6:
            continue  # no time field; indexing fields[5] would raise IndexError
        # split(":") always yields at least one piece, so take the first one
        # directly instead of looping over a one-element slice.
        hour = fields[5].split(":")[0]
        counts[hour] = counts.get(hour, 0) + 1
lst = sorted(counts.items())
for k, v in lst:
    print(k, v)
# Corrected version of the exercise: count messages per hour of day and
# print the counts sorted by hour.
fname = input("Enter file:")
dic = {}
with open(fname) as fhandle:  # the original never closed the file
    for line in fhandle:
        if not line.startswith('From '):
            continue
        fields = line.split()
        if len(fields) < 6:
            continue  # no time field on this line
        # Field 5 is HH:MM:SS. Split on ':' and keep the whole first piece,
        # so the key is the hour rather than its individual characters.
        bline = fields[5].split(':')[0]
        # dict.get supplies 0 the first time an hour is seen.
        dic[bline] = dic.get(bline, 0) + 1
# Turn the dictionary into a list of (hour, count) pairs sorted by hour.
lst = sorted(dic.items())
for k1, v1 in lst:
    print(k1, v1)

Python making matching pairs

I've got the .txt file like this within:
Crista
Jame
7,3
2,0
Wiki
Rok
4,1
6,2
3,2
6,8
Pope
Lokk
5,2
0,1
3,1
Sam
Antony
4,3
9,1
My code finds all names and appends them to the names[] list, and finds all digit lines and appends them to the digits[] list (until now, if more than two digit lines appeared in a row, I did not need the extra ones in the list):
import re
# Asker's scan of mine.txt: lines starting with a letter go to `names`;
# lines matching the digit pattern go to `digits` while `count` is below 2.
f=open('mine.txt')
names=[]
digits=[]
count=0
for line in f:
line = line.rstrip()
if re.search('^[a-zA-Z]', line):
name=line
names.append(name)
# NOTE(review): this pattern requires a colon between the two digits, while the
# sample data shown in the question uses commas ("7,3") -- confirm which
# separator the real file contains.
if re.findall('^\d{1}:\d{1}', line):
if count < 2 :
digit=line
digits.append(digit)
count += 1
# NOTE(review): indentation was lost, so it is unclear which `if` this `elif`
# belongs to; presumably it resets the counter on a non-digit, non-empty line.
elif line != "" :
count = 0
Then I made pairs for matching names and digits:
def _drain_in_pairs(items):
    """Empty ``items`` from the front, returning its elements as 2-tuples.

    Raises IndexError if ``items`` holds an odd number of elements.
    """
    pairs = list()
    while items:
        pairs.append((items.pop(0), items.pop(0)))
    return pairs


# Both source lists are consumed (left empty) by the pairing.
my_pairs_dig = _drain_in_pairs(digits)
my_pairs_dig
my_pairs_names = _drain_in_pairs(names)
my_pairs_names
# Match each name pair with the digit pair at the same position.
outp = list(zip(my_pairs_names, my_pairs_dig))
And got this output:
[(('Crista', 'Jame'), ('7,3', '2,0')), (('Wiki', 'Rok'), ('4,1', '6,2')), (('Pope', 'Lokk'), ('5,2', '0,1')), (('Sam', 'Antony'),('4,3', '9,1'))]
But the plans have changed, and now my desired output is:
[(('Crista', 'Jame'), ('7,3', '2,0')), (('Wiki', 'Rok'), ('4,1', '6,2'), ('3,2', '6,8')), (('Pope', 'Lokk'), ('5,2', '0,1'), ('3,1')), (('Sam', 'Antony'),('4,3', '9,1'))]
How can I rewrite my code to get the desired output?
Try this
# Group test.txt into records of the form
#   ((name1, name2), first two score lines[, remaining score lines])
# and print the list of records.
with open('test.txt', 'r') as fp:
    # Drop blank lines -- including the empty string that splitting on "\n"
    # produced after a trailing newline -- so they are not stored as scores.
    data = [row.strip() for row in fp if row.strip()]
i, res = 0, []
while i < len(data):
    if not data[i].isalpha() or i + 1 >= len(data):
        # A stray non-name line, or a single name with nothing after it.
        # The original never advanced `i` here (infinite loop) and raised
        # IndexError on a dangling name.
        i += 1
        continue
    names = (data[i], data[i + 1])
    i += 2
    digits = []
    # Everything up to the next alphabetic line belongs to this name pair.
    while i < len(data) and not data[i].isalpha():
        digits.append(data[i])
        i += 1
    digits = tuple(digits)
    if len(digits) > 2:
        res.append((names, digits[:2], digits[2:]))
    else:
        res.append((names, digits[:2]))
print(res)
Output:
[(('Crista', 'Jame'), ('7,3', '2,0')), (('Wiki', 'Rok'), ('4,1', '6,2'), ('3,2', '6,8')), (('Pope', 'Lokk'), ('5,2', '0,1'), ('3,1',)), (('Sam', 'Antony'), ('4,3', '9,1'))]
Try this:
import re
# Stream the lines of the open file `f` (opened by the caller) into records of
# the form ((name1, name2), (score, score), (score, score), ...).
digits = []
result = []
name1, name2 = None, None


def _make_record(first, second, scores):
    """Return ((first, second), *score chunks), scores grouped two at a time."""
    chunks = (tuple(scores[i:i + 2]) for i in range(0, len(scores), 2))
    return ((first, second), *chunks)


for line in f:
    line = line.strip()
    if not line:
        # The original tested `if line:` before stripping, which is always
        # true for a line read from a file, so blank lines became scores.
        continue
    if re.search(r'^[a-zA-Z]', line):
        if name1 and name2:
            # A new name after a complete pair: emit the finished record.
            result.append(_make_record(name1, name2, digits))
            name1, name2, digits = None, None, []
        if name1:
            name2 = line
        else:
            name1 = line
    else:
        digits.append(line)
# Emit the last record, which has no following name line to trigger it.
if name1 and name2:
    result.append(_make_record(name1, name2, digits))
    name1, name2, digits = None, None, []
print(result)
Output:
[(('Crista', 'Jame'), ('7,3', '2,0')), (('Wiki', 'Rok'), ('4,1', '6,2'), ('3,2', '6,8')), (('Pope', 'Lokk'), ('5,2', '0,1'), ('3,1',)), (('Sam', 'Antony'), ('4,3', '9,1'))]
This is based on your assumption:
that's always two names and then 2,3 or 4 lines with numbers

count numbers associated with category in list

I have a list like this
GroupID,Number
yellow,1
yellow,2
tan,0
blue,1
black,2
black,3
What I want is this
GroupID,Number
yellow,3
tan, 0
blue,1
black,5
So I want to add the numbers associated with each groupID.
This is what I have so far, but I am having difficulty with the result statement:
from collections import defaultdict
# Asker's incomplete attempt: collects the second CSV column per value of the
# first column; the summing step below is still a placeholder.
d = defaultdict(list)
f = open("metal_modules.csv","r")
sheet = f.readlines()
#print sheet
for line in sheet[1:]:
#print line
spl = line.strip().split(",")
#print spl[1]
name = spl[0]
# spl[1] comes from str.split(), so the stored values are strings and need
# converting (e.g. with int) before they can be summed.
d[name].append(spl[1])
outfile = open("out.txt","w")
result = ""
for v in d.values():
# NOTE(review): placeholder -- the right-hand side is missing, so this line is
# not valid Python as written.
result = #here I need to sum the number in column two for each key in the dictionary#
#print result
outfile.write(result)
f.close()
outfile.close()
keep it simple
# Build one "group, total" line per key of `d` (the group -> values mapping
# built by the caller).
# The collected values are strings read from the file, so each one is
# converted with int() first; sum() over strings raises TypeError.
result = "".join(
    "%s, %s\n" % (group, sum(int(n) for n in d[group]))
    for group in d
)
You could try the code below if the order of the groups is not important to you.
from collections import defaultdict
# Read 'infile', gather the second column per first-column value, then write
# the header followed by one "group,total" line per group to 'outfile'.
with open('infile') as src:
    grouped = defaultdict(list)
    header = src.readline()
    for row in src:
        fields = row.rstrip().split(',')
        grouped[fields[0]].append(fields[1])
with open('outfile', 'w') as dst:
    dst.write(header)
    for group, numbers in grouped.items():
        # Values were stored as text; convert while summing.
        total = sum(map(int, numbers))
        dst.write(group + "," + str(total) + "\n")
Take a look at the following:
# Sum the Number column per GroupID in metal_modules.csv and write the totals
# to out.txt.
with open("metal_modules.csv", "r") as f:
    sheet = f.readlines()
counter = {}
for line in sheet[1:]:  # sheet[0] is the header row
    if not line.strip():
        continue  # a blank line would make the unpacking below raise ValueError
    k, v = line.split(",")
    k = k.strip()  # "tan" and "tan " must land in the same group
    counter[k] = counter.get(k, 0) + int(v)  # int() tolerates spaces/newline
with open("out.txt", "w") as outfile:
    rows = ["GroupID,Number"]
    rows.extend("%s,%s" % (item, total) for item, total in counter.items())
    # Joining with "\n" yields the same text as the original's
    # concatenate-then-strip, without the trailing newline.
    outfile.write("\n".join(rows))

Categories

Resources