Looping through file and trying to group values by keys - python

I have a text file with the following:
1 cdcdm
1 dhsajdhsa
2 ffdm
2 mdff
3 ccdfm
3 cdmfc
3 fmdcc
My goal is for the output to look like this:
1 : cdcdm, dhsajdhsa
2 : ffdm, mdff
3 : ccdfm, cdmfc, fmdcc
I wrote the following code, but for some reason, I'm not getting the expected output.
value_list = ''
cur_key = None
key = None
f = open('example.txt', 'r')
for line in f.readlines():
try:
key, value = line.split()
key = key.strip()
value = value.strip()
if cur_key == key:
value_list = value_list + "," + value
else:
if cur_key:
print(cur_key + ":" +value_list)
cur_key = key
value_list = ''
else:
cur_key = key
except Exception as e:
continue
I'm getting the following output:
1:,dhsajdhsa
2:,mdff
How can I modify my code to get this to work?
Thanks,
Mango

A minimally changed implementation might look like this
# Group consecutive "key value" lines of example.txt and print one
# "key:value1, value2" line per run of equal keys.
with open('example.txt', 'r') as f:
    cur_key = None
    value_list = []
    for line in f:  # iterate lazily instead of loading everything via readlines()
        if not line.strip():
            continue  # a blank line would break the two-name unpacking below
        key, value = line.split()  # split() already strips surrounding whitespace
        if cur_key is None:
            cur_key = key  # first record: open the first group
        if cur_key == key:
            value_list.append(value)
        else:
            # Key changed: emit the finished group, then start the new one
            # with the value from this very line so it is not lost.
            print(cur_key + ":" + ', '.join(value_list))
            cur_key = key
            value_list = [value]
    # Flush the last group; skipped when the file held no records, where
    # cur_key is still None and string concatenation would raise TypeError.
    if cur_key is not None:
        print(cur_key + ":" + ', '.join(value_list))
output:
1:cdcdm, dhsajdhsa
2:ffdm, mdff
3:ccdfm, cdmfc, fmdcc
So we need to make sure cur_key has a value for the first iteration, so set it if it has not been set yet. Also, when we find a new key we shouldn't reset value_list to be blank; it should be set to a list holding the value read on that line, so the line is not skipped. Finally, to catch the final group's line we should print the values again outside the loop at the end.

Use itertools.groupby:
import itertools

# Print "key: v1, v2, ..." for every run of consecutive lines sharing a key.
with open('example.txt') as f:
    # Split each non-blank line once into [key, rest]. Grouping on the whole
    # first field (not just its first character) keeps keys such as "10" and
    # "11" apart, and skipping blank lines avoids an IndexError.
    records = (line.split(None, 1) for line in f if line.strip())
    for key, group in itertools.groupby(records, key=lambda fields: fields[0]):
        print('{}: {}'.format(
            key, ', '.join(fields[1].strip() for fields in group)))
Here's an answer based on your code:
# Print "key: v1,v2" for each run of consecutive lines sharing a key.
value_list = []
cur_key = None
with open('example.txt', 'r') as f:  # context manager closes the file for us
    for line in f:
        if not line.strip():
            continue  # blank lines cannot be unpacked into key/value
        key, value = line.split()  # split() already strips whitespace
        if cur_key == key or cur_key is None:
            value_list.append(value)
        else:
            # Key changed: emit the finished group and start the next one.
            print('{}: {}'.format(cur_key, ','.join(value_list)))
            value_list = [value]
        cur_key = key
# The last group is never followed by a key change, so flush it here.
if value_list:
    print('{}: {}'.format(cur_key, ','.join(value_list)))

I recommend throwing that away and using a collections.defaultdict. Then you can add values to a list for the corresponding key, and print the completed dictionary when you're done:
import collections

# Collect every value under its key, then print the groups sorted by key.
d = collections.defaultdict(list)
with open('example.txt') as f:
    for line in f:
        if not line.strip():
            continue  # blank lines cannot be unpacked into k, v
        k, v = line.split()  # split() already strips whitespace
        d[k].append(v)
# NOTE: keys are strings, so this ordering is lexicographic ("10" < "2").
for k, v in sorted(d.items()):
    print('{} : {}'.format(k, ', '.join(v)))

I also believe there are better ways to do it, but if you really want to stick to the basics, at least use lists instead of concatenating text. Here's yet another version of your code, with slight changes:
# Build (key, values) pairs for each run of equal keys, then print them.
groups = []  # list of (key, [values]) in file order
cur_key = None
with open('example.txt', 'r') as f:
    for line in f:
        try:
            key, value = line.split()
        except ValueError:
            continue  # skip lines that are not exactly "key value"
        if key != cur_key:
            # New key: open a fresh group remembering the real key, so the
            # output does not have to assume keys are 1, 2, 3, ...
            groups.append((key, []))
            cur_key = key
        groups[-1][1].append(value)
# Works for an empty file too: there is simply nothing to print.
for key, values in groups:
    print(key + ' : ' + ', '.join(values))

Related

Read dictionary from file into original lists

I have one nested list, and one list for "numbers"
test_keys = [["tobbe", "kalle"],["karl", "Clara"],["tobbe"],["tank"]]
test_values = ['123', '234','345','456']
res = {}
for key in test_keys:
for value in test_values:
res[value] = key
test_values.remove(value)
break
with open("myfile.txt", 'w') as f:
for key, value in res.items():
f.write('%s;%s;\n' % (key, value))
This provides the file
123;['tobbe', 'kalle'];
234;['karl', 'Clara'];
345;['tobbe'];
456;['tank'];
Now I want to load the data back into a dictionary without the ";" and later on back into the corresponding lists.
Try this:
import ast

# Rebuild {number: [names]} from lines shaped like "123;['tobbe', 'kalle'];".
res = {}
with open("myfile.txt") as file:
    for line in file:
        if not line.strip():
            continue
        # Everything before the first ';' is the number; the remainder is the
        # repr() of the name list followed by the trailing ';'.
        number, _, rest = line.partition(';')
        # literal_eval safely parses the list literal, so names containing
        # commas or quotes survive (manual slicing/splitting would not).
        res[number] = ast.literal_eval(rest.strip().rstrip(';'))
print(res)
# In the question test_keys held the name lists and test_values the numbers,
# so rebuild them the same way round.
test_keys = list(res.values())
test_values = list(res)
print(test_keys)
print(test_values)

Form a python dictionary based on pattern matched keys and values in file

I am trying to create a dictionary with a file containing text based on a matched pattern. Lines containing key_str should become keys and subsequent lines should become values associated with those keys in the dictionary.
File:
ml1
/core
/home
ml2
/var
/home
/lib
cpuml1
/home
/root
/raid
Expected Output
my_dict: {ml1: ['/core','/home'], ml2: ['/var','/home','/lib'], cpuml1: ['/home','/root','/raid']}
Code:
d = {}
key_str = "ml"
val_str = ""
key_list = []
val_list = []
with open(homedir+'/backup/file2dict.result') as file2dict:
for line in file2dict:
words=line.split()
for aWord in words:
if key_str in aWord:
key_list.append(aWord)
print(key_list)
else:
val_list.append(aWord)
print("this is formed dictionary", d)
Whenever you encounter a new key, add existing val_list to last key and wipe val_list.
# Words containing key_str start a new key; every other word is a value of
# the most recent key.
key_str = "ml"
val_list = []
key = ''
d = {}
with open(homedir + '/backup/file2dict.result') as file2dict:
    for line in file2dict:
        for aWord in line.split():
            if key_str in aWord:
                if key:
                    # Commit the values gathered for the previous key.
                    d[key] = val_list
                    val_list = []
                key = aWord
            else:
                val_list.append(aWord)
# Commit the last key; skipped when no key was seen so that an empty file
# does not produce a bogus {'': []} entry.
if key:
    d[key] = val_list
print("this is formed dictionary", d)
Assuming that input format is correct, with double linefeeds between each block, each block starting with a key name, each following line in a block being a value for that key, and each key being unique in the file, as well as assuming that you meant ['/var','/home', '/lib'] for the ml2 key, then the result can be created with a comprehension:
with open(file) as f:
    # Blocks are separated by blank lines. Strip each block so the file's
    # trailing newline (or extra blank lines) cannot add '' as a key or value.
    blocks = (block.strip() for block in f.read().split('\n\n'))
    # First line of each block is the key, the remaining lines its values.
    result = {key: lst for key, *lst in (block.split('\n') for block in blocks if block)}
Testing with a multiline string instead of a file:
>>> s = '''ml1
... /core
... /home
...
... ml2
... /var
... /home
... /lib
...
... cpuml1
... /home
... /root
... /raid'''
>>> {key:lst for key, *lst in (block.split('\n') for block in s.split('\n\n'))}
{'ml1': ['/core', '/home'], 'ml2': ['/var', '/home', '/lib'], 'cpuml1': ['/home', '/root', '/raid']}
Here is one way to do it:
#! /usr/bin/env python3
# Map each key line (one containing keystr) to the non-empty lines after it.
keystr = "ml"
k = ''
d = {}
with open("testp.txt") as file2dict:
    for line in file2dict:
        li = line.strip()
        # This uses your keystr, but you could say if not line.startswith("/"):
        if keystr in li:
            k = li
        elif li and k:
            # Append in place; rebuilding the list with "+" on every line
            # copies it each time.
            d.setdefault(k, []).append(li)
print(d)
You could simplify your code:
# Lines containing key_str become keys; following lines become their values.
key_str = "ml"
result = {}
curr_key = None
with open('file2dict.result', 'r') as file2dict:
    # Strip every line and drop the ones that end up empty.
    for line in filter(lambda l: l != '', map(str.strip, file2dict)):
        if key_str in line:
            curr_key = line
            result[curr_key] = []  # register the key even if no value follows
        elif curr_key is not None:
            result[curr_key].append(line)
        else:
            print("Value without a key: {}".format(line))
Here filter(lambda l: l != '', map(str.strip, file2dict)) is used to filter out empty lines; moreover, you can use a dict (result) to collect lines.
If you cannot have keys without any following valid line in your input file (or if you want to skip them), you can use setdefault for your dict:
# Same as before, but a key only appears in result once it has a value.
key_str = "ml"
result = {}
curr_key = None
with open('file2dict.result', 'r') as file2dict:
    # Strip every line and drop the ones that end up empty.
    for line in filter(lambda l: l != '', map(str.strip, file2dict)):
        if key_str in line:
            curr_key = line
        elif curr_key is not None:
            # setdefault creates the list on the first value for this key.
            result.setdefault(curr_key, []).append(line)
        else:
            print("Value without a key: {}".format(line))

Converting text file to list of dictionaries

I have written a script to convert a text file into dictionary..
script.py
l=[]
d={}
count=0
f=open('/home/asha/Desktop/test.txt','r')
for row in f:
rowcount+=1
if row[0] == ' ' in row:
l.append(row)
else:
if count == 0:
temp = row
count+=1
else:
d[temp]=l
l=[]
count=0
print d
textfile.txt
Time
NtGetTickCount
NtQueryPerformanceCounter
NtQuerySystemTime
NtQueryTimerResolution
NtSetSystemTime
NtSetTimerResolution
RtlTimeFieldsToTime
RtlTimeToTime
System informations
NtQuerySystemInformation
NtSetSystemInformation
Enumerations
Structures
The output i have got is
{'Time\n': [' NtGetTickCount\n', ' NtQueryPerformanceCounter\n', ' NtQuerySystemTime\n', ' NtQueryTimerResolution\n', ' NtSetSystemTime\n', ' NtSetTimerResolution\n', ' RtlTimeFieldsToTime\n', ' RtlTimeToTime\n']}
I am only able to convert up to the 9th line of the text file. Please suggest where I am going wrong.
You never commit (i.e. run d[row] = []) the final list to the dictionary.
You can simply commit when you create the row:
# f is the file object opened in the question's script.
d = {}
cur = []
for row in f:
    if not row.strip():
        continue  # a blank line is neither a header nor a section entry
    if row.startswith(' '):  # line in section
        cur.append(row)
    else:  # new header row
        # Register the header immediately and keep a reference to its list,
        # so later section lines land in the dictionary without a final flush.
        d[row] = cur = []
print(d)
Using dict.setdefault to create dictionary with lists as values will make your job easier.
# Indented rows are values of the most recent non-indented (header) row.
d = {}
with open('input.txt') as f:
    key = ''
    for row in f:
        if not row.strip():
            continue  # ignore blank lines instead of treating them as headers
        if row.startswith(' '):
            # setdefault creates the list on the first value for this key.
            d.setdefault(key, []).append(row.strip())
        else:
            key = row
print(d)
Output:
{'Time\n': ['NtGetTickCount', 'NtQueryPerformanceCounter', 'NtQuerySystemTime', 'NtQueryTimerResolution', 'NtSetSystemTime', 'NtSetTimerResolution', 'RtlTimeFieldsToTime', 'RtlTimeToTime'], 'System informations\n': ['NtQuerySystemInformation', 'NtSetSystemInformation', 'Enumerations', 'Structures']}
A few things to note here:
Always use with open(...) for file operations.
If you want to check the first index, or the first few indices, use str.startswith()
The same can be done using collections.defaultdict:
from collections import defaultdict

# Indented rows are values of the most recent non-indented (header) row.
d = defaultdict(list)
with open('input.txt') as f:
    key = ''
    for row in f:
        if not row.strip():
            continue  # ignore blank lines instead of treating them as headers
        if row.startswith(' '):
            # Strip the value, matching what the setdefault version stores.
            d[key].append(row.strip())
        else:
            key = row
So you need to know two things at any given time while looping over the file:
1) Are we on a title level or content level (by indentation) and
2) What is the current title
In the following code, we first check if the current line we are at, is a title (so it does not start with a space) and set the currentTitle to that as well as insert that into our dictionary as a key and an empty list as a value.
If it is not a title, we just append to corresponding title's list.
# Map each title (non-indented line) to the indented lines that follow it.
with open('49359186.txt', 'r') as infile:  # avoid shadowing the builtin input()
    topics = {}
    currentTitle = ''
    for line in infile:
        line = line.rstrip()
        if not line:
            continue  # blank line: line[0] would raise IndexError
        if not line.startswith(' '):
            currentTitle = line
            topics[currentTitle] = []
        else:
            # setdefault keeps content that appears before any title from
            # raising KeyError; it is filed under the initial '' title.
            topics.setdefault(currentTitle, []).append(line)
print(topics)
Try this:
# Indented lines are values of the most recent non-indented (header) line.
d = {}
key = None
with open('/home/asha/Desktop/test.txt', 'r') as file:
    for line in file:
        if not line.strip():
            continue  # ignore blank lines instead of creating a '' key
        if line.startswith(' '):
            if key is not None:  # skip values that precede the first header
                d[key].append(line.strip())
        else:
            key = line.strip()
            d[key] = []
print(d)
Just for the sake of adding in my 2 cents.
This problem is easier to tackle backwards. Consider iterating through your file backwards and then storing the values into a dictionary whenever a header is reached.
# Walk the file bottom-up: collect indented rows until their header is
# reached, then store them under that header.
with open('test.txt', 'r') as f:
    rows = f.read().splitlines()
d = {}
l = []
for row in reversed(rows):
    if not row.strip():
        continue  # blank rows would make row[0] raise IndexError
    if row.startswith(' '):
        l.append(row)
    else:
        l.reverse()  # rows were gathered bottom-up; restore file order
        d[row] = l
        l = []
Just keep track of the lines which start with ' ' and you are done with only one loop:
# Map each header (non-indented line) to the indented lines that follow it.
result = {}
data = None  # value list of the current header; None before the first one
with open('new_text.txt', 'r') as f:
    for line in f:
        text = line.strip()
        if not text:
            continue
        if line.startswith(' '):
            if data is not None:
                data.append(text)
        else:
            # Bind the list to its header right away. Pairing separate key
            # and value lists with zip() afterwards misaligns them whenever
            # a header has no indented lines.
            data = result[text] = []
print(result)
output:
{'Example': ['data1', 'data2'], 'Time': ['NtGetTickCount', 'NtQueryPerformanceCounter', 'NtQuerySystemTime', 'NtQueryTimerResolution', 'NtSetSystemTime', 'NtSetTimerResolution', 'RtlTimeFieldsToTime', 'RtlTimeToTime'], 'System informations': ['NtQuerySystemInformation', 'NtSetSystemInformation', 'Enumerations', 'Structures']}

Creating new dictionary after looping twice

I'm trying to clean up the content of a csv file and then create a new dictionary out of it. I want the new dictionary to be globally available:
import csv
input_file = csv.DictReader(open("test_file3.csv"))
final_dict = {} #this should get filled with the new dictionary
for row in input_file: #cleaning the dictionary
new_dict = {}
for key, value in row.items():
if key == "Start Date":
new_dict[key] = value
else:
first_replace = value.replace(".", "")
second_replace = first_replace.replace(",", ".")
all_replaced = second_replace.replace(" €", "")
new_dict[key] = all_replaced
It works inside the first loop, but I don't know how to get the dictionary under new_dict into final_dict.
Ideally I want final_dict = new_dict.
You don't need to create a new_dict inside your for loop, just access final_dict inside it:
# input_file is the csv.DictReader created in the question's script.
final_dict = {}
for row in input_file:
    for key, value in row.items():
        if key == "Start Date":
            final_dict[key] = value
        else:
            # "1.234,56 €" -> "1234.56": drop thousands dots, turn the
            # decimal comma into a dot, remove the currency suffix.
            first_replace = value.replace(".", "")
            second_replace = first_replace.replace(",", ".")
            all_replaced = second_replace.replace(" €", "")
            final_dict[key] = all_replaced
print(final_dict)  # function-call form works on both Python 2 and 3
If there are multiple entries with the same key, only the last one will be included in final_dict.

Finding a line in a file then reading next few lines in Python

I have a plain text file with the following data:
id=1
name=Scott
occupation=Truck driver
age=23
id=2
name=Dave
occupation=Waiter
age=16
id=3
name=Susan
occupation=Computer programmer
age=29
I'm trying to work out the best way to get to any point in the file given an id string, then grab the rows underneath to extract the data for use in my program. I can do something like:
def get_person_by_id(id):
file = open('rooms', 'r')
for line in file:
if ("id=" + id) in line:
print(id + " found")
But I'm not sure how I can now go through the next bunch of lines and do line.split("=") or similar to extract the info (put into a list or dict or whatever) that I can use in my program. Any pointers?
One option would be to load the entire thing into memory, which would save you from reading the file every time:
# Load every record of 'rooms' once; records are separated by blank lines.
with open('rooms') as f:
    chunks = f.read().split('\n\n')

people_by_id = {}
for chunk in chunks:
    # Drop empty rows (e.g. from the file's trailing newline); an empty row
    # has no "=" and would make dict() raise ValueError.
    rows = [row for row in chunk.splitlines() if row.strip()]
    if not rows:
        continue
    data = dict(row.split('=', 1) for row in rows)
    # The id becomes the lookup key, so remove it from the stored attributes.
    people_by_id[data.pop('id')] = data


def get_person_by_id(id):
    """Return the attribute dict stored for *id*, or None if it is unknown."""
    return people_by_id.get(id)
How about exiting from a for loop after finding the correct line:
def get_person_by_id(id):
    """Scan 'rooms' for the line "id=<id>" and stop right after it.

    Prints "<id> found" when the line is located. The file position is then
    just past that line, so the record's attributes can be read next.
    """
    with open('rooms', 'r') as file:
        for line in file:
            # Compare the whole line: a substring test would let "id=1"
            # match "id=10" as well.
            if line.strip() == "id=" + id:
                print(id + " found")
                break
        # now you can continue processing your file:
        # next() continues the same iterator the loop used; it yields ''
        # at end of file, just as readline() would.
        next_line = next(file, '')
Maybe:
# Build {id: {attr: value, ...}}; each "id=" line starts a new record.
d = {}
current = None  # record being filled; None until the first id line
with open(filename) as f:
    for line in f:
        # partition() tolerates "=" inside values and lines without "=";
        # strip() removes the newline that would otherwise end every value.
        k, sep, v = line.strip().partition('=')
        if not sep:
            continue  # blank or malformed line
        if k == 'id':
            # Keep a direct reference to the new record rather than looking
            # up "the last key", which dict.keys() cannot provide reliably.
            current = d[v] = {}
        if current is not None:
            current[k] = v
And here is an iterative solution.
# Parse blank-line-separated "key=value" records into a list of dicts.
objects = []
current_object = None
# Text mode: in binary mode Python 3 yields bytes, and stripping/partitioning
# bytes with str arguments raises TypeError.
with open("info.txt", "r") as f:
    for line in f:
        line = line.strip("\r\n")
        if not line:
            current_object = None  # a blank line ends the current record
            continue
        if current_object is None:
            current_object = {}
            objects.append(current_object)
        key, _, value = line.partition('=')
        current_object[key] = value
print(objects)
Another example of an iterative parser:
from itertools import takewhile
def entries(f):
    """Parse "key=value" records into a dict keyed by each record's id.

    f is any iterable of lines. Consecutive lines containing "=" form one
    record; any line without "=" (for example a blank line) ends it.

    Returns {id: {key: value, ...}} with the "id" entry removed from each
    inner dict. Raises KeyError if a record has no "id" line.
    """
    e = {}
    one = {}
    for line in f:
        # partition() splits on the first "=" only, so values may contain "=".
        key, sep, val = line.strip().partition('=')
        if sep:
            one[key] = val
        elif one:
            # Separator after a record: store it. Repeated separators are
            # ignored rather than being mistaken for the end of the input.
            e[one.pop('id')] = one
            one = {}
    if one:  # last record when the input does not end with a separator
        e[one.pop('id')] = one
    return e
Example:
>>> with open('data.txt') as f:
..: print entries(f)['2']
{'age': '16', 'occupation': 'Waiter', 'name': 'Dave'}
Get all of the person's attributes and values (i.e. id, name, occupation, age, etc.) until you find
an empty line.
def get_person_by_id(id):
    """Return the record for *id* from the 'rooms' file as a dict.

    Collects the "id=<id>" line and every following "attr=value" line until
    a blank line, the next "id=" line, or end of file. Prints "<id> found"
    when the record is located. Returns {} if the id is not present.
    """
    person = {}
    found = False
    with open('rooms', 'r') as file:
        for line in file:
            line = line.strip()  # drop the newline so it does not end up in values
            if found:
                # The record ends at a blank line or where the next one begins.
                if not line or line.startswith("id="):
                    break
                attr, _, value = line.partition("=")
                person[attr] = value
            elif line == "id=" + id:  # exact match: "id=1" must not hit "id=10"
                print(id + " found")
                found = True
                person["id"] = id
    return person
This solution is a bit more forgiving of empty lines within records.
def read_persons(it):
    """Yield one dict per person from an iterable of "key=value" lines.

    A line whose key is "id" starts a new record. Lines without "=" (blank
    or malformed) are ignored, so empty lines inside a record are harmless.
    """
    person = {}
    for line in it:
        # Strip both "\r" and "\n" so CRLF input does not leave "\r" in values;
        # partition() splits on the first "=" only.
        key, sep, value = line.rstrip('\r\n').partition('=')
        if not sep:
            continue
        if key == 'id' and person:
            # A new record begins: hand out the one collected so far.
            yield person
            person = {}
        person[key] = value
    if person:  # the last record has no following id line to flush it
        yield person

Categories

Resources