Turning a text file into a dictionary - Python

I have a text file that has following structure:
mom:2
dad:3
mom:4
dad:2
me:4
And I need to make a dictionary that would display each name only once, but adding the numeric values together. In this case it would look like this:
{'dad': 5, 'me': 4, 'mom': 6}
How should I approach this problem?
I've tried
d = {}
try:
    file = open("file.txt", "r")
    for line in file:
        (a, b) = line.split(":")
        d[a] = float(b)
except IOError:
    print()
but I haven't found a way to count up the values.

with open('file.txt', 'r') as f:
    fp = f.readlines()
t = [l.strip().split(':') for l in fp if l != '\n']
d = {}
for l in t:
    d[l[0]] = d.setdefault(l[0], 0) + int(l[1])
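If you don't mind another approach, collections.Counter does the summing for you. A minimal sketch, assuming the same file.txt format as above:
from collections import Counter

totals = Counter()
with open('file.txt') as f:
    for line in f:
        line = line.strip()
        if line:                        # skip blank lines
            name, value = line.split(':')
            totals[name] += int(value)  # missing keys default to 0
print(dict(totals))  # {'mom': 6, 'dad': 5, 'me': 4}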


Python: Problem when making a dict from a text file

My text file looks like this:
comp_1-item_14,
comp_2-item_1,item_7,item_35
comp_3-item_4,item_7,item_10,item_1,item_2
I want to make a dictionary from the text file. It should look like
{"comp_1": ("item_14"), "comp_2": ("item_1","item_7","item_35")}
How can I delete the '-' from this and fix it? My code is:
d = {}
with open('pr.txt', 'r') as p:
    for line in p:
        split = line.split()
        d[split[0]] = "-".join(split[0:])
print(d)
I just tried with a single line and it works fine:
line = "comp_3-item_4,item_7,item_10,item_1,item_2"
d = {}
line_list = line.split('-')
d[line_list[0]] = tuple(line_list[1].split(','))
print(d)
Output :
{'comp_3': ('item_4', 'item_7', 'item_10', 'item_1', 'item_2')}
Change split = line.split() to split = line.split('-')
Try this:
d = {}
with open('pr.txt', 'r') as f:
    for l in f.readlines():
        split_ = l.strip().split("-")
        d[split_[0]] = tuple(x for x in split_[1].split(",") if x)
{'comp_1': ('item_14',), 'comp_2': ('item_1', 'item_7', 'item_35')...}
d = {}
with open('pr.txt', 'r') as p:
    for line in p:
        s = line.strip().split('-')
        d[s[0]] = "".join(s[1:])
print(d)
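For what it's worth, the same parse can be squeezed into a dict comprehension. A minimal sketch, assuming every non-blank line contains exactly one '-':
with open('pr.txt') as f:
    d = {name: tuple(i for i in items.split(',') if i)  # drop empties left by trailing commas
         for name, items in (line.strip().split('-') for line in f if line.strip())}
print(d)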

Optimize the for loop in Python

Hello, I currently have the following code:
n = 0
with open('/home/user/test.filter') as f:
    lines = f.readlines()
for l in lines:
    if lines[n].startswith('-A vlan_8'):
        if "-o bond1.8" in lines[n]:
            f = open('vlan8.filter_in', 'a')
            f.write(l)
        else:
            f = open('vlan8.filter_out', 'a')
            f.write(l)
    if lines[n].startswith('-A vlan_10'):
        if "-o bond1.10" in lines[n]:
            f = open('vlan10.filter_in', 'a')
            f.write(l)
        else:
            f = open('vlan10.filter_out', 'a')
            f.write(l)
    if lines[n].startswith('-A vlan_15'):
        if "-o bond1.15" in lines[n]:
            f = open('vlan15.filter_in', 'a')
            f.write(l)
        else:
            f = open('vlan15.filter_out', 'a')
            f.write(l)
    # [...]
    n = n + 1
I thought about optimizing it with some accumulator or list so the script doesn't get so repetitive. Any suggestions?
Sure you can. Maintain a list of these numbers, like so:
numList = [8, 10, 15, ...]
Now, all you need is a little string formatting.
with open('/home/user/test.filter') as f:
    lines = f.readlines()
for i, l in enumerate(lines):  # I've used enumerate to eliminate n, but really you don't need the index at all
    for num in numList:
        if l.startswith('-A vlan_%d' % num):
            if "-o bond1.%d" % num in l:
                f = open('vlan%d.filter_in' % num, 'a')
            else:
                f = open('vlan%d.filter_out' % num, 'a')
            f.write(l)
            f.close()
            break
I think you want to make the code cleaner, not faster. If that is so maybe this would work:
import re

parameter_re = re.compile(r'^-A vlan_(\d+).*')

with open('data.csv') as f:
    lines = f.readlines()

for line in lines:
    # Match required parameter
    match = parameter_re.match(line)
    # Skip line if it doesn't match the required parameter
    if not match:
        continue
    # Extract number from parameter
    number = int(match.group(1))
    # Create output parameter string
    output_str = '-o bond1.%d' % number
    # Select correct filename to save the line
    if output_str in line:
        output_filename = 'vlan%d.filter_in' % number
    else:
        output_filename = 'vlan%d.filter_out' % number
    # Write the line to the correct file
    with open(output_filename, 'a') as f:
        f.write(line)
And if you want to make it shorter (which I don't recommend, better for it to be readable):
import re

with open('data.csv') as f:
    lines = f.readlines()

for line in lines:
    match = re.match(r'^-A vlan_(\d+).*', line)
    if not match:
        continue
    number = int(match.group(1))
    if '-o bond1.%d' % number in line:
        output_filename = 'vlan%d.filter_in' % number
    else:
        output_filename = 'vlan%d.filter_out' % number
    with open(output_filename, 'a') as f:
        f.write(line)
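Both versions still reopen an output file for every matching line. If the input is large, a further option is to keep each output file open in a dict of handles so every file is opened only once. A minimal sketch of that idea, untested, using the same filenames as above:
import re

parameter_re = re.compile(r'^-A vlan_(\d+)')
handles = {}  # filename -> open file object, so each file is opened only once

with open('/home/user/test.filter') as f:
    for line in f:
        match = parameter_re.match(line)
        if not match:
            continue
        number = int(match.group(1))
        direction = 'in' if '-o bond1.%d' % number in line else 'out'
        filename = 'vlan%d.filter_%s' % (number, direction)
        if filename not in handles:
            handles[filename] = open(filename, 'a')
        handles[filename].write(line)

for handle in handles.values():
    handle.close()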

Python - confusion in how to code for reading file for updating a dynamic dictionary for lookup

I'm a total noob in Python: how would I refactor the code below so that the dictionary d is actually a file on the filesystem, appended to with an email and the next incremental int id whenever a new email address is found in fr?
This is the structure of the fr file:
7#comp1.COM|4|11|GDSPV
7#comp1.COM|16|82|GDSPV
13#comp1.COM|16|82|GDSPV
Below is my program; it masks email addresses with ids.
Notice that currently I have hard-coded the d dictionary.
import sys

d = {
    '7#comp1.COM': '199',
    '8#comp4.COM': '200',
    '13#comp1.COM': '205'
}
fr = open(sys.argv[1], 'r')
fw = open("masked_" + sys.argv[1], 'w')
cnt = 0
i = 1
line_list = []
for line in fr:
    columns = line.split("|")
    looking_for = columns[0]  # this is what we need to search
    if looking_for in d:
        # by default, iterating over a dictionary will return keys
        new_line = d[looking_for] + '|' + '|'.join(columns[1:])
        line_list.append(new_line)
fw.writelines(line_list)
fr.close()
fw.close()
Also, I would like to send multiple files through this program, hundreds of them, using a wildcard or something (*) for the fr file reader. Can you include that in the solution if possible? Thanks!
Take a look at the Python shelve module. It is a dictionary-like object that persists to the filesystem (using pickle under the hood).
import sys
import shelve

filename = sys.argv[1]
d = shelve.open(filename)
d.update({
    '7#comp1.COM': '199',
    '8#comp4.COM': '200',
    '13#comp1.COM': '205'
})
d.close()

d = shelve.open(filename)
print(dict(d))
# {'8#comp4.COM': '200', '7#comp1.COM': '199', '13#comp1.COM': '205'}
list(d.keys())
# ['8#comp4.COM', '13#comp1.COM', '7#comp1.COM']
d['13#comp1.COM']
# '205'
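Tying that back to the question, the masking loop can consult the shelf and assign the next incremental id whenever an address is new. A minimal sketch, where ids.db and in.txt are hypothetical file names and the 199 offset just mirrors the example ids:
import shelve

d = shelve.open('ids.db')  # persists across runs
with open('in.txt') as fr, open('masked_in.txt', 'w') as fw:
    for line in fr:
        columns = line.split('|')
        email = columns[0]
        if email not in d:
            d[email] = str(len(d) + 199)  # next incremental id (199 mirrors the example)
        fw.write(d[email] + '|' + '|'.join(columns[1:]))
d.close()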
"I would like to maintain and persist the dictionary d in a file." - use the pickle module.
"Also I would like to send multiple files through this program." - use the glob module.
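A minimal sketch of the glob part, assuming the data files match a *.txt pattern; mask() is a hypothetical helper standing in for the lookup loop above:
import glob

for path in glob.glob('*.txt'):  # expand the wildcard into matching file names
    with open(path) as fr, open('masked_' + path, 'w') as fw:
        for line in fr:
            fw.write(mask(line))  # mask() is a hypothetical per-line masking helper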
I coded a solution:
#!/usr/bin/python
import sys

d = {}
line_list = []
fr = open(sys.argv[2], 'r')
fw = open(sys.argv[3] + "/masked_" + sys.argv[1], 'w')
# Load the existing email -> id mappings from the lookup file.
with open(sys.argv[1], 'r+') as f:
    for line in f:
        (key, val) = line.split()
        d[key] = val
for line in fr:
    columns = line.split("|")
    looking_for = columns[0]  # this is what we need to search
    if looking_for in d:
        # by default, iterating over a dictionary will return keys
        new_line = d[looking_for] + '|' + '|'.join(columns[1:])
        line_list.append(new_line)
    else:
        new_idx = str(len(d) + 1)
        d[looking_for] = new_idx
        kv = open(sys.argv[3], 'a')  # append, so existing mappings survive
        kv.write(looking_for + " " + new_idx + "\n")
        kv.close()
        new_line = d[looking_for] + '|' + '|'.join(columns[1:])
        line_list.append(new_line)
fw.writelines(line_list)

Finding a line in a file then reading next few lines in Python

I have a plain text file with the following data:
id=1
name=Scott
occupation=Truck driver
age=23
id=2
name=Dave
occupation=Waiter
age=16
id=3
name=Susan
occupation=Computer programmer
age=29
I'm trying to work out the best way to get to any point in the file given an id string, then grab the rows underneath to extract the data for use in my program. I can do something like:
def get_person_by_id(id):
    file = open('rooms', 'r')
    for line in file:
        if ("id=" + id) in line:
            print(id + " found")
But I'm not sure how I can now go through the next bunch of lines and do line.split("=") or similar to extract the info (put into a list or dict or whatever) that I can use in my program. Any pointers?
One option would be to load the entire thing into memory, which would save you from reading the file every time:
with open('rooms') as f:
    chunks = f.read().split('\n\n')

people_by_id = {}
for chunk in chunks:
    data = dict(row.split('=', 1) for row in chunk.split('\n'))
    people_by_id[data['id']] = data
    del data['id']

def get_person_by_id(id):
    return people_by_id.get(id)
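With the sample data above, a quick sanity check of that lookup would be:
person = get_person_by_id('2')
print(person)  # expected: {'name': 'Dave', 'occupation': 'Waiter', 'age': '16'}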
How about exiting from a for loop after finding the correct line:
def get_person_by_id(id):
    file = open('rooms', 'r')
    for line in file:
        if ("id=" + id) in line:
            print(id + " found")
            break
    # now you can continue processing your file:
    next_line = file.readline()
Maybe:
d = dict()
with open(filename) as f:
    for line in f:
        k, v = line.strip().split('=')
        if 'id=' in line:
            d[v] = {}
        d[list(d)[-1]][k] = v
And here is an iterative solution.
objects = []
current_object = None
with open("info.txt") as f:
    for line in f:
        line = line.strip("\r\n")
        if not line:
            current_object = None
            continue
        if current_object is None:
            current_object = {}
            objects.append(current_object)
        key, _, value = line.partition('=')
        current_object[key] = value
print(objects)
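Assuming info.txt holds the sample records from the question, that should print something like:
[{'id': '1', 'name': 'Scott', 'occupation': 'Truck driver', 'age': '23'}, {'id': '2', 'name': 'Dave', ...}, ...]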
Another example of an iterative parser:
from itertools import takewhile

def entries(f):
    e = {}
    def read_one():
        one = {}
        for line in takewhile(lambda x: '=' in x, f):
            key, val = line.strip().split('=')
            one[key] = val
        return one
    while True:
        one = read_one()
        if not one:
            break
        else:
            e[one.pop('id')] = one
    return e
Example:
>>> with open('data.txt') as f:
...     print(entries(f)['2'])
{'age': '16', 'occupation': 'Waiter', 'name': 'Dave'}
Get all the person's attributes and values (i.e. id, name, occupation, age, etc.) until you find an empty line.
def get_person_by_id(id):
    person = {}
    found = False
    file = open('rooms', 'r')
    for line in file:
        if found:
            if line.strip():
                attr, value = line.strip().split("=")
                person[attr] = value
            else:
                return person
        elif ("id=" + id) in line:
            print(id + " found")
            found = True
            attr, value = line.strip().split("=")
            person[attr] = value
    return person
This solution is a bit more forgiving of empty lines within records.
def read_persons(it):
    person = dict()
    for l in it:
        try:
            k, v = l.strip('\n').split('=', 1)
        except ValueError:
            pass
        else:
            if k == 'id':  # New record
                if person:
                    yield person
                person = dict()
            person[k] = v
    if person:
        yield person
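A usage sketch for the generator, building the same id-keyed lookup as the other answers (the file name is assumed):
with open('rooms') as f:
    people = {p['id']: p for p in read_persons(f)}
print(people['3']['occupation'])  # expected: Computer programmer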

Return lines from file in order of a list

I've tried to put together a solution from similar questions but have failed miserably. I just don't know enough about Python yet :(
I have an inputlist containing elements in a particular order ex: ["GRE", "KIN", "ERD", "KIN"]
I have a datafile containing the elements, plus other data ex:
"ERD","Data","Data"...
"KIN","Data","Data"...
"FAC","Data","Data"...
"GRE","Data","Data"...
I need to create an outputlist that contains the lines from the datafile in the order they appear in the inputlist.
The code below returns the outputlist in the order the lines appear in the datafile, which is not the intended behavior... :-\
with open(inputfile, 'r') as f:
    names = [line.strip() for line in f]

outputlist = []
with open(datafile, 'r') as f:
    for line in f:
        name = line.split(',')[0]
        if name[1:-1] in names:
            outputlist.append(line)

output = open(outputfile, 'w')
output.writelines(outputlist)
How can I have it return the list in the proper order? Thanks in advance for your help :-)
Edit
Thanks to Oscar, this is the solution I implemented:
datafile = 'C:\\testing\\bldglist.txt'
inputfile = 'C:\\testing\\inputlist.txt'
outputfile = 'C:\\testing\\output.txt'

with open(inputfile, 'r') as f:
    inputlist = [line.strip() for line in f]

def outputList(inputlist, datafile, outputfile):
    d = {}
    with open(datafile, 'r') as f:
        for line in f:
            line = line.strip()
            key = line.split(',')[0]
            d[key] = line
    with open(outputfile, 'w') as f:
        f.write('"Abbrev","Xcoord","Ycoord"\n')
        for key in inputlist:
            f.write(d[key] + '\n')

outputList(inputlist, datafile, outputfile)
This is the easy solution. It reads the entire data file into memory as a dictionary mapping each line's first field to the whole line. It's then easy to write the lines out in the right order.
If the file is very large (gigabytes) or you don't have a lot of memory, there are other ways. But they're not nearly as nice.
I haven't tested this.
import csv

data = {}
with open(datafile) as f:
    for line in csv.reader(f):
        data[line[0]] = line

with open(outputfile, "w", newline='') as f:  # newline='' avoids blank rows on Windows
    writer = csv.writer(f)
    for entry in inputlist:
        writer.writerow(data[entry])
Assuming a data file with this format:
"ERD","Data","Data"...
"KIN","Data","Data"...
"FAC","Data","Data"...
"GRE","Data","Data"...
Try this solution:
def outputList(inputlist, datafile, outputfile):
    d = {}
    with open(datafile, 'r') as f:
        for line in f:
            line = line.lstrip()
            key = line.split(',')[0]
            d[key] = line
    with open(outputfile, 'w') as f:
        for key in inputlist:
            f.write(d[key])
Use it like this:
outputList(['"GRE"', '"KIN"', '"ERD"', '"KIN"'],
           '/path/to/datafile',
           '/path/to/outputfile')
It will write the output file with the expected order.
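One caveat: d[key] will raise KeyError if the input list contains an abbreviation that is missing from the data file. A defensive variant of the write loop, if that can happen:
for key in inputlist:
    line = d.get(key)
    if line is None:
        print('warning: %s not found in data file' % key)
    else:
        f.write(line)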
1) Create a list with the elements you wish to map to. In this case, ["GRE", "KIN", "ERD", "FAC"]
2) Read the file and map (using a dictionary of lists) the first elements.
3) Output to a file.
import csv

out_index = ["GRE", "KIN", "ERD", "FAC"]
d = {}
with open('/Users/andrew/bin/SO/abcd.txt', 'r') as fr:
    for e in csv.reader(fr):
        if e[0] not in d:
            d[e[0]] = []
        for ea in e[1:]:
            d[e[0]].append(ea)

for i in out_index:
    print(i, ":")
    for e in d[i]:
        print(' ', e)
Given this example data:
"ERD","Data-a1","Data-a2"
"KIN","Data-b1","Data-b2"
"FAC","Data-c1","Data-c2"
"GRE","Data-d1","Data-d2"
"ERD","Data-a3","Data-a4"
"GRE","Data-d3","Data-d4"
Output:
GRE :
Data-d1
Data-d2
Data-d3
Data-d4
KIN :
Data-b1
Data-b2
ERD :
Data-a1
Data-a2
Data-a3
Data-a4
FAC :
Data-c1
Data-c2
Done!
