Python Noob issue with populating dictionary from file. Then updating dict and writing back to file - python

The code below is supposed to look up the first column (the key) of each line of a source file, fr, in a dictionary file, dict_file, and replace that first column with the value found for the key. It also keeps dict_file up to date as a persistent dictionary for future lookups.
Every time the code is run, it initializes a dictionary from that dict_file file. If it finds a new email address from another file, it adds it to the bottom of the dict_file.
As I understand it, this should work fine: if the first column doesn't contain a # symbol, looking_for is assigned the value "Dummy#dummy.com", and Dummy#dummy.com should be appended to the bottom of dict_file.
But for some reason, I keep getting blank lines appended, along with the other new emails, at the end of dict_file. I can't have blanks and newlines written to the end of dict_file.
Why is this happening? What's wrong in the code below? My brain is about to explode! Any help will be greatly appreciated!
#!/usr/bin/python
import sys
d = {}
line_list=[]
alist=[]
f = open(sys.argv[3], 'r') # Map file
for line in f:
alist = line.split()
key = alist[0]
value = alist[1]
d[str(key)] = str(value)
alist=[]
f.close()
fr = open(sys.argv[1], 'r') # source file
fw = open(sys.argv[2]+"/masked_"+sys.argv[1], 'w') # target file
for line in fr:
columns = line.split("|")
looking_for = columns[0] # this is what we need to search
if looking_for in d:
# by default, iterating over a dictionary will return keys
if not looking_for.find("#"):
looking_for == "Dummy#dummy.com"
new_line = d[looking_for]+'|'+'|'.join(columns[1:])
line_list.append(new_line)
else:
new_line = d[looking_for]+'|'+'|'.join(columns[1:])
line_list.append(new_line)
else:
new_idx = str(len(d)+1)
d[looking_for] = new_idx
kv = open(sys.argv[3], 'a')
kv.write("\n"+looking_for+" "+new_idx)
kv.close()
new_line = d[looking_for]+'|'+'|'.join(columns[1:])
line_list.append(new_line)
fw.writelines(line_list)
Here is the dict_file:
WHATEmail#SIMPLE.COM 223
SamHugan#CR.COM 224
SAMASHER#CATSTATIN.COM 225
FAKEEMAIL#SLOW.com 226
SUPERMANN#MYMY.COM 227
Here is the fr file that gets the first column turned into the id from the dict_file lookup:
WHATEmail#SIMPLE.COM|12|1|GDSP
FAKEEMAIL#SLOW.com|13|7|GDFP
MICKY#FAT.COM|12|1|GDOP
SUPERMANN#MYMY.COM|132|1|GUIP
MONITOR|132|1|GUIP
|132|1|GUIP
00 |12|34|GUILIGAN

Firstly, you need to ignore blank lines in your initial dictionary read; otherwise you will get an index-out-of-range error when you run this script again. Do the same when you read via the fr object, to avoid adding empty entries. Move your email check further out so that it applies to both branches. Do a simple check for the "#" using the find method, and you're good to go.
Try the below. This should work:
#!/usr/bin/python
"""Replace the first column of a pipe-delimited file with a persisted id.

Usage: script SOURCE_FILE TARGET_DIR DICT_FILE

DICT_FILE holds one "key id" pair per line. Every key from SOURCE_FILE that
is not in the dictionary yet is given the id len(d) + 1 and appended to
DICT_FILE. First columns without a "#" (after at least one character) are
all mapped to the placeholder key "Dummy#dummy.com". The masked lines are
written to TARGET_DIR/masked_SOURCE_FILE.
"""
import sys

d = {}
line_list = []

# Load the persisted dictionary, ignoring blank lines.
last_line = ''
with open(sys.argv[3], 'r') as f:  # Persisted Dictionary File
    for line in f:
        last_line = line
        alist = line.split()
        if not alist:
            continue
        d[alist[0]] = alist[1]

# If the dictionary file does not end with a newline, the first appended
# entry must start on a fresh line or it would be glued to the last one.
separator = '\n' if last_line and not last_line.endswith('\n') else ''

with open(sys.argv[1], 'r') as fr, \
        open(sys.argv[2] + "/masked_" + sys.argv[1], 'w') as fw, \
        open(sys.argv[3], 'a') as kv:
    for line in fr:
        line = line.strip()
        if not line:
            continue
        columns = line.split('|')
        # A usable address has at least one character before the "#".
        if columns[0].find("#") > 0:
            looking_for = columns[0]  # this is what we need to search
        else:
            looking_for = "Dummy#dummy.com"
        if looking_for not in d:
            # NOTE(review): len(d) + 1 can collide with an id already in the
            # dictionary file if the stored ids are not 1..len(d) - confirm.
            new_idx = str(len(d) + 1)
            d[looking_for] = new_idx
            kv.write(separator + looking_for + " " + new_idx + '\n')
            separator = ''
        # The input line was stripped, so the newline is restored here;
        # without it every masked record ends up on one single line.
        line_list.append(d[looking_for] + '|' + '|'.join(columns[1:]) + '\n')
    fw.writelines(line_list)

Related

Incorrectly reading lines of a text File in python

So basically i want to iterate the lines of a text file that has this format:
-----------------------------------------
Code: 0123456789
EGGS: 3 7.00 21.00
BACON: 1 3.50 3.50
COFFEE: 2 14.20 28.40
TOTAL: 52.90
-----------------------------------------
and i have the following code to read the lines one by one:
with open(filename, "rt", encoding="utf-8") as f:
for line in f:
prevline = line
line.split()
if '-' in line:
temp = f.readline().split(':') #Get Code
print(temp)
AFM = temp[1]
print(AFM)
else:
tempProducts = line.split(':') #Get Product in a list
productName = tempProducts[0] #Store Product Name in a variable
productStats = tempProducts[1] #Store Product Stats in a list
productStats = productStats.split(" ")
for value in productStats:
valueArray.append(float(value))
products.update({productName:valueArray})
if '-' in f.readline():
rec = Receipt(AFM,products)
products={}
valueArray=[]
receipts.append(rec)
else:
line=prevline
Note that I want to skip the lines made of '------------' characters. The code works, but it only reads every second line: the second, then the fourth, then the sixth (Code, BACON, TOTAL). How can I fix this? Edit: there are multiple receipts in the file, so I need to skip the '----------' line each time.
with open(filename, "rt", encoding="utf-8") as f:
old_list = [] # Saving all the lines including '----'
for line in f:
old_list.append(line)
new_list = old_list[1:-1] # new list which removes the '----' lines
You can iterate just through new_list with your .split logic.
See if this does the job
# Build one Receipt per "Code: ... TOTAL: ..." group; separator lines made
# of '-' characters are skipped.
with open(filename, "rt", encoding="utf-8") as f:
    AFM = None
    products = {}
    for line in f:
        # Skip separators and any line that has no "name: values" shape
        # (for example a blank line).
        if '-' in line or ':' not in line:
            continue
        label, _, rest = line.partition(':')
        if 'Code' in label:
            # A Code line starts a new receipt.
            AFM = rest.strip()
            print(AFM)
            products = {}
            continue
        # Split on any whitespace and give each product its own list;
        # iterating the raw string would feed single characters to float().
        products[label] = [float(value) for value in rest.split()]
        if 'TOTAL' in label:
            # TOTAL is the last line of a receipt.
            receipts.append(Receipt(AFM, products))
To anyone seeing this post now: consider it closed. I did not provide enough information and the code was messed up. Sorry for wasting your time.

Converting text file to list of dictionaries

I have written a script to convert a text file into dictionary..
script.py
l=[]
d={}
count=0
f=open('/home/asha/Desktop/test.txt','r')
for row in f:
rowcount+=1
if row[0] == ' ' in row:
l.append(row)
else:
if count == 0:
temp = row
count+=1
else:
d[temp]=l
l=[]
count=0
print d
textfile.txt
Time
NtGetTickCount
NtQueryPerformanceCounter
NtQuerySystemTime
NtQueryTimerResolution
NtSetSystemTime
NtSetTimerResolution
RtlTimeFieldsToTime
RtlTimeToTime
System informations
NtQuerySystemInformation
NtSetSystemInformation
Enumerations
Structures
The output i have got is
{'Time\n': [' NtGetTickCount\n', ' NtQueryPerformanceCounter\n', ' NtQuerySystemTime\n', ' NtQueryTimerResolution\n', ' NtSetSystemTime\n', ' NtSetTimerResolution\n', ' RtlTimeFieldsToTime\n', ' RtlTimeToTime\n']}
It only converts up to the 9th line of the text file. Please suggest where I am going wrong.
You never commit (i.e. run d[row] = []) the final list to the dictionary.
You can simply commit when you create the row:
d = {}
cur = []
for row in f:
if row[0] == ' ': # line in section
cur.append(row)
else: # new row
d[row] = cur = []
print (d)
Using dict.setdefault to create dictionary with lists as values will make your job easier.
d = {}
with open('input.txt') as f:
key = ''
for row in f:
if row.startswith(' '):
d.setdefault(key, []).append(row.strip())
else:
key = row
print(d)
Output:
{'Time\n': ['NtGetTickCount', 'NtQueryPerformanceCounter', 'NtQuerySystemTime', 'NtQueryTimerResolution', 'NtSetSystemTime', 'NtSetTimerResolution', 'RtlTimeFieldsToTime', 'RtlTimeToTime'], 'System informations\n': ['NtQuerySystemInformation', 'NtSetSystemInformation', 'Enumerations', 'Structures']}
A few things to note here:
Always use with open(...) for file operations.
If you want to check the first index, or the first few indices, use str.startswith()
The same can be done using collections.defaultdict:
from collections import defaultdict
d = defaultdict(list)
with open('input.txt') as f:
key = ''
for row in f:
if row.startswith(' '):
d[key].append(row)
else:
key = row
So you need to know two things at any given time while looping over the file:
1) Are we on a title level or content level (by indentation) and
2) What is the current title
In the following code, we first check if the current line we are at, is a title (so it does not start with a space) and set the currentTitle to that as well as insert that into our dictionary as a key and an empty list as a value.
If it is not a title, we just append to corresponding title's list.
# Group indented lines under the most recent non-indented (title) line.
with open('49359186.txt', 'r') as infile:  # "input" would shadow the builtin
    topics = {}
    currentTitle = ''
    for line in infile:
        line = line.rstrip()
        if not line:
            # Blank line: nothing to index, and line[0] would raise.
            continue
        if line[0] != ' ':
            currentTitle = line
            topics[currentTitle] = []
        else:
            # setdefault covers content lines that appear before any title.
            topics.setdefault(currentTitle, []).append(line)
print(topics)
Try this:
d = {}
key = None
with open('/home/asha/Desktop/test.txt','r') as file:
for line in file:
if line.startswith(' '):
d[key].append(line.strip())
else:
key = line.strip(); d[key] = []
print(d)
Just for the sake of adding in my 2 cents.
This problem is easier to tackle backwards. Consider iterating through your file backwards and then storing the values into a dictionary whenever a header is reached.
# Walk the file bottom-up: indented rows are collected until their header
# (a row that does not start with a space) is reached.
with open('test.txt', 'r') as f:
    rows = f.read().splitlines()
d = {}
l = []
for row in reversed(rows):
    if not row:
        # Empty rows (e.g. from a trailing newline) have no row[0].
        continue
    if row[0] == ' ':
        l.append(row)
    else:
        # Rows were collected bottom-up; restore file order before storing.
        l.reverse()
        d[row] = l
        l = []
Just keep track the line which start with ' ' and you are done with one loop only :
# keys[i] is a header line and final[i] is the list of indented lines that
# follow it; both lists grow together so zip() always pairs them correctly,
# even for a header that has no indented lines.
final = []
keys = []
with open('new_text.txt', 'r') as f:
    for line in f:
        text = line.strip()
        if not text:
            continue  # blank lines carry no header and no data
        if not line.startswith(' '):
            keys.append(text)
            final.append([])
        elif final:
            # Indented lines before the first header have no key; skip them.
            final[-1].append(text)
print(dict(zip(keys, final)))
output:
{'Example': ['data1', 'data2'], 'Time': ['NtGetTickCount', 'NtQueryPerformanceCounter', 'NtQuerySystemTime', 'NtQueryTimerResolution', 'NtSetSystemTime', 'NtSetTimerResolution', 'RtlTimeFieldsToTime', 'RtlTimeToTime'], 'System informations': ['NtQuerySystemInformation', 'NtSetSystemInformation', 'Enumerations', 'Structures']}

Make a list from a CSV file without the header

This function returns the population of either a specific state or, when the parameter is left blank, all the states. When I leave the parameter blank, though, it gives me an error, because the first 3 rows of the file are headers.
def findpop(state=None):
f=open(getMediaPath("population_state_reduced (2).csv"),"rt")
for line in f:
parts = line.split(',')
if state is None:
return [(parts[4], int(parts[5]))]
else:
for line in f:
if parts[4] == state.capitalize():
return int(parts[5])
print findpop()
Just skip the first 3 lines:
def findpop(state=None):
    """Look up state populations in "population_state_reduced (2).csv".

    The file path is resolved through getMediaPath (defined elsewhere).
    The first three lines of the file are treated as headers and skipped.
    Column 4 of each row is compared with the state name and column 5 is
    converted to int as the population.

    state -- name of the state to look up (case-insensitive), or None.

    Returns a list of (name, population) tuples for every data row when
    state is None; otherwise the population of the first matching row as
    an int, or None when no row matches.
    """
    f = open(getMediaPath("population_state_reduced (2).csv"), "rt")
    try:
        populations = []
        for index, line in enumerate(f):
            if index < 3:
                continue  # header rows
            parts = line.split(',')
            if len(parts) < 6:
                continue  # blank or truncated row
            if state is None:
                populations.append((parts[4], int(parts[5])))
            elif parts[4].lower() == state.lower():
                # Case-insensitive so multi-word names match as well.
                return int(parts[5])
    finally:
        f.close()
    if state is None:
        return populations
    return None
print findpop()

Use python to extract lines that contain different keywords into dictionaries

So have an input file to script like as follows:
20248109|Generic|1|xxx|2|yyy|LINEA|68.66|68.67|True|2920958141272
.
.
.
21248109|Generic|3|xxx|4|www|LINEB|7618|7622|True|2920958281071.97
I want the Python script to iterate through the file and put the LINEA (and LINEB) values into a dictionary, as follows: {{1:[68.66,68.67]},{3:[7618,7622]}}
here's as far as i've gotten:
Key = ["LINEA", "LINEB"]
fin = open(path)
test = []
for line in fin.readlines():
if True in [item in line for item in Key]:
test.append(line)
Any help at all would be fantastic.
First, you should use the csv module:
import csv
with open(path, "rb") as infile:
reader = csv.reader(infile, delimiter="|")
Then, you can iterate over the lines:
test = []
for row in reader:
if row[6] in Key:
test.append({int(row[2]): row[7:9]})
I would do this:
keys = ["LINEA", "LINEB"]
# Map the text following "Generic|" to the whole line, for every line that
# mentions one of the keywords.
with open(path) as fin:
    answer = {
        line.partition("Generic|")[-1]: line
        for line in fin
        if any(key in line for key in keys)
    }
To edit your answer directly, you're actually quite close:
Key = ["LINEA", "LINEB"]
test = {}  # dictionary
# The with-block closes the file; iterating it directly avoids loading
# every line into memory first.
with open(path) as fin:
    for line in fin:
        if any(item in line for item in Key):
            dict_key = line.partition("Generic|")[-1]
            test[dict_key] = line

Python - confusion in how to code for reading file for updating a dynamic dictionary for lookup

I'm a total noob in Python. How would I refactor the code below so that the dictionary d is actually a file on the filesystem, to which an email and the next incremental int id are appended whenever a new email address is found in fr?
This is the fr file's file structure:
7#comp1.COM|4|11|GDSPV
7#comp1.COM|16|82|GDSPV
13#comp1.COM|16|82|GDSPV
The below is my program.. it masks email addresses with ids.
Notice that currently, I have hard-coded the d dictionary.
d= {
'7#comp1.COM': '199',
'8#comp4.COM': '200',
'13#comp1.COM': '205'
}
fr = open(sys.argv[1], 'r')
fw = open("masked_"+sys.argv[1], 'w')
cnt = 0
i = 1
line_list = []
for line in fr:
columns = line.split("|")
looking_for = columns[0] # this is what we need to search
if looking_for in d:
# by default, iterating over a dictionary will return keys
new_line = d[looking_for]+'|'+'|'.join(columns[1:])
line_list.append(new_line)
fw.writelines(line_list)
fr.close()
fw.close()
Also, I would like to send multiple files through this program: hundreds of them, using a wildcard or something (*) for the fr file reader. Can you include that in the solution, if possible? THANKS!!!
Take a look at the python shelve module. It is a dictionary-type object that is persistant to the filesystem (using pickle under the hood).
import shelve
filename = sys.argv[1]
d = shelve.open(filename)
d.update({
'7#comp1.COM': '199',
'8#comp4.COM': '200',
'13#comp1.COM': '205'
})
d.close()
d = shelve.open(filename)
print d
#{'8#comp4.COM': '200', '7#comp1.COM': '199', '13#comp1.COM': '205'}
d.keys()
#['8#comp4.COM', '13#comp1.COM', '7#comp1.COM']
d['13#comp1.COM']
#'205'
I would like to maintain and persist the dictionary d in a file.
pickle
Also I would like to send multiple files through this program.
glob
I coded the solution.
#!/usr/bin/python
"""Mask the first column of a pipe-delimited file with ids kept in a file.

Usage: script DICT_FILE SOURCE_FILE TARGET_DIR

DICT_FILE holds one "key id" pair per line. Keys from SOURCE_FILE that are
not in the dictionary yet get the id len(d) + 1 and are appended to
DICT_FILE.
"""
import sys

d = {}
line_list = []

# Load the persisted dictionary, ignoring blank lines.
last_line = ''
with open(sys.argv[1], 'r') as f:
    for line in f:
        last_line = line
        if not line.strip():
            continue
        (key, val) = line.split()
        d[key] = val

# Start the first appended entry on a fresh line when the dictionary file
# does not already end with a newline.
separator = '\n' if last_line and not last_line.endswith('\n') else ''

# NOTE(review): the output name is built from sys.argv[1] (the dictionary
# file), as in the original; confirm it should not be the source file name.
with open(sys.argv[2], 'r') as fr, \
        open(sys.argv[3] + "/masked_" + sys.argv[1], 'w') as fw, \
        open(sys.argv[1], 'a') as kv:
    for line in fr:
        if not line.strip():
            continue  # a blank line would otherwise become a dictionary key
        columns = line.rstrip('\r\n').split("|")
        looking_for = columns[0]  # this is what we need to search
        if looking_for not in d:
            new_idx = str(len(d) + 1)
            d[looking_for] = new_idx
            # Append to the dictionary file itself, one entry per line.
            # sys.argv[3] is the target directory and cannot be written to.
            kv.write(separator + looking_for + " " + new_idx + '\n')
            separator = ''
        line_list.append(d[looking_for] + '|' + '|'.join(columns[1:]) + '\n')
    fw.writelines(line_list)

Categories

Resources