Python coding issue for csv read - python

Have a simple code where
goal:
open a csv file as list print it --> worked
open a csv file as dictionary
print it --> working
modify it --> is the code correct for it?
print again --> not working
using Pycharm for debug and can't identify the issue. Any help will be highly appreciated.
import sys
import csv
def print_csv_list(list_in):
"""
function takes a list of lists and prints # of lines instructed by counter parameter
:param list_in: list of lists
:return: no return
"""
counter = 0
for line in list_in:
if counter < 2:
for item in line:
sys.stdout.write(item.strip(",") + "\t")
sys.stdout.flush()
print("\n")
counter +=1
def print_csv_file(file_dict):
for dict_item in file_dict:
print dict_item
def modify_dict(file_dict):
print_csv_file(file_dict)
for dict_item in file_dict:
for k, v in dict_item.iteritems():
if k == "ral_file":
dict_item[k] = v.strip("_regs")
print_csv_file(file_dict)
def parse_ral_file(csvfile):
with open(csvfile, 'r')as print_file:
file_read = csv.reader(print_file, delimiter=',')
print_csv_file(file_read)
with open(csvfile, 'r')as dict_file:
file_dict = csv.DictReader(dict_file, delimiter=',')
modify_dict(file_dict)
if __name__ == "__main__":
x = sys.argv[1]
parse_ral_file(x)

When you iterate through a generator (including a file), you leave the pointer at the end. This means any subsequent iteration will result in empty content. You need to use seek and make the pointer go back to the start of the file.
with open(csvfile, 'r')as dict_file:
modify_dict(dict_file)
def modify_dict(dict_file):
file_dict = csv.DictReader(dict_file, delimiter=',')
print_csv_file(file_dict)
dict_file.seek(0) # If you remove this line, the second `print_csv_file`
# won't print anything
print_csv_file(file_dict)

Related

JSON file rewrites data after I re-run the program

I wanted to make a program that tracks the progress of our competition. I made a library containing our names as the key, and our wins as the value. I then made a JSON file to save the progress. But for some reason, when I re-run the program, it goes back to it's initial values and adds the new one; as if it was the first time I used the program.
Here is my code:
import os, json, sys
Numbers = {
"Peter" : 1,
"Drew" : 1,
}
def q1():
New_numbers = {}
q = input("Name? ")
if q not in Numbers:
Numbers[q] =1
with open("list.json", "w") as f:
json.dump(Numbers, f)
f.close()
with open("list.json", "r") as f:
New_numbers = json.load(f)
for key,value in New_numbers.items():
print(key, ":", value)
elif q in Numbers:
Numbers[q] += 1
with open("list.json", "w") as f:
json.dump(Numbers, f)
f.close()
with open("list.json", "r") as f:
New_numbers = json.load(f)
for key,value in New_numbers.items():
print(key, ":", value)
q1()
The first use, it works perfectly. However, as I've mentioned before, when I use it again, it loads the initial library; not the JSON file.

Convert complex nested JSON to csv using python

I got a complex nested JSON file and I need to convert it completely, but my code can only convert part of the information,
How can I modify my code, or do you guys have a better code?
my json file
import csv
import json
import sys
import codecs
def trans(path):
jsonData = codecs.open('H://14.json', 'r', 'utf-8')
# csvfile = open(path+'.csv', 'w')
# csvfile = open(path+'.csv', 'wb')
csvfile = open('H://11.csv', 'w', encoding='utf-8', newline='')
writer = csv.writer(csvfile, delimiter=',')
flag = True
for line in jsonData:
dic = json.loads(line)
if flag:
keys = list(dic.keys())
print(keys)
writer.writerow(keys)
flag = False
writer.writerow(list(dic.values()))
jsonData.close()
csvfile.close()
if __name__ == '__main__':
path=str(sys.argv[0])
print(path)
trans(path)
my json file
{"id":"aa","sex":"male","name":[{"Fn":"jeri","Ln":"teri"}],"age":45,"info":[{"address":{"State":"NY","City":"new york"},"start_date":"2001-09","title":{"name":"Doctor","Exp":"head"},"year":"2001","month":"05"}],"other":null,"Hobby":[{"smoking":null,"gamble":null}],"connect":[{"phone":"123456789","email":"info#gmail.com"}],"Education":"MBA","School":{"State":"NY","City":"new york"}}
{"id":"aa","sex":"female","name":[{"Fn":"lo","Ln":"li"}],"age":34,"info":[{"address":{"State":"NY","City":"new york"},"start_date":"2008-11","title":{"name":"Doctor","Exp":"hand"},"year":"2008","month":"02"}],"other":null,"Hobby":[{"smoking":null,"gamble":null}],"connect":[{"phone":"123456789","email":"info#gmail.com"}],"Education":"MBA","School":{"State":"NY","City":"new york"}}
It only converts part of the information, 'name''info''Hobby''connect''School' these information are not converted,I need to convert all information completely,
You could use the below function to treat each dic. It will flatten the dict by recursive calls until there is no dict or list in the values. In order to avoid issues with 2 keys having the same name, I concatenate with the previous level.
WARNING: it is based on your format so if you have lists with more than one element in the middle, it will only take care of the first element.
def flatten_dict(input_dict, result = None):
result = result or {}
for key, value in input_dict.items():
if isinstance(value, list):
current_dict = {key+"_"+k: v for k, v in value[0].items()}
flatten_dict(current_dict, result)
elif isinstance(value, dict):
current_dict = {key+"_"+k: v for k, v in value.items()}
flatten_dict(current_dict, result)
else:
result[key] = value
return result
Then apply this function on each dic, transform to Dataframe and save as CSV.
res = []
for line in jsonData:
dic = json.loads(line)
res.append(flatten_dict(dic))
res_df = pd.DataFrame(res)
res_df.to_csv("result.csv")
Result:

Why is this code just deleting the whole contents of the file?

I am making a code that checks if a certain user name is in a text file.
If it is, it stores the score. However, once it reaches more than 3 scores it deletes the oldest to maintain it at 3 scores.
Here is my code:
if userclass=="1":
filefordataclass1 = open("Class1scores.txt", "a"); #this opens/creates a new text file
filefordataclass1.write(str(username) + ":" + str(score))#this converts the
filefordataclass1.write("\n")
user_scores = {}
with open("Class1scores.txt", "r+")as file:
file.seek(0)
scores = file.readlines()
for line in scores:
name, scores = line.rstrip('\n').split(':',1)
if name not in user_scores:
user_scores[name] = deque(maxlen=3)
temp_q = user_scores[name]
temp_q.append(str(score))
user_scores[name] = temp_q
filehandle=open("Class1scores.txt", "w")
for key, values in user_scores.items():
filehandle.write(name + ',')
filehandle.write(','.join(list(values)) + '\n')
filehandle.close()# Initialize score list
filefordataclass1.close
If you can tell me what is wrong with the python code and how to fix it It would be much appreciated.
Don't chance your file multiple times. First read the contents, then add the new score, then write everything:
from collections import defaultdict, deque
if userclass=="1":
user_scores = defaultdict(lambda: deque(maxlen=3))
with open("Class1scores.txt", "r") as lines:
for line in lines:
name, scores = line.rstrip('\n').split(':',1)
user_scores[name].extend(scores.split(','))
user_scores[username].append(str(score))
with open("Class1scores.txt", "w") as output:
for key, values in user_scores.items():
filehandle.write('%s:%s\n' % (key, ','.join(list(values))))
Otherwise you are lost in searching for errors.
You should open the output file with "a" (append) instead of "w" (write).
no need to open the file again in write mode as you have already opened the file in read/write mode with r+.Use seek and truncate after storing the file data in variable. Code is as follows:
from collections import defaultdict, deque
userclass = "1"
if userclass=="1":
user_scores = defaultdict(lambda: deque(maxlen=3))
f = open("Class1scores.txt", "r+")
lines = f.readlines()
print lines
for line in lines:
name, scores = line.rstrip().split(':')
user_scores[name].extend(scores.split(','))
if len(user_scores) > 0:
f.seek(0)
f.truncate()
for key, values in user_scores.items():
f.write('%s:%s\n' % (key, ','.join(list(values))))
f.close()
hope this helps :)

Edit CSV file in python which reads values from another json file in python

I wanted to edit a csv file which reads the value from one of my another json file in python 2.7
my csv is : a.csv
a,b,c,d
,10,12,14
,11,14,15
my json file is a.json
{"a":20}
i want my where the column 'a' will try to match in json file. if their is a match. it should copy that value from json and paste it to my csv file and the final output of my csv file should be looks like this.
a,b,c,d
20,10,12,14
20,11,14,15
Till now I what I have tried is
fileCSV = open('a.csv', 'a')
fileJSON = open('a.json', 'r')
jsonData = fileJSON.json()
for k in range(jsonData):
for i in csvRow:
for j in jsonData.keys():
if i == j:
if self.count == 0:
self.data = jsonData[j]
self.count = 1
else:
self.data = self.data + "," + jsonData[j]
self.count = 0
fileCSV.write(self.data)
fileCSV.write("\n")
k += 1
fileCSV.close()
print("File created successfully")
I will be really thankful if anyone can help me for this.
please ignore any syntactical and indentation error.
Thank You.
Some basic string parsing will get you here.. I wrote a script which works for the simple scenario which you refer to.
check if this solves your problem:
import json
from collections import OrderedDict
def list_to_csv(listdat):
csv = ""
for val in listdat:
csv = csv+","+str(val)
return csv[1:]
lines = []
csvfile = "csvfile.csv"
outcsvfile = "outcsvfile.csv"
jsonfile = "jsonfile.json"
with open(csvfile, encoding='UTF-8') as a_file:
for line in a_file:
lines.append(line.strip())
columns = lines[0].split(",")
data = lines[1:]
whole_data = []
for row in data:
fields = row.split(",")
i = 0
rowData = OrderedDict()
for column in columns:
rowData[columns[i]] = fields[i]
i += 1
whole_data.append(rowData)
with open(jsonfile) as json_file:
jsondata = json.load(json_file)
keys = list(jsondata.keys())
for key in keys:
value = jsondata[key]
for each_row in whole_data:
each_row[key] = value
with open(outcsvfile, mode='w', encoding='UTF-8') as b_file:
b_file.write(list_to_csv(columns)+'\n')
for row_data in whole_data:
row_list = []
for ecolumn in columns:
row_list.append(row_data.get(ecolumn))
b_file.write(list_to_csv(row_list)+'\n')
CSV output is not written to the source file but to a different file.
The output file is also always truncated and written, hence the 'w' mode.
I would recommend using csv.DictReader and csv.DictWriter classes which will read into and out of python dicts. This would make it easier to modify the dict values that you read in from the JSON file.

Python Noob issue with populating dictionary from file. Then updating dict and writing back to file

The code below is supposed to lookup first column (key) from a file Dict_file and replace the first column of another file fr, with the value of the key found from dict_file. But it keeps the dict_file as an updated dictionary for future lookups.
Every time the code is run, it initializes a dictionary from that dict_file file. If it finds a new email address from another file, it adds it to the bottom of the dict_file.
It should work fine according to my understanding because if it doesn't find an # symbol it assigns looking_for the value of "Dummy#dummy.com".. Dummy#dummy.com should be appended to the bottom of dict_file.
But for some reason, I keep getting new lines and blank lines appended along with other new emails at the end of the dict_file. I can't be writing blanks and newlines to the end of the dict_file.
Why is this happening? Whats wrong in the code below, my brain is about to explode! Any help will be greatly appreciated!
#!/usr/bin/python
import sys
d = {}
line_list=[]
alist=[]
f = open(sys.argv[3], 'r') # Map file
for line in f:
alist = line.split()
key = alist[0]
value = alist[1]
d[str(key)] = str(value)
alist=[]
f.close()
fr = open(sys.argv[1], 'r') # source file
fw = open(sys.argv[2]+"/masked_"+sys.argv[1], 'w') # target file
for line in fr:
columns = line.split("|")
looking_for = columns[0] # this is what we need to search
if looking_for in d:
# by default, iterating over a dictionary will return keys
if not looking_for.find("#"):
looking_for == "Dummy#dummy.com"
new_line = d[looking_for]+'|'+'|'.join(columns[1:])
line_list.append(new_line)
else:
new_line = d[looking_for]+'|'+'|'.join(columns[1:])
line_list.append(new_line)
else:
new_idx = str(len(d)+1)
d[looking_for] = new_idx
kv = open(sys.argv[3], 'a')
kv.write("\n"+looking_for+" "+new_idx)
kv.close()
new_line = d[looking_for]+'|'+'|'.join(columns[1:])
line_list.append(new_line)
fw.writelines(line_list)
Here is the dict_file:
WHATEmail#SIMPLE.COM 223
SamHugan#CR.COM 224
SAMASHER#CATSTATIN.COM 225
FAKEEMAIL#SLOW.com 226
SUPERMANN#MYMY.COM 227
Here is the fr file that gets the first column turned into the id from the dict_file lookup:
WHATEmail#SIMPLE.COM|12|1|GDSP
FAKEEMAIL#SLOW.com|13|7|GDFP
MICKY#FAT.COM|12|1|GDOP
SUPERMANN#MYMY.COM|132|1|GUIP
MONITOR|132|1|GUIP
|132|1|GUIP
00 |12|34|GUILIGAN
Firstly, you need to ignore blanks in your initial dictionary read, otherwise you will get an index out of range error when you run this script again. Do the same when you read via the fr object to avoid entering nulls. Wrap your email check condition further out for greater scope. Do a simple check for the "#" using the find method. And you're good to go.
Try the below. This should work:
#!/usr/bin/python
import sys
d = {}
line_list=[]
alist=[]
f = open(sys.argv[3], 'r') # Persisted Dictionary File
for line in f:
line = line.strip()
if line =="":
continue
alist = line.split()
key = alist[0]
value = alist[1]
d[str(key)] = str(value)
alist=[]
f.close()
fr = open(sys.argv[1], 'r') # source file
fw = open(sys.argv[2]+"/masked_"+sys.argv[1], 'w') # Target Directory Location
for line in fr:
line = line.strip()
if line == "":
continue
columns = line.strip().split('|')
if columns[0].find("#") > 1:
looking_for = columns[0] # this is what we need to search
else:
looking_for = "Dummy#dummy.com"
if looking_for in d:
# by default, iterating over a dictionary will return keys
new_line = d[looking_for]+'|'+'|'.join(columns[1:])
line_list.append(new_line)
else:
new_idx = str(len(d)+1)
d[looking_for] = new_idx
kv = open(sys.argv[3], 'a')
kv.write(looking_for+" "+new_idx+'\n')
kv.close()
new_line = d[looking_for]+'|'+'|'.join(columns[1:])
line_list.append(new_line)
fw.writelines(line_list)

Categories

Resources