Convert complex nested JSON to csv using python - python

I have a complex nested JSON file that I need to convert to CSV completely, but my code only converts part of the information.
How can I modify my code, or does anyone have a better approach?
my code
import csv
import json
import sys
import codecs
def trans(path, json_path='H://14.json', csv_path='H://11.csv'):
    """Convert a JSON Lines file to CSV, one CSV row per JSON object.

    The header row is taken from the keys of the first JSON object. Every
    object then contributes its values in its own key order. Nested lists
    and dicts are written by ``csv.writer`` in their ``str()`` form; they
    are not flattened.

    Args:
        path: Accepted for backward compatibility; not used.
        json_path: Input file holding one JSON object per line (UTF-8).
        csv_path: Output CSV file (UTF-8); overwritten if it exists.
    """
    # ``with`` closes both files even when a line fails to parse.
    with open(json_path, 'r', encoding='utf-8') as jsonData, \
            open(csv_path, 'w', encoding='utf-8', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=',')
        flag = True
        for line in jsonData:
            # A blank line (e.g. a trailing newline) is not a JSON document.
            if not line.strip():
                continue
            dic = json.loads(line)
            if flag:
                keys = list(dic.keys())
                print(keys)
                writer.writerow(keys)
                flag = False
            writer.writerow(list(dic.values()))


if __name__ == '__main__':
    # NOTE: sys.argv[0] is the path of this script itself, not a
    # user-supplied argument; trans() does not use it.
    path = str(sys.argv[0])
    print(path)
    trans(path)
my json file
{"id":"aa","sex":"male","name":[{"Fn":"jeri","Ln":"teri"}],"age":45,"info":[{"address":{"State":"NY","City":"new york"},"start_date":"2001-09","title":{"name":"Doctor","Exp":"head"},"year":"2001","month":"05"}],"other":null,"Hobby":[{"smoking":null,"gamble":null}],"connect":[{"phone":"123456789","email":"info#gmail.com"}],"Education":"MBA","School":{"State":"NY","City":"new york"}}
{"id":"aa","sex":"female","name":[{"Fn":"lo","Ln":"li"}],"age":34,"info":[{"address":{"State":"NY","City":"new york"},"start_date":"2008-11","title":{"name":"Doctor","Exp":"hand"},"year":"2008","month":"02"}],"other":null,"Hobby":[{"smoking":null,"gamble":null}],"connect":[{"phone":"123456789","email":"info#gmail.com"}],"Education":"MBA","School":{"State":"NY","City":"new york"}}
It only converts part of the information: the 'name', 'info', 'Hobby', 'connect' and 'School' fields are not converted. I need all of the information to be converted completely.

You could use the below function to treat each dic. It will flatten the dict by recursive calls until there is no dict or list in the values. In order to avoid issues with 2 keys having the same name, I concatenate with the previous level.
WARNING: it is based on your format so if you have lists with more than one element in the middle, it will only take care of the first element.
def flatten_dict(input_dict, result=None):
    """Flatten a nested dict into a single-level dict.

    Nested keys are joined to their parent key with ``_`` so that two
    leaves with the same name under different parents do not collide.

    A non-empty list whose first element is a dict is represented by that
    first element only; any further elements are ignored. Any other list
    (empty, or holding scalars) is stored unchanged under its key.

    Args:
        input_dict: The (possibly nested) dict to flatten.
        result: Accumulator shared by the recursive calls; callers normally
            leave it out.

    Returns:
        The flat dict (the same object as ``result`` when one was passed).
    """
    # Test for None explicitly: ``result or {}`` would replace a shared but
    # still-empty accumulator with a fresh dict and lose everything the
    # recursive call stores in it.
    if result is None:
        result = {}
    for key, value in input_dict.items():
        if isinstance(value, list) and value and isinstance(value[0], dict):
            value = value[0]
        if isinstance(value, dict):
            current_dict = {key + "_" + k: v for k, v in value.items()}
            flatten_dict(current_dict, result)
        else:
            result[key] = value
    return result
Then apply this function on each dic, transform to Dataframe and save as CSV.
# Flatten every JSON line, then let pandas line the columns up and write
# the table out as CSV.
res = [flatten_dict(json.loads(raw_line)) for raw_line in jsonData]
res_df = pd.DataFrame(res)
res_df.to_csv("result.csv")
Result:

Related

Read dictionary from file into original lists

I have one nested list, and one list for "numbers"
test_keys = [["tobbe", "kalle"], ["karl", "Clara"], ["tobbe"], ["tank"]]
test_values = ['123', '234', '345', '456']

# Pair each number with the name list at the same position. zip() stops at
# the shorter input and, unlike removing items from test_values while
# looping over it, leaves both input lists intact.
res = dict(zip(test_values, test_keys))

# One "number;names;" record per line.
with open("myfile.txt", 'w') as f:
    for key, value in res.items():
        f.write('%s;%s;\n' % (key, value))
This provides the file
123;['tobbe', 'kalle'];
234;['karl', 'Clara'];
345;['finis'];
456;['tank'];
Now I want to load the data back into a dictionary without the ";" and later on back into the corresponding lists.
Try this:
import ast

# Rebuild {number: [names, ...]} from lines shaped like
#   123;['tobbe', 'kalle'];
res = {}
with open("myfile.txt") as file:
    for line in file:
        line = line.strip()
        if not line:
            continue  # tolerate blank lines
        number, _, names_repr = line.partition(';')
        # The second field is the repr() of a list of strings.
        # ast.literal_eval parses Python literals only, so it copes with
        # empty lists and with names containing ", " where slicing and
        # splitting would not.
        res[number] = ast.literal_eval(names_repr.rstrip(';'))
print(res)

# In the file the numbers are the dictionary keys, but in the original
# data they were ``test_values`` and the name lists were ``test_keys``.
test_keys = list(res.values())
test_values = list(res.keys())
print(test_keys)
print(test_values)

Set value for a nested key in YAML

I need a function which will allow setting a nested value in YAML file. For instance, for a YAML like:
LEVEL_1:
LEVEL_2:
LEVEL_3: some_value
I would do something like:
update_yaml_value("LEVEL_1.LEVEL_2.LEVEL_3", "new_value")
I'd appreciate any help. Thanks in advance.
First of all you need to import yaml:
import yaml
When you load some yaml file you will get a python dict object.
# Parse the YAML document into plain Python objects.
with open('/path/to/smth.yaml', 'r') as yaml_file:
    yaml_data = yaml.safe_load(yaml_file)
To be able to change it in the way you described, you can create a function like this:
def update_yaml_value(long_key, data, target=None):
    """Set a nested value addressed by a dot-separated key path.

    Args:
        long_key: Key path such as ``"LEVEL_1.LEVEL_2.LEVEL_3"``.
        data: The value to store under the last key of the path.
        target: The mapping to update. When omitted, the module-level
            ``yaml_data`` is updated, as before.

    Raises:
        KeyError: If an intermediate key of the path does not exist. Only
            the last key may be new.
    """
    if target is None:
        target = yaml_data
    keys = long_key.split('.')
    accessable = target
    # Walk down to the mapping that holds the final key.
    for k in keys[:-1]:
        accessable = accessable[k]
    accessable[keys[-1]] = data
And then save this yaml file:
# Write the updated data back in block (non-flow) style.
with open('/path/to/smth.yaml', 'w+') as yaml_file:
    yaml.dump(yaml_data, yaml_file, default_flow_style=False)
Your python code will load the YAML into a dict.
With the dict you will be able to update the value.
a_dict = load_yaml()  # to be implemented by the OP
# Dictionary keys are case-sensitive: they must match the keys of the YAML
# document (LEVEL_1 / LEVEL_2 / LEVEL_3 in the example) exactly.
a_dict['LEVEL_1']['LEVEL_2']['LEVEL_3'] = 'a_new_value'
You can use this function to change a dict value recursively.
test.py:
#!/usr/bin/env python3
import yaml
def change_config(conf, key, value):
    """Replace the value of every occurrence of *key* inside *conf*, in place.

    The search descends through nested dicts and through lists, so
    mappings stored inside a sequence are reached as well. A matching key
    is overwritten without looking inside its old value. Anything that is
    neither a dict nor a list is left untouched.

    Args:
        conf: The nested structure to update.
        key: The dictionary key to look for at any depth.
        value: The replacement value.
    """
    if isinstance(conf, dict):
        # Assigning to an existing key does not resize the dict, so it is
        # safe while iterating over items().
        for k, v in conf.items():
            if k == key:
                conf[k] = value
            elif isinstance(v, (dict, list)):
                change_config(v, key, value)
    elif isinstance(conf, list):
        for item in conf:
            change_config(item, key, value)
with open("smth.yaml", "r") as f:
    # safe_load builds only plain Python types. yaml.load with the full
    # Loader can construct arbitrary Python objects from tagged input,
    # which is unsafe for files you do not control.
    yaml_data = yaml.safe_load(f)
print(yaml_data)
change_config(yaml_data, "level3", "hassan")
print(yaml_data)
smth.yaml:
level0:
level1:
level3: test
Terminal Output:
{'level0': {'level1': {'level3': 'test'}}}
{'level0': {'level1': {'level3': 'hassan'}}}

How to sort the keys in a dictionary alphabetically and put them in a list?

So far I have this code which is creating a dictionary from an input file:
def read_file(filename):
    """Print one single-entry dictionary per data line of a CSV file.

    The first line of the file is read and discarded. Every following line
    must hold exactly four comma-separated fields: a key, a float, an int
    and a text. Each line is printed as ``{key: (float, int, text)}``.

    Args:
        filename: Path of the CSV file to read.

    Raises:
        ValueError: If a line does not have exactly four fields, or a
            numeric field cannot be converted.
    """
    # Open the file that was asked for rather than a hard-coded name.
    with open(filename) as file:
        file.readline()  # discard the first line
        for line in file:
            key, fl, intg, text = line.split(',')
            result = {key: (float(fl), int(intg), text.strip())}
            print(result)
read_file("menu1.csv")
I have to keep that code in that def format. However, this outputs 27 different dictionaries. How do I make it so it is all in ONE dictionary?
Also:
I want to alphabetize the keys and put them into a list. I tried something like this but it won't work:
def alphabetical_menu(dict):
    """Print the keys of *dict* as an alphabetically sorted list.

    Args:
        dict: The mapping whose keys are listed. The name shadows the
            builtin; it is kept so existing calls keep working.
    """
    # Iterate over the parameter itself; the earlier loop referred to an
    # undefined name ``d`` and raised NameError.
    names = sorted(dict)
    print(names)
What am I doing wrong? or do you have a way to do it?
Is this what you wanted?
def read_file(filename):
    """Read a four-column CSV file into a single dictionary.

    The first line of the file is read and discarded. Every following
    non-blank line must hold exactly four comma-separated fields: a key, a
    float, an int and a text.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A dict mapping each key to ``(float, int, stripped text)``. A key
        that appears more than once keeps its last row.

    Raises:
        ValueError: If a non-blank line does not have exactly four fields,
            or a numeric field cannot be converted.
    """
    result = {}
    with open(filename) as file:
        file.readline()  # discard the first line
        for line in file:
            # Skip blank lines (e.g. at the end of the file) instead of
            # failing to unpack them.
            if not line.strip():
                continue
            key, fl, intg, text = line.split(',')
            result[key] = (float(fl), int(intg), text.strip())
    return result
def alphabetical_menu(d):
    """Return the keys of *d* as a new list in ascending order."""
    names = list(d.keys())
    names.sort()
    return names
# Build the menu dictionary, derive its sorted key list, then show both.
menu_dict = read_file("menu1.csv")
menu_sorted_keys = alphabetical_menu(menu_dict)
for shown in (menu_dict, menu_sorted_keys):
    print(shown)

Python coding issue for csv read

Have a simple code where
goal:
open a csv file as list print it --> worked
open a csv file as dictionary
print it --> working
modify it --> is the code correct for it?
print again --> not working
using Pycharm for debug and can't identify the issue. Any help will be highly appreciated.
import sys
import csv
def print_csv_list(list_in, max_lines=2):
    """
    function takes a list of lists and prints at most max_lines of them,
    each item stripped of surrounding commas and followed by a tab
    :param list_in: list of lists (each inner item must be a string)
    :param max_lines: maximum number of lines to print (default 2)
    :return: no return
    """
    for counter, line in enumerate(list_in):
        # Stop at the limit instead of scanning the remaining lines.
        if counter >= max_lines:
            break
        for item in line:
            sys.stdout.write(item.strip(",") + "\t")
        # One flush per line is enough to keep the output ordered.
        sys.stdout.flush()
        print("\n")
def print_csv_file(file_dict):
    """Print every item yielded by *file_dict*, one per line.

    :param file_dict: any iterable of rows (lists or dicts)
    :return: no return
    """
    for dict_item in file_dict:
        # The parenthesised single-argument form prints the same thing as
        # a Python 2 print statement and as the Python 3 print function.
        print(dict_item)
def modify_dict(file_dict):
    """Print the rows, drop a trailing "_regs" from each "ral_file" value,
    then print the rows again.

    :param file_dict: iterable of dict rows, e.g. a csv.DictReader
    :return: no return
    """
    # A reader can be walked only once. Copy its rows into a list so they
    # can be printed, modified and printed again.
    rows = list(file_dict)
    print_csv_file(rows)
    suffix = "_regs"
    for dict_item in rows:
        v = dict_item.get("ral_file")
        # str.strip("_regs") would remove any of the characters _, r, e,
        # g and s from BOTH ends of the value; cut the exact suffix instead.
        if isinstance(v, str) and v.endswith(suffix):
            dict_item["ral_file"] = v[:-len(suffix)]
    print_csv_file(rows)
def parse_ral_file(csvfile):
    """Print the CSV rows as lists, then read the file again as dict rows
    and pass those to modify_dict.

    :param csvfile: path of the CSV file
    :return: no return
    """
    # First pass: plain list rows.
    with open(csvfile, 'r') as plain_handle:
        print_csv_file(csv.reader(plain_handle, delimiter=','))
    # Second pass: a fresh handle, read as dict rows.
    with open(csvfile, 'r') as keyed_handle:
        modify_dict(csv.DictReader(keyed_handle, delimiter=','))
if __name__ == "__main__":
    # The CSV path is taken from the first command-line argument.
    parse_ral_file(sys.argv[1])
When you iterate through a generator (including a file), you leave the pointer at the end. This means any subsequent iteration will result in empty content. You need to use seek and make the pointer go back to the start of the file.
# Pass the open file object itself (not a reader) so that modify_dict can
# rewind it.
with open(csvfile, 'r') as ral_csv:
    modify_dict(ral_csv)
def modify_dict(dict_file):
    """Print the dict rows of an open CSV file twice.

    :param dict_file: open, seekable file object positioned at the start
    :return: no return
    """
    file_dict = csv.DictReader(dict_file, delimiter=',')
    print_csv_file(file_dict)
    dict_file.seek(0)  # If you remove this line, the second `print_csv_file`
                       # won't print anything
    # A DictReader remembers the field names it has already read. Reusing
    # it after the rewind would hand back the header line as a data row,
    # so build a new reader for the second pass.
    file_dict = csv.DictReader(dict_file, delimiter=',')
    print_csv_file(file_dict)

Edit CSV file in python which reads values from another json file in python

I wanted to edit a csv file which reads the value from one of my another json file in python 2.7
my csv is : a.csv
a,b,c,d
,10,12,14
,11,14,15
my json file is a.json
{"a":20}
I want each column name in the CSV (here 'a') to be looked up in the JSON file. If there is a match, the value from the JSON should be copied into that column of my CSV file, so that the final CSV looks like this:
a,b,c,d
20,10,12,14
20,11,14,15
What I have tried so far is:
# NOTE(review): sketch from the question, not runnable as written -- the
# author asks readers to ignore syntax and indentation errors, and the
# original indentation is not available here.
# 'a' opens a.csv for appending, so every write below lands after the
# existing content.
fileCSV = open('a.csv', 'a')
fileJSON = open('a.json', 'r')
# NOTE(review): file objects have no .json() method; json.load(fileJSON) is
# presumably what was meant -- confirm.
jsonData = fileJSON.json()
# NOTE(review): range() is applied to the loaded JSON data here; presumably
# a row count was intended -- confirm.
for k in range(jsonData):
# NOTE(review): csvRow is not defined anywhere in this snippet.
for i in csvRow:
for j in jsonData.keys():
# Compare the CSV item with the JSON key.
if i == j:
# NOTE(review): self is used although no class or method is visible here.
if self.count == 0:
self.data = jsonData[j]
self.count = 1
else:
# Later matches are joined to the collected data with a comma.
self.data = self.data + "," + jsonData[j]
self.count = 0
fileCSV.write(self.data)
fileCSV.write("\n")
k += 1
fileCSV.close()
print("File created successfully")
I will be really thankful if anyone can help me for this.
please ignore any syntactical and indentation error.
Thank You.
Some basic string parsing will get you here.. I wrote a script which works for the simple scenario which you refer to.
check if this solves your problem:
import json
from collections import OrderedDict
def list_to_csv(listdat):
    """Join the items of *listdat* into one comma-separated line.

    Every item is converted with ``str()``; no quoting or escaping is
    applied. An empty input gives an empty string.

    Args:
        listdat: Iterable of values for one CSV row.

    Returns:
        The values joined by ``,`` without a trailing newline.
    """
    # str.join builds the line in one pass. It also avoids a local
    # variable named ``csv``, which would shadow the standard-library
    # module name.
    return ",".join(str(val) for val in listdat)
csvfile = "csvfile.csv"
outcsvfile = "outcsvfile.csv"
jsonfile = "jsonfile.json"

# Read the CSV as stripped text lines. Blank lines are dropped so that they
# are not mistaken for data rows.
with open(csvfile, encoding='UTF-8') as a_file:
    lines = [line.strip() for line in a_file if line.strip()]

# The first line names the columns; the rest are data rows.
columns = lines[0].split(",") if lines else []
data = lines[1:]

# One ordered column -> value mapping per data row. zip() pairs the fields
# with the column names and simply stops at the shorter of the two, so a
# short row no longer raises IndexError.
whole_data = []
for row in data:
    fields = row.split(",")
    whole_data.append(OrderedDict(zip(columns, fields)))

with open(jsonfile) as json_file:
    jsondata = json.load(json_file)

# Copy a JSON value into every row, but only for keys that are actual CSV
# columns.
for key, value in jsondata.items():
    if key not in columns:
        continue
    for each_row in whole_data:
        each_row[key] = value

# Write the header and the updated rows to a separate output file.
with open(outcsvfile, mode='w', encoding='UTF-8') as b_file:
    b_file.write(list_to_csv(columns)+'\n')
    for row_data in whole_data:
        # A column missing from a short row is written as an empty field.
        row_list = [row_data.get(ecolumn, "") for ecolumn in columns]
        b_file.write(list_to_csv(row_list)+'\n')
CSV output is not written to the source file but to a different file.
The output file is also always truncated and written, hence the 'w' mode.
I would recommend using csv.DictReader and csv.DictWriter classes which will read into and out of python dicts. This would make it easier to modify the dict values that you read in from the JSON file.

Categories

Resources