selective flattening in python nested dictionary and finding keys - python

I have data in the following format in a JSON file:
[
{
"FIRST NAME": "Nasim",
"EMAIL": "ac#iaculisnec.net",
"ADDLINE1": "855-8805 Nunc. Avenue",
"CITY": "Masterton",
"LOCATION":{"ADDLINE2":"855-8805",
"ADDLINE3":"Avenue",
"PIN":"100"}
},
{
"FIRST NAME": "Xanthus",
"EMAIL": "adipiscing.elit#tinciduntcongue.edu",
"ADDLINE1": "357-4583 Curae; St.",
"CITY": "Basildon",
"LOCATION":{"ADDLINE2":"357-4583",
"ADDLINE3":"Curae; St.",
"PIN":"101"}
},
{
"FIRST NAME": "Hedley",
"EMAIL": "Quisque.libero.lacus#arcu.ca",
"ADDLINE1": "315-623 Nibh. Road",
"CITY": "Abingdon",
"LOCATION":{"ADDLINE2":"315-623",
"ADDLINE3":"Nibh. Road",
"PIN":"102"}
}]
this is my code
data=json.loads(file('grade.json').read())
for row in data:
row['ADDRESS']= row['ADDLINE1']+','+ row['CITY']
del row['CITY'], row['ADDLINE1']
row['LOCATION1']=row['LOCATION']['ADDLINE2']+','+row['LOCATION'] ['ADDLINE3']+','+row['LOCATION']['PIN']
del row['LOCATION']
data =json.loads(file('grade.json').read())
out = {}
for sub in data.values():
for key, value in sub.items():
if key in out:
del out[key]
else:
out[key] = value
print(out)
file('files','w').write(json.dumps(data))
out_path= "outfile9.csv"
fieldnames = list(set(k for d in data for k in d))
with open(out_path, 'wb') as out_file:
writer = csv.DictWriter(out_file, fieldnames=fieldnames, dialect='excel')
writer.writeheader()
writer.writerows(data)
I want to remove the nested dictionary (LOCATION1 here, after formatting; previously LOCATION) but retain ADDLINE2, ADDLINE3 and PIN as they are. I want a flattened dictionary. What can I do to improve it?
i require keys in this form
[firstname,email,address,location{addline2,addline3,pin}]
even if extra nested values are added it should dynamically appear in this form

data=json.loads(file('grade.json').read())
for row in data:
row['ADDRESS']= row['ADDLINE1']+','+ row['CITY']
del row['CITY'], row['ADDLINE1']
row['LOCATION1']=row['LOCATION']['ADDLINE2']+','+row['LOCATION'] ['ADDLINE3']+','+row['LOCATION']['PIN']
del row['LOCATION']
data =json.loads(file('grade.json').read())
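All of the above is useless because the last line re-reads the file and rebinds data, discarding every change made in the loop.
To flatten ADDLINE2, ADDLINE3 and PIN, add the following inside the loop above, before everything else (in particular before del row['LOCATION']):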
# Copy the nested LOCATION fields up to the top level of the row.
# The keys must match the JSON exactly: the field is "PIN", with no
# trailing space, otherwise the lookup raises KeyError.
row['ADDLINE2'] = row['LOCATION']['ADDLINE2']
row['ADDLINE3'] = row['LOCATION']['ADDLINE3']
row['PIN'] = row['LOCATION']['PIN']

Related

How to convert nested JSON files to CSV in python

I am completely new to python and trying to convert nested json files to csv. The current code I am trying to use is:
import json
def read_json(filename: str) -> dict:
    """Load and return the JSON document stored in *filename*.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The parsed JSON content.

    Raises:
        Exception: If the file cannot be opened or does not contain valid
            JSON. The underlying error is attached as ``__cause__``.
    """
    try:
        with open(filename, "r") as f:
            data = json.load(f)
    # OSError covers missing/unreadable files; ValueError covers
    # json.JSONDecodeError and decoding problems. A bare ``except`` would
    # also trap KeyboardInterrupt and SystemExit.
    except (OSError, ValueError) as err:
        raise Exception(f"Reading {filename} file encountered an error") from err
    return data
def normalize_json(data: dict) -> dict:
    """Flatten exactly one level of nested dictionaries.

    Each value that is itself a dict is replaced by its entries, keyed as
    ``"<outer>_<inner>"``. Every other value (including lists and any
    dicts nested deeper than one level) is copied through unchanged.

    Args:
        data: The dictionary to flatten.

    Returns:
        A new dictionary; *data* is not modified.
    """
    flattened = {}
    for outer_key, outer_value in data.items():
        if isinstance(outer_value, dict):
            # Promote the nested entries, prefixing them with the parent key.
            flattened.update(
                (outer_key + "_" + inner_key, inner_value)
                for inner_key, inner_value in outer_value.items()
            )
            continue
        flattened[outer_key] = outer_value
    return flattened
def generate_csv_data(data: dict) -> str:
    """Render a flat dictionary as a two-line CSV document.

    The first line holds the keys (column names) in dictionary order and
    the second line holds the corresponding values converted with
    ``str()``. Fields containing commas, quotes or newlines are quoted by
    the ``csv`` module so the output stays parseable.

    Args:
        data: Flat mapping of column name to value.

    Returns:
        The CSV text, with each of the two rows terminated by ``"\\n"``.
    """
    import csv
    import io

    # Materialize the columns once so header and record share one order.
    csv_columns = list(data.keys())
    buffer = io.StringIO()
    # lineterminator="\n" keeps plain newlines rather than the csv
    # module's default "\r\n".
    writer = csv.writer(buffer, lineterminator="\n")
    # Header row
    writer.writerow(csv_columns)
    # The single record
    writer.writerow([str(data[col]) for col in csv_columns])
    return buffer.getvalue()
def write_to_file(data: str, filepath: str) -> bool:
    """Write *data* to *filepath*, replacing any existing content.

    Args:
        data: Text to write.
        filepath: Destination path.

    Returns:
        True once the data has been written, as the annotation promises.

    Raises:
        Exception: If the file cannot be opened or written. The underlying
            OSError is attached as ``__cause__``.
    """
    try:
        with open(filepath, "w") as f:
            f.write(data)
    except OSError as err:
        raise Exception(f"Saving data to {filepath} encountered an error") from err
    return True
def main():
    """Convert ``test2.json`` to ``data2.csv``: read, flatten, print, save."""
    # Read the JSON file as python dictionary
    data = read_json(filename="test2.json")
    # Normalize the nested python dict
    new_data = normalize_json(data=data)
    # Pretty print the new dict object
    print("New dict:", new_data)
    # Generate the desired CSV data
    csv_data = generate_csv_data(data=new_data)
    # Save the generated CSV data to a CSV file
    # (the original line lacked the opening quote of the path string)
    write_to_file(data=csv_data, filepath="data2.csv")


if __name__ == '__main__':
    main()
It works partly: I get a CSV file that contains all values. However, for the nested key fields it only gives me the "highest" level (e.g. I get "currentEmployments" but not "currentEmployments_firmId").
Could someone help me with this?
Sample json file:
{
"basicInformation": {
"individualId": 10000,
"firstName": "Name",
"middleName": "middleName.",
"lastName": "lastName",
"bcScope": "Active",
"iaScope": "NotInScope",
"daysInIndustryCalculatedDate": "1/1/2000"
},
"currentEmployments": [
{
"firmId": 001,
"firmName": "firm1",
"iaOnly": "N",
"registrationBeginDate": "1/1/2005",
"firmBCScope": "ACTIVE",
"firmIAScope": "ACTIVE",
"iaSECNumber": "10000",
"iaSECNumberType": "100",
"bdSECNumber": "1000",
"branchOfficeLocations": [
{
"locatedAtFlag": "Y",
"supervisedFromFlag": "N",
"privateResidenceFlag": "N",
"branchOfficeId": "10000",
"street1": "street1",
"city": "city",
"state": "MD",
"country": "United States",
"zipCode": "10000"
}
]
}
],
"currentIAEmployments": [],
"previousEmployments": [
{
"iaOnly": "N",
"bdSECNumber": "20000",
"firmId": 200,
"firmName": "firm2",
"street1": "street",
"city": "city",
"state": "MD",
"country": "UNITED STATES",
"zipCode": "10000",
}
],
"examsCount": {
"stateExamCount": 0,
"principalExamCount": 0,
"productExamCount": 1
},
}

How to add to python dictionary without needing a key

I am scraping some information off the web and want to write the information into a JSON file with this format:
[
{
"name" : "name1",
"value" : 1
},
{
"name" : "name2",
"value" : 2
},
{
"name" : "name3",
"value" : 3
},
{
"name" : "name4",
"value" : 4
},
{
"name" : "name5",
"value" : 5
}
]
I am looping through everything I am scraping but don't know how to convert that information to this format. I tried to create a dictionary and then add to it after every loop but it does not give me the output I want.
dictionary = None
name = None
value = None
for item in someList:
name = item.name
value = item.value
dictionary[""] = {"name": name, "value": value}
with open("data.json", "w") as file:
json.dump(dictionary, file, indent=4)
Try this:
import json
myList = [{"name": item.name, "value": item.value} for item in someList]
with open("data.json", "w") as file:
json.dump(myList, file, indent=4)
The answer was simpler than I thought. I just needed to make a list of dictionaries and pass that list to the json.dump() function. Like this:
myList = list()
name = None
value = None
for item in someList:
name = item.name
value = item.value
myList.append({"name": name, "value": value})
with open("data.json", "w") as file:
json.dump(myList, file, indent=4)
The format you show is a list not a dictionary. So you can make a list and append to it the different dictionaries.
# Collect one {"name", "value"} dictionary per scraped item in a list,
# then serialize the whole list as a JSON array.
arr = []
for item in someList:
    arr.append({"name": item.name, "value": item.value})
with open("data.json", "w") as file:
    json.dump(arr, file, indent=4)

ValueError: dict contains fields not in fieldnames: 'Reviews', 'Consumer_Feedback'

I am trying to convert json to csv. But I get the error:
ValueError: dict contains fields not in fieldnames: 'Reviews', 'Consumer_Feedback'
How to check that all keys are written?
This is what I've written:
file_id = ''
with open(filename_jsonl, 'r') as f:
for line in f.read():
file_id += line
file_id = [json.loads(item + '\n}') for item in file_id.split('}\n')[0:-1]]
with open(filename_csv, 'a') as f:
writer = csv.DictWriter(f, file_id[0].keys(), delimiter=";")
writer.writeheader()
for profile in file_id:
writer.writerow(profile)
jsonl
{
"First_and_Last_Name": "Lori Anderson",
"Primary_Specialty": "Acupuncturist",
"Practice": null,
"Education": [],
"Phone": "(405) 943-0377",
"Address": "5701 NW 23rd St, Oklahoma City, OK 73127"
}
{
"First_and_Last_Name": "Joe Wojciechowski, D.C.",
"Primary_Specialty": "Chiropractor",
"Practice": "13",
"Education": [
"Palmer College of Chiropractic"],
"Consumer_Feedback": "(1 Review)",
"Reviews": [
"\r\nDr. Joe is an amazing chiropractor. He continues to educate himself and incorporates everything he learns into his practice."],
"Phone": "(405) 878-6611",
"Address": "18877 Ferdondo Dr., Earlsboro, OK 74840"
}
Any and all help is appreciated. Thanks!

convert csv file to multiple nested json format

I have written a code to convert csv file to nested json format. I have multiple columns to be nested hence assigning separately for each column. The problem is I'm getting 2 fields for the same column in the json output.
import csv
import json
from collections import OrderedDict
csv_file = 'data.csv'
json_file = csv_file + '.json'
# Read a '|'-delimited CSV file, reshape every row, and dump all rows to
# the module-level json_file path as a JSON array.
def main(input_file):
csv_rows = []
with open(input_file, 'r') as csvfile:
reader = csv.DictReader(csvfile, delimiter='|')
for row in reader:
# NOTE(review): the trailing commas on the next two assignments make the
# values 1-tuples, which json.dump serializes as arrays (see "RAWID": [1]
# in the output shown below).
row['TYPE'] = 'REVIEW', # adding new key, value
row['RAWID'] = 1,
row['CUSTOMER'] = {
"ID": row['CUSTOMER_ID'],
"NAME": row['CUSTOMER_NAME']
}
row['CATEGORY'] = {
"ID": row['CATEGORY_ID'],
"NAME": row['CATEGORY']
}
# NOTE(review): row["CATEGORY"] was just overwritten with the nested dict
# above, so deleting it here also removes that new nested entry.
del (row["CUSTOMER_NAME"], row["CATEGORY_ID"],
row["CATEGORY"], row["CUSTOMER_ID"]) # deleting since fields occurring twice
csv_rows.append(row)
with open(json_file, 'w') as f:
json.dump(csv_rows, f, sort_keys=True, indent=4, ensure_ascii=False)
f.write('\n')
The output is as below:
[
{
"CATEGORY": {
"ID": "1",
"NAME": "Consumers"
},
"CATEGORY_ID": "1",
"CUSTOMER_ID": "41",
"CUSTOMER": {
"ID": "41",
"NAME": "SA Port"
},
"CUSTOMER_NAME": "SA Port",
"RAWID": [
1
]
}
]
I'm getting 2 entries for the fields I have assigned using row[''].
Is there any other way to get rid of this? I want only one entry for a particular field in each record.
Also, how can I convert the keys to lower case after reading from csv.DictReader()? In my csv file all the columns are in upper case and hence I'm using the same to assign. But I want to convert all of them to lower case.
In order to convert the keys to lower case, it would be simpler to generate a new dict per row. BTW, it should be enough to get rid of the duplicate fields:
for row in reader:
    # Build a fresh row with lower-case keys instead of mutating the
    # DictReader row; only the keys assigned here end up in the output,
    # so no duplicate upper-case fields remain.
    # OrderedDict is the name brought in by the question's
    # ``from collections import OrderedDict``.
    orow = OrderedDict()
    # No trailing commas: ``'REVIEW',`` would store a 1-tuple, which is
    # serialized as a JSON array.
    orow['type'] = 'REVIEW'  # adding new key, value
    orow['rawid'] = 1
    orow['customer'] = {
        "id": row['CUSTOMER_ID'],
        "name": row['CUSTOMER_NAME'],
    }
    orow['category'] = {
        "id": row['CATEGORY_ID'],
        "name": row['CATEGORY'],
    }
    csv_rows.append(orow)

Python - how to convert CSV to JSON and choose which columns to drop or put in front?

I have example CSV file:
zipcode,firstname,lastname,email
12345,mike,alson,mike#m.com
54321,joe,don,joe#j.com
Need to create output file in this format:
SET 12345 '{"firstname": "mike", "lastname": "alson", "email": "mike#m.com"}'
SET 54321 '{"firstname": "joe", "lastname": "don", "email": "joe#j.com"}'
How to achieve this?
I started with something like:
#!/usr/bin/python
import csv, json
csvreader = csv.reader(open('data.csv', 'rb'), delimiter=',', quotechar='"')
data = []
for row in csvreader:
r = []
for field in row:
if field == '': field = None
else: field = unicode(field, 'ISO-8859-1')
r.append(field)
data.append(r)
jsonStruct = {
'header': data[0],
'data': data[1:]
}
open('data.json', 'wb').write(json.dumps(jsonStruct))
Read your rows into a csv.DictReader, then write the zip code separately with a serialized JSON formatted string from json.dumps() of the remaining elements:
from csv import DictReader
from json import dumps
with open("rows.csv") as csv_file, open("output.txt", mode="w") as out_file:
csv_reader = DictReader(csv_file)
for row in csv_reader:
zipcode = row["zipcode"]
rest = {k: v for k, v in row.items() if k != "zipcode"}
out_file.write("SET %s '%s'\n" % (zipcode, dumps(rest)))
output.txt:
SET 12345 '{"firstname": "mike", "lastname": "alson", "email": "mike#m.com"}'
SET 54321 '{"firstname": "joe", "lastname": "don", "email": "joe#j.com"}'
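Note: In Python 3.6 and 3.7, DictReader returns rows as collections.OrderedDict; from Python 3.8 on it returns plain dicts, which also preserve insertion order. Either way the column order is maintained when writing to the file. For lower versions, you will need to handle this manually.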

Categories

Resources