Converting nested JSON to CSV without hard coding column values - python

I am a rookie at Python and I have data files that I would like to convert from JSON to CSV. There are two issues: my code returns an error I am unable to resolve, and the data varies from file to file, so I would like one script that can be applied to multiple files by just changing the file location. I would also like to avoid hard-coding the company name and company type, but I don't know how to go about that. The data is structured as follows:
{
"company_name": "Google",
"company_type": "Public",
"employees": [{
"staff": [{
"name": "John Doe",
"type": "FTE",
"id": "1111111111",
"region": "Northeast"
}, {
"name": "Jane Doe",
"type": "FTE",
"id": "222222222",
"region": "Northwest"
}],
"setup": [{
"description": "Onsite",
"location": "New York City"
}, {
"description": "Hybrid",
"location": "Seattle"
}],
"role": [{
"description": "Business Analyst",
"salary": "70000"
}, {
"description": "Manager",
"salary": "90000"
}]
}, {
"contractors": [{
"name": "Jessica Smith",
"type": "PTE",
"id": "333333333",
"region": "Southeast"
}],
"setup": [{
"description": "Remote",
"location": "Miami"
}],
"role": [{
"description": "Project Manager",
"salary": "80000"
}]
}]
}
The code I have so far is:
import json
import csv
import ijson
file = open("C:/Users/User1/sample_file.json","w")
file_writer = csv.writer(file)
file_writer.writerow(("Company Name","Company Type","Name","Type","ID","Region","Description","Location","Description","Salary"))
with open("C:/Users/User1/sample_file.json","rb") as f:
company_name = "Google"
company_type = "Public"
for record in ijson.items(f,"employees.item"):
name = record['staff'][0]['name']
type = record['staff'][0]['type']
id = record['staff'][0]['id']
region = record['staff'][0]['region']
description = record['setup'][0]['description']
location = record['setup'][0]['location']
description = record['role'][0]['description']
salary = record['role'][0]['salary']
file_writer.writerow((comapny_name, company_type, name, type, id, region, description, location, description, salary))
file.close()
Any help is greatly appreciated.

Assuming that all of your files have the same general structure, using a csv.DictWriter should work. Just iterate through the employee sections creating a single dictionary to represent each employee and call writer.writerow() once all of the data has been collected.
For example:
import csv
import json
# Use a context manager so the input file is closed as soon as it is parsed.
with open(filename) as json_file:
    data = json.load(json_file)
columns = ["company name","company type","name","type","id","region","description","location","salary"]
def convert(data, headers, out_path="employees.csv"):
    """Flatten the nested employee records in *data* into a CSV file.

    Each element of ``data["employees"]`` is a dict that maps a section name
    (for example "staff", "setup" or "role") to a list of dicts.  The i-th
    entries of every section are merged into one row, so section names never
    have to be hard-coded.  When two sections share a key (for example
    "description"), the section that appears later wins.

    Args:
        data: Parsed JSON document with "company_name", "company_type" and
            "employees" keys.
        headers: Column names to write, in order.  Row keys that are not
            listed are dropped; listed keys missing from a row are left empty.
        out_path: Destination CSV path.

    Raises:
        KeyError: If a required top-level key is missing from *data*.
    """
    # Look up the required keys before touching the filesystem so a malformed
    # document does not leave a half-written CSV behind.
    company = {
        "company name": data["company_name"],
        "company type": data["company_type"],
    }
    employees = data["employees"]

    # newline="" lets the csv module control line endings; without it every
    # row is followed by a blank line on Windows.
    with open(out_path, "w", newline="") as csvfile:
        writer = csv.DictWriter(
            csvfile, fieldnames=headers, extrasaction="ignore", restval=None
        )
        writer.writeheader()
        for group in employees:
            rows = []
            for records in group.values():
                for i, record in enumerate(records):
                    # First section to reach index i creates the row, seeded
                    # with the company-level columns.
                    if len(rows) <= i:
                        rows.append(dict(company))
                    rows[i].update(record)
            writer.writerows(rows)
convert(data, columns)
OUTPUT
company name,company type,name,type,id,region,description,location,salary
Google,Public,John Doe,FTE,1111111111,Northeast,Business Analyst,New York City,70000
Google,Public,Jane Doe,FTE,222222222,Northwest,Manager,Seattle,90000
Google,Public,Jessica Smith,PTE,333333333,Southeast,Project Manager,Miami,80000

Related

Retrieve data from json file using python

I'm new to Python. I'm running Python on Azure Databricks. I have a .json file. I'm putting the important fields of the JSON file here:
{
"school": [
{
"schoolid": "mr1",
"board": "cbse",
"principal": "akseal",
"schoolName": "dps",
"schoolCategory": "UNKNOWN",
"schoolType": "UNKNOWN",
"city": "mumbai",
"sixhour": true,
"weighting": 3,
"paymentMethods": [
"cash",
"cheque"
],
"contactDetails": [
{
"name": "picsa",
"type": "studentactivities",
"information": [
{
"type": "PHONE",
"detail": "+917597980"
}
]
}
],
"addressLocations": [
{
"locationType": "School",
"address": {
"countryCode": "IN",
"city": "Mumbai",
"zipCode": "400061",
"street": "Madh",
"buildingNumber": "80"
},
"Location": {
"latitude": 49.313885,
"longitude": 72.877426
},
I need to create a data frame with schoolName as one column and latitude and longitude as the other two columns. Can you please suggest how to do that?
you can use the method json.load(), here's an example:
import json
# JSON text is UTF-8; pass the encoding explicitly so the result does not
# depend on the platform's locale default.
with open('path_to_file/file.json', encoding='utf-8') as f:
    data = json.load(f)
print(data)
use this
import json # built-in
with open("filename.json", 'r') as jsonFile:
    # File objects have no .load() method; parsing is done by json.load(),
    # which takes the open file as its argument.
    Data = json.load(jsonFile)
Data is now a dictionary of the file's contents. For example:
# Iterating a dict yields its keys; index back into it for each value.
for key in Data:
    print(Data[key])  # prints the value
For more on JSON:
https://docs.python.org/3/library/json.html
and python dictionaries:
https://www.programiz.com/python-programming/dictionary#:~:text=Python%20dictionary%20is%20an%20unordered,when%20the%20key%20is%20known.

Converting two CSV's into one json

This code produces the records in terminal but when I open the json it's blank. Can someone help me?
import csv
import json
refcsvfile = open('referralsource.csv', 'r')
jsonfile = open('redrock.json', 'w')
concsvfile = open('contacts.csv', 'r')
reffieldnames = ("ReferralSource_Name","OrganizationType","PrimaryRelationshipManager","ReferralSourceStatus","RSContactSourceType"
)
confieldnames = ("ReferralSource_Name","OrganizationName","IsOrganizationContact","FirstName","Middle","LastName","Role","Line1","City","State","Zip","Phone","PhoneType","PhonePrimary","OkToLeaveVM","PhoneActive","E-mail","OkToSendEmail","ContactPrimaryRelationshipManager","IsPrimaryContact","ContactSourceType"
)
refreader = csv.DictReader( refcsvfile, reffieldnames)
conreader = csv.DictReader( concsvfile, confieldnames)
output=[];
refcount=0
for refrow in refreader:
refrow['ReferralSourceContacts'] = []
output.append(refrow)
concsvfile.seek(0)
for conrow in conreader:
if(conrow["ReferralSource_Name"]==refrow["ReferralSource_Name"]):
refrow['ReferralSourceContacts'].append(conrow)
output.append(refrow)
refcount = refcount +1
print(output)
json.dump(output, jsonfile,sort_keys=True)
I am wanting the json to appear something like:
{
"ReferralSource_Name": "Demo Facility",
"OrganizationType": "Hospital",
"RSContactSourceType": "DirectInboundTelephone",
"ReferralSourceStatus": "Active",
"PrimaryRelationshipManager": "John Doe",
},
"ReferralSourceContacts": [
{
"IsOrganizationContact": true,
"OrganizationName": "Demo Facility",
"FirstName": "John",
"LastName": "Smith",
"Role": "Doctor",
"Line1": "123 abc Street",
"Zip": "44720",
"City": "Canton",
"State": "OH",
"Phone": "555-555-555",
"PhoneType": "Office",
"PhonePrimary": "True",
"PhoneActive": "True",
"Email": "doc#doc.doc",
"OkToLeaveVm": true,
"OkToSendEmail": true,
"ContactSourceType": "DirectInboundTelephone"
"ContactPrimaryRelationshipManager": "John Doe"
}
"IsOrganizationContact": true,
"OrganizationName": "Test Facility",
"FirstName": "Jane",
"LastName": "Smith",
"Role": "Doctor",
"Line1": "123 abc Street",
"Zip": "44720",
"City": "Canton",
"State": "OH",
"Phone": "555-555-555",
"PhoneType": "Office",
"PhonePrimary": "True",
"PhoneActive": "True",
"Email": "doc2#doc.doc",
"OkToLeaveVm": true,
"OkToSendEmail": true,
"ContactSourceType": "DirectInboundTelephone"
"ContactPrimaryRelationshipManager": "John Doe"
}
]
Basically I have a file for the parent entity referral source (think of as Companies), and another csv for the contacts (Think of them as the people at the companies). I need these two combined into the mentioned JSON for an import.
Something like this oughta work...
Use with to manage files.
Assuming the contacts list isn't huge, it's much better to load it into memory instead of seeking the file to the start on every row.
Even better, since it's now in memory, we can use a collections.defaultdict to pre-group by the referrer field, making creating the output a simple dict lookup.
import csv
import json
import collections
reffieldnames = (
"ReferralSource_Name",
"OrganizationType",
"PrimaryRelationshipManager",
"ReferralSourceStatus",
"RSContactSourceType",
)
confieldnames = (
"ReferralSource_Name",
"OrganizationName",
"IsOrganizationContact",
"FirstName",
"Middle",
"LastName",
"Role",
"Line1",
"City",
"State",
"Zip",
"Phone",
"PhoneType",
"PhonePrimary",
"OkToLeaveVM",
"PhoneActive",
"E-mail",
"OkToSendEmail",
"ContactPrimaryRelationshipManager",
"IsPrimaryContact",
"ContactSourceType",
)
# Group contacts by referral source while reading them, so each referral row
# below needs only a single dict lookup instead of a rescan of the contacts.
con_map = collections.defaultdict(list)
# newline="" is what the csv module expects for its input files; it keeps
# quoted fields that contain line breaks intact.
with open("contacts.csv", "r", newline="") as concsvfile:
    for con_row in csv.DictReader(concsvfile, confieldnames):
        con_map[con_row["ReferralSource_Name"]].append(con_row)

# Attach the matching contacts (an empty list when there are none) to every
# referral source row.
with open("referralsource.csv", "r", newline="") as refcsvfile:
    output = []
    for refrow in csv.DictReader(refcsvfile, reffieldnames):
        refrow["ReferralSourceContacts"] = con_map[refrow["ReferralSource_Name"]]
        output.append(refrow)

print("len(output):", len(output))

# The with block closes the file, which flushes the JSON to disk.
with open("redrock.json", "w") as jsonfile:
    json.dump(output, jsonfile, sort_keys=True)

How to add a new dictionary on my JSON list

So I have a basic user.json file.
My question is: How can I add another dictionary to my JSON list?
My code is below. As you can see, I tried append, update, and insert, but none of them gave a working result. The goal is to be able to add a new entry:
NAME: name1 // COUNTRY: country1 // GENDER: gender1... to the person JSON list. Thank you.
Python code
import json
with open("user.json") as f:
data = json.load(f)
new_dict = {"name": "name1",
"Country": "Country2",
"Gender": "Gender3"}
for person in data["person"]:
person.update(new_dict)
with open("user.json", "w") as f:
json.dump(data, f, indent=2)
user.json
{
"person": [
{
"name": "Peter",
"Country": "Montreal",
"Gender": "Male"
},
{
"name": "Alex",
"Country": "Laval",
"Gender": "Male"
},
{
"name": "Annie",
"Country": "Quebec",
"Gender": "Female"
},
{
"name": "Denise",
"Country": "Levis",
"Gender": "Female"
}
]
}
If you want to add another person object to the person list, all you have to do is append the new object to the array of objects. You don't need to iterate over the person objects. Please check whether the code below helps:
# Read the current document.
with open("user.json") as infile:
    data = json.load(infile)

# The entry to add to the "person" list.
new_dict = dict(name="name1", Country="Country2", Gender="Gender3")

# Appending adds one new person; the existing entries are left untouched.
data["person"].append(new_dict)

# Write the updated document back over the same file.
with open("user.json", "w") as outfile:
    json.dump(data, outfile, indent=2)
In the Python code from the question, person.update(new_dict) overwrites the fields of each existing person entry; it does not add a new entry.

How to extract these data items from JSON file?

I have a JSON file and I would like to get 'id' value and 'key' value for each champion:
Here example with 2 champions of my champion.json file, but if I have 100 champions how can I do that?
{
"type": "champion",
"format": "standAloneComplex",
"version": "9.23.1",
"data": {
"Aatrox": {
"version": "9.23.1",
"id": "Aatrox",
"key": "266",
"name": "Aatrox",
"title": "the Darkin Blade"
},
"Ahri": {
"version": "9.23.1",
"id": "Ahri",
"key": "103",
"name": "Ahri",
"title": "the Nine-Tailed Fox"
}
}
}
My python file :
import json
all_data = open('champion.json', encoding="utf8")
data_champ = json.load(all_data)
for element in data_champ['data']:
print(data_champ[element]["key"])
print(data_champ[element]['id'])
In the structure of the JSON data in your question, the value associated with the data key is a dictionary-of-dictionaries, so you would need to access the value of each one of them like this:
import json
# The context manager closes the file once parsing is done.
with open('champion.json', encoding="utf8") as champion_file:
    data_champ = json.load(champion_file)

# data_champ['data'] is a dict of per-champion dicts, so iterate over its
# values to reach each champion's fields.
for champion in data_champ['data'].values():
    print(champion["key"], champion['id'], sep="\n")
Output:
266
Aatrox
103
Ahri
I also changed the file handling to ensure it gets closed properly by using a with statement.

Need read some data from JSON

I need to get (id, name, faction id) for each deputy in this JSON:
{
"id": "75785",
"title": "(за основу)",
"asozdUrl": null,
"datetime": "2011-12-21T12:20:26+0400",
"votes": [
{
"deputy": {
"id": "99111772",
"name": "Абалаков Александр Николаевич",
"faction": {
"id": "72100004",
"title": "КПРФ"
}
},
"result": "accept"
},
{
"deputy": {
"id": "99100491",
"name": "Абдулатипов Рамазан Гаджимурадович",
"faction": {
"id": "72100024",
"title": "ЕР"
}
},
"result": "none"
}
.......,` etc
My code looks like this:
urlData = "https://raw.githubusercontent.com/data-dumaGovRu/vote/master/poll/2011-12-21/75785.json"
response = urllib.request.urlopen(urlData)
content = response.read()
data = json.loads(content.decode("utf8"))
for i in data:
#print(data["name"])
`
And I don't know what to do with that #print line. How should I write it?
You can access the list containing the deputies with data['votes']. Iterating through the list, you can access the keys you're interested in as you would with dict key lookups. Nested dicts imply you have to walk through the keys starting from the root to your point of interest:
# Each vote holds a nested "deputy" dict; walk down from it to the fields.
for vote in data['votes']:
    deputy = vote['deputy']
    print(deputy['id'], deputy['name'], deputy['faction']['id'])

Categories

Resources