I have a dynamic json, in whcih key(attributes) will be different based on records.
I want to write below json data into csv file:
[{"Id": "12345",
"attributes": {"Address": [{"label": "United Kingdom",
"value": {"AddressLine": [{"value": "Baker "
"Street"}]}},
{"label": "United States",
"value": {"AddressLine": [{"value": "Florida"}]}}],
"CountryCode": [{"value": "Australia"}],
"Identifiers": [{"value": {"Type": [{"value": "Licence Id"}]}},
{"value": {"Type": [{"value": "NPI"}]}}],
"StatusReasonCode": [{"value": "XXX"}],
"UniqueId": [{"value": "71581742"}]},
"createdBy": "Rahul"}]
Data is expected in below format of the csv:
ID, createdBy, CountryCode, StatusReasonCode, Identifiers, UniqueId, AddressLine
12345,Rahul,Australia,XXX,Licence Id,71581742,Baker Street
12345,Rahul,Australia,XXX,NPI,71581742,Florida
Here is my code to extract the data from json:
import json
with open('data.json') as f:
data = json.load(f)
for key,value in data.items():
if(type(value))==str:
print(key + ',' + value)
# global res
res =[]
if(type(value))==list:
for fg in value:
crosswalk_final=fg['value']
if (type(value))== dict:
for key1,val in value.items():
for k in val:
if type(k['value']) == dict:
for sub_key,sub_value in k.items():
if(type(sub_value)) == dict:
for child_key,child_value in sub_value.items():
if(type(child_value)) == list:
for m in child_value:
if type(m['value']) == dict:
for qaq,waq in m.items():
if (isinstance(waq, dict)):
for our,pur in waq.items():
for qq in pur:
print(our+','+qq['value'])
else:
pass
print(key1+'_'+sub_key+'_'+child_key+','+m['value'])
else:
attr1=(key+'_'+key1+','+k['value'])
print((attr1))
The above code is giving me result in below format:
Id,12345
createdBy,Rahul
attributes_UniqueId,71581742
attributes_CountryCode,Australia
attributes_StatusReasonCode,XXX
Address_value_AddressLine,Baker Street
Address_value_AddressLine,Florida
Identifiers_value_Type,Licence Id
Identifiers_value_Type,NPI
However I am not sure how to write it in my csv (in the same format shown above).
Write a function that flattens the dictionary into a list with constant length. If key is missing, set value to None.
Example:
data_dict = {"a": 1, "c": 12, "b": 0}
data_list = [data_dict.get("a"), data_dict.get("b"), data_dict.get("c")]
Then insert in csv.
Related
I am completely new to python and trying to covert nested json files to csv. The current code I am trying to use is:
import json
def read_json(filename: str) -> dict:
try:
with open(filename, "r") as f:
data = json.loads(f.read())
except:
raise Exception(f"Reading {filename} file encountered an error")
return data
def normalize_json(data: dict) -> dict:
new_data = dict()
for key, value in data.items():
if not isinstance(value, dict):
new_data[key] = value
else:
for k, v in value.items():
new_data[key + "_" + k] = v
return new_data
def generate_csv_data(data: dict) -> str:
# Defining CSV columns in a list to maintain
# the order
csv_columns = data.keys()
# Generate the first row of CSV
csv_data = ",".join(csv_columns) + "\n"
# Generate the single record present
new_row = list()
for col in csv_columns:
new_row.append(str(data[col]))
# Concatenate the record with the column information
# in CSV format
csv_data += ",".join(new_row) + "\n"
return csv_data
def write_to_file(data: str, filepath: str) -> bool:
try:
with open(filepath, "w+") as f:
f.write(data)
except:
raise Exception(f"Saving data to {filepath} encountered an error")
def main():
# Read the JSON file as python dictionary
data = read_json(filename="test2.json")
# Normalize the nested python dict
new_data = normalize_json(data=data)
# Pretty print the new dict object
print("New dict:", new_data)
# Generate the desired CSV data
csv_data = generate_csv_data(data=new_data)
# Save the generated CSV data to a CSV file
write_to_file(data=csv_data, filepath=data2.csv")
if __name__ == '__main__':
main()
It works partly: I get a CSV file that contains all values. However, for the nested key fields it only gives me the "highest" level (e.g. I get "currentEmployments" but not "currentEmployments_firmId").
Could someone help me with this?
Sample json file:
{
"basicInformation": {
"individualId": 10000,
"firstName": "Name",
"middleName": "middleName.",
"lastName": "lastName",
"bcScope": "Active",
"iaScope": "NotInScope",
"daysInIndustryCalculatedDate": "1/1/2000"
},
"currentEmployments": [
{
"firmId": 001,
"firmName": "firm1",
"iaOnly": "N",
"registrationBeginDate": "1/1/2005",
"firmBCScope": "ACTIVE",
"firmIAScope": "ACTIVE",
"iaSECNumber": "10000",
"iaSECNumberType": "100",
"bdSECNumber": "1000",
"branchOfficeLocations": [
{
"locatedAtFlag": "Y",
"supervisedFromFlag": "N",
"privateResidenceFlag": "N",
"branchOfficeId": "10000",
"street1": "street1",
"city": "city",
"state": "MD",
"country": "United States",
"zipCode": "10000"
}
]
}
],
"currentIAEmployments": [],
"previousEmployments": [
{
"iaOnly": "N",
"bdSECNumber": "20000",
"firmId": 200,
"firmName": "firm2",
"street1": "street",
"city": "city",
"state": "MD",
"country": "UNITED STATES",
"zipCode": "10000",
}
],
"examsCount": {
"stateExamCount": 0,
"principalExamCount": 0,
"productExamCount": 1
},
}
I have a json file, and I'm reading this file with json library
This is the json content (example)
{
"type": "champion",
"format": "standAloneComplex",
"version": "10.18.1",
"data": {
"Aatrox": {
"version": "10.18.1",
"id": "Aatrox",
"key": "266",
"name": "Aatrox"
},
"Ahri": {
"version": "10.18.1",
"id": "Ahri",
"key": "103",
"name": "Ahri",
},
}
Now how can I check if key is equal to 266 and return the value of name?
I was trying with something like this
import json
with open('./source/champion.json') as json_file:
data_champs = json.load(json_file)['data']
for champ in data_champs:
for champ_info in data_champs[champ]:
if champ['key'] == 266:
print(champ)
But return TypeError: string indices must be integers
Try the following:
import json
with open('./source/champion.json') as json_file:
for name, info in json.load(json_file)['data'].items():
if info['key'] == 266:
print(name)
Or even better, we can close the file after we get the data and not keep it open during processing:
import json
with open('./source/champion.json') as json_file:
data = json.load(json_file)['data']
for name, info in data.items():
if info['key'] == 266:
print(name)
Explanation
The easiest way to iterate over a dict's elements is by using its .items() method:
for key, value in d.items():
print(key, "-->", value)
below (iterating over the values only since the keys are not important here)
import json
with open('data.json') as f:
data = json.load(f)['data']
for v in data.values():
if v['key'] == '266':
print(v['name'])
break
output
Aatrox
Here you go:
import json
with open('champion.json') as json_file:
data_champs = json.load(json_file)['data']
for data in data_champs.keys():
if data_champs[data]['key']=='266':
print(data_champs[data]['name'])
Prints:
Aatrox
I have following JSON, returned from a REST service, where I want to generate a unique names for each value by combining parent keys. For example. name+phone+address+city+name , name+phone+address+city+population+skilled+male and so on.
{
"name": "name",
"phone": "343444444",
"address": {
"lat": 23.444,
"lng": 34.3322,
"city":{
"name": "city name",
"population": {
"skilled": {
"male": 2,
"female": 4
},
"uneducated": {
"male": 20,
"femail": 4
}
}
}
},
"email": "email",
"education": "phd"
}
I want to combine all key names starting from the parent of the JSON tree.
Here is what I am doing
class TestJson
def walk_through(self, json_object):
for k, v in json_object.items():
self.x_path = self.x_path + k
if type(v) is dict:
self.walk_through(v)
else:
print(self.x_path)
self.x_path = ""
This code is printing keys but only starting from the current parent node. I want to combine all keys up to root of the json.
If you ignore the name and phone keys, since they are not ancestors of city name or skilled male and the order of keys is not guaranteed, you can recursively build a flattened dict.
def walk_through(json_object):
d = {}
for k, v in json_object.items():
if isinstance(v, dict):
v = walk_through(v)
for vk, vv in v.items():
d["%s+%s" % (k, vk)] = vv
else:
d[k] = v
return d
print(json.dumps(walk_through(json_object), indent=2))
This prints:
{
"address+city+population+skilled+male": 2,
"name": "name",
"address+lng": 34.3322,
"address+city+name": "city name",
"address+lat": 23.444,
"address+city+population+uneducated+male": 20,
"phone": "343444444",
"address+city+population+uneducated+femail": 4,
"education": "phd",
"email": "email",
"address+city+population+skilled+female": 4
}
Note: this ignores lists an will not find dicts inside them.
If you want to print all keys of your python dict you can do the following:
def print_keys(d):
for key, value in d.iteritems():
print key,
if isinstance(value, dict):
print_keys(value)
Ok so I have the following list of dictionarys that I am trying to convert to a json file:
geojson_list = [
{'name': 'Parallelogram1', 'coordinates':
[[115.67097179583487, -32.36672530921233], [115.96656222999665,
-32.36672530921233], [115.90410905434761, -32.49580085924758], [115.60851862018583, -32.49580085924758], [115.67097179583487,
-32.36672530921233]], 'area': 0.0381534978746},
{'name': 'Parallelogram2', 'coordinates': [[116.00622565359758,
-32.5791364092627], [116.02283522420637, -32.5791364092627], [116.02126260408991, -32.59706839673082], [116.00465303348112,
-32.59706839673082], [116.00622565359758, -32.5791364092627]],'area': 0.000297842612008}
]
This is the converter code named GeojsonConverter.py:
import json
def convert_to_geojson(my_list):
"""
This function converts a list of dictionaries into GeoJSON format
The dictionaries require a "coordinates" key whose value will be a 2D
list, a "name" key, with all other additional data.
:param my_list: A list of dictionaries
:return: a GeoJSON string
"""
try:
for d in my_list:
coord_list = d["coordinates"]
name = d["name"]
for coord in coord_list:
float(coord[0])
float(coord[1])
except ValueError:
print "ValueError: Coordinate cannot be converted to float."
return "ValueError: Coordinate cannot be converted to float."
except KeyError:
print "KeyError: No 'coordinates' or 'name' key found in dictionary"
return "KeyError: No 'coordinates' or 'name' key found in dictionary"
except Exception as e:
raise e
else:
feature_list = []
property_dict = {}
for d in my_list:
coord_list = d["coordinates"]
coord_list.append(d["coordinates"][0])
name = d["name"]
for key in d:
if (key is not "name") and (key is not "coordinates"):
property_dict[key] = d[key]
the_geom = {"type": "MultiPolygon", "coordinates": [[coord_list]]}
feature = {"type": "Feature", "geometry": the_geom, "name": name, "properties": property_dict}
feature_list.append(feature)
feature_collection = {"type": "FeatureCollection", "features": feature_list}
return json.dumps(feature_collection)
The converter converts the list just fine right up until the area key. I keep getting the last value in area of the last dictionary for all dictionary areas, so in this case all areas = 0.000297842612008
This is the json file I get after running the list through the converter and writing it to a file:
{ "type": "FeatureCollection", "features": [{"geometry": {"type":
"MultiPolygon", "coordinates": [[[[115.67097179583487,
-32.36672530921233], [115.96656222999665, -32.36672530921233], [115.90410905434761, -32.49580085924758], [115.60851862018583,
-32.49580085924758], [115.67097179583487, -32.36672530921233]]]]}, "type": "Feature", "name": "Parallelogram1", "properties": {"area":
0.000629970457642}},
{"geometry": {"type": "MultiPolygon", "coordinates": [[[[116.00622565359758, -32.5791364092627],
[116.02283522420637, -32.5791364092627], [116.02126260408991,
-32.59706839673082], [116.00465303348112, -32.59706839673082], [116.00622565359758, -32.5791364092627]]]]}, "type": "Feature",
"name": "Parallelogram2", "properties": {"area": 0.000629970457642} }
Notice the two different areas equal the same result when they should not.
The following code is how I am writing to a file.
import GeojsonConverter
my_geojson_string = GeojsonConverter2.convert_to_geojson(geojson_list)
name = "test"
try:
name = name[:-4] #subtract .csv from name to add a character onto the end of the file name. Eg. zzza.csv, not zzz.csva
with open("./datafiles/" + name + "JSON" + ".geojson", 'w') as jsondata: #Save json data into nameJSON.geojson
try:
print ""
print ("Writing json file: " + name + "JSON" + ".geojson")
jsondata.write(my_geojson_string)
except:
print "Error writing to file. FN: write to file"
sys.exit()
except:
print "Error opening file. FN: geojson output"
Where am I going wrong?
edit:
changed the last bit of the converter code to this
for d in my_list:
coord_list = d["coordinates"]
coord_list.append(d["coordinates"][0])
name = d["name"]
area_list = d["area"]
for key in d:
if (key is not "name") and (key is not "coordinates") and (key is not "area"):
property_dict[key] = d[key]
the_geom = {"type": "MultiPolygon", "coordinates": [[coord_list]]}
feature = {"type": "Feature", "geometry": the_geom, "name": name, "area": area_list, "properties": property_dict, }
feature_list.append(feature)
feature_collection = {"type": "FeatureCollection", "features": feature_list}
You are having a problem caused by variable reuse.
Every run through the for d in mylist: modifies property_dict, which then gets added to the feature_list. The next time through the loop, you modify the same property_dict, which overwrites the previous data. Moveing the property_dict = {} into the outer loop will fix this problem.
i have the data in this format present in json file
[
{
"FIRST NAME": "Nasim",
"EMAIL": "ac#iaculisnec.net",
"ADDLINE1": "855-8805 Nunc. Avenue",
"CITY": "Masterton",
"LOCATION":{"ADDLINE2":"855-8805",
"ADDLINE3":"Avenue",
"PIN":"100"}
},
{
"FIRST NAME": "Xanthus",
"EMAIL": "adipiscing.elit#tinciduntcongue.edu",
"ADDLINE1": "357-4583 Curae; St.",
"CITY": "Basildon",
"LOCATION":{"ADDLINE2":"357-4583",
"ADDLINE3":"Curae; St.",
"PIN":"101"}
},
{
"FIRST NAME": "Hedley",
"EMAIL": "Quisque.libero.lacus#arcu.ca",
"ADDLINE1": "315-623 Nibh. Road",
"CITY": "Abingdon",
"LOCATION":{"ADDLINE2":"315-623",
"ADDLINE3":"Nibh. Road",
"PIN":"102"}
}]
this is my code
data=json.loads(file('grade.json').read())
for row in data:
row['ADDRESS']= row['ADDLINE1']+','+ row['CITY']
del row['CITY'], row['ADDLINE1']
row['LOCATION1']=row['LOCATION']['ADDLINE2']+','+row['LOCATION'] ['ADDLINE3']+','+row['LOCATION']['PIN']
del row['LOCATION']
data =json.loads(file('grade.json').read())
out = {}
for sub in data.values():
for key, value in sub.items():
if key in out:
del out[key]
else:
out[key] = value
print(out)
file('files','w').write(json.dumps(data))
out_path= "outfile9.csv"
fieldnames = list(set(k for d in data for k in d))
with open(out_path, 'wb') as out_file:
writer = csv.DictWriter(out_file, fieldnames=fieldnames, dialect='excel')
writer.writeheader()
writer.writerows(data)
i want to remove d nested dictionary(LOCATION1, here after formatting-previously was LOCATION) but retain ADDLINE2,3,PIN as the same. i want a flattened dictionary. what can i do to improvise it?
i require keys in this form
[firstname,email,address,location{addline2,addline3,pin}]
even if extra nested values are added it should dynamically appear in this form
data=json.loads(file('grade.json').read())
for row in data:
row['ADDRESS']= row['ADDLINE1']+','+ row['CITY']
del row['CITY'], row['ADDLINE1']
row['LOCATION1']=row['LOCATION']['ADDLINE2']+','+row['LOCATION'] ['ADDLINE3']+','+row['LOCATION']['PIN']
del row['LOCATION']
data =json.loads(file('grade.json').read())
the above is all useless because of the last line, resets data.
to flatten ADDLINE2,3,PIN , add in the above loop, before everything else
row['ADDLINE2'] = row['LOCATION']['ADDLINE2']
row['ADDLINE3'] = row['LOCATION']['ADDLINE3']
row['PIN '] = row['LOCATION']['PIN ']