Remove object from JSON whose values are NaN using Python?

My final output JSON file is in the following format:
[
{
"Type": "UPDATE",
"resource": {
"site ": "Lakeside mh041",
"name": "Total Flow",
"unit": "CubicMeters",
"device": "2160 LaserFlow Module",
"data": [
{
"timestamp": [
"1087009200"
],
"value": [
6945.68
]
},
{
"timestamp": [
"1087095600"
],
"value": [
NaN
]
},
{
"timestamp": [
"1087182000"
],
"value": [
7091.62
]
},
I want to remove the whole object if the "value" is NaN.
Expected Output
[
{
"Type": "UPDATE",
"resource": {
"site ": "Lakeside mh041",
"name": "Total Flow",
"unit": "CubicMeters",
"device": "2160 LaserFlow Module",
"data": [
{
"timestamp": [
"1087009200"
],
"value": [
6945.68
]
},
{
"timestamp": [
"1087182000"
],
"value": [
7091.62
]
},
I cannot remove the blank values from my csv file because of the format of the file.
I have tried this:
with open('Result.json', 'r') as j:
    json_dict = json.loads(j.read())

json_dict['data'] = [item for item in json_dict['data'] if
                     len([val for val in item['value'] if isnan(val)]) == 0]
print(json_dict)
Error - json_dict['data'] = [item for item in json_dict['data'] if len([val for val in item['value'] if isnan(val)]) == 0]
TypeError: list indices must be integers or slices, not str
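The TypeError happens because json.loads returns a list here (the file starts with [), so json_dict['data'] tries to index a list with a string. A minimal sketch of the correct access path, assuming the structure shown above:
import json

with open('Result.json', 'r') as j:
    json_list = json.load(j)              # top level is a list of objects

data = json_list[0]['resource']['data']   # the "data" list sits under "resource"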

In case you have more than one value in the JSON "value": [...] list:
import json
from math import isnan
json_str = '''
[
{
"Type": "UPDATE",
"resource": {
"site ": "Lakeside mh041",
"name": "Total Flow",
"unit": "CubicMeters",
"device": "2160 LaserFlow Module",
"data": [
{
"timestamp": [
"1087009200"
],
"value": [
6945.68
]
},
{
"timestamp": [
"1087095600"
],
"value": [
NaN
]
}
]
}
}
]
'''
json_dict = json.loads(json_str)

for typeObj in json_dict:
    resource_node = typeObj['resource']
    resource_node['data'] = [
        item for item in resource_node['data']
        if len([val for val in item['value'] if isnan(val)]) == 0
    ]

print(json_dict)
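To write the cleaned structure back to disk, a minimal follow-up sketch (the output file name is just an example):
with open('Result_clean.json', 'w') as out:
    json.dump(json_dict, out, indent=4)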

For testing whether a value is NaN you can use the math.isnan() function (docs):
data = '''{"data": [
{
"timestamp": [
"1058367600"
],
"value": [
9.65
]
},
{
"timestamp": [
"1058368500"
],
"value": [
NaN
]
},
{
"timestamp": [
"1058367600"
],
"value": [
4.75
]
}
]}'''
import json
from math import isnan
data = json.loads(data)
data['data'] = [i for i in data['data'] if not isnan(i['value'][0])]
print(json.dumps(data, indent=4))
Prints:
{
"data": [
{
"timestamp": [
"1058367600"
],
"value": [
9.65
]
},
{
"timestamp": [
"1058367600"
],
"value": [
4.75
]
}
]
}
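The comprehension above only checks the first element of each "value" list. If a list can contain several numbers, a hedged variant with any() drops an entry when any of its values is NaN:
data['data'] = [i for i in data['data']
                if not any(isnan(v) for v in i['value'])]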

Related

Identify location of item in json file

Suppose I have the following JSON file. With data1["tenants"][1]['name'] I can select uniquename2. Is there a way to find that index (the 1) by looping over the document?
{
"tenants": [{
"key": "identifier",
"name": "uniquename",
"image": "url",
"match": [
"identifier"
],
"tags": [
"tag1",
"tag2"
]
},
{
"key": "identifier",
"name": "uniquename2",
"image": "url",
"match": [
"identifier1",
"identifier2"
],
"tags": ["tag"]
}
]
}
In short: data1["tenants"][1]['name'] = uniquename2 and data1["tenants"][0]['name'] = uniquename.
How can I find out which index has which name? So if I have uniquename2, what number/index corresponds to it?
You can iterate over the tenants to map each index to its name:
data = {
"tenants": [{
"key": "identifier",
"name": "uniquename",
"image": "url",
"match": [
"identifier"
],
"tags": [
"tag1",
"tag2"
]
},
{
"key": "identifier",
"name": "uniquename2",
"image": "url",
"match": [
"identifier1",
"identifier2"
],
"tags": ["tag"]
}
]
}
for index, tenant in enumerate(data['tenants']):
    print(index, tenant['name'])
OUTPUT
0 uniquename
1 uniquename2
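If repeated lookups by name are needed, the same loop can be turned into a name-to-index dictionary (a small sketch, assuming the names are unique):
name_to_index = {tenant['name']: index
                 for index, tenant in enumerate(data['tenants'])}
print(name_to_index['uniquename2'])  # 1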
Assuming you have already turned your JSON into a dictionary, this is how you can get the index of the first occurrence of a name in your list (this relies on the names actually being unique):
data = {
"tenants": [{
"key": "identifier",
"name": "uniquename",
"image": "url",
"match": [
"identifier"
],
"tags": [
"tag1",
"tag2"
]
},
{
"key": "identifier",
"name": "uniquename2",
"image": "url",
"match": [
"identifier1",
"identifier2"
],
"tags": ["tag"]
}
]
}
def index_of(tenants, tenant_name):
    try:
        return tenants.index(
            next(
                tenant for tenant in tenants
                if tenant["name"] == tenant_name
            )
        )
    except StopIteration:
        raise ValueError(
            f"tenants list does not have tenant by name {tenant_name}."
        )

index_of(data["tenants"], "uniquename")  # 0
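As an alternative sketch (not part of the original answer), a single pass with enumerate avoids the extra list.index() call and returns None instead of raising when the name is missing:
def index_of(tenants, tenant_name):
    return next((index for index, tenant in enumerate(tenants)
                 if tenant["name"] == tenant_name), None)

index_of(data["tenants"], "uniquename2")  # 1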

How to search into nested JSON by values with Python

In the following JSON object (which I can rewrite if that is easier) I need to search for the sshPort where id=="talend", envs.id=="DEV" and sshServer=="jardev0".
I've tried using Pandas, but it flattens the nested JSON objects, so I have to create 3 intermediate Pandas objects and search each of them.
I would expect an easier syntax, something like [x.id=="talend" && x[envs].id=="DEV" && x[envs].sshServer=="jardev0"]."sshPort".
[
{
"id": "talend",
"envs":
[
{"id":"DEV",
"lServeurs" :
[
{"sshServer":"jardev0", "sshPort":"20022","sshUser":"talend"},
{"sshServer":"jardev01","sshPort":"20022","sshUser":"talend"}
]
},
{"id": "PROD",
"lServeurs" :
[
{"sshServer": "jardprd01", "sshPort": "20023","sshUser":"talend"}
]
}
]
},
{
"id": "eprel",
"envs":
[
{"id": "DEV",
"lServeurs" :
[
{"sshServer": "jardev0", "sshPort": "20024", "sshUser":"eprel"},
{"sshServer": "jardev01", "sshPort": "20025", "sshUser":"eprel"}
]
},
{"id": "PROD",
"lServeurs" :
[
{"sshServer": "jardprd01", "sshPort": "20026","sshUser":"eprel"}
]
}
]
}
]
This should return "20022".
LINQ-style filtering syntax is, AFAIK, not available unless you go for a list comprehension.
But you can have a method like this:
def get_ssh_port(id, env_id, ssh_server):
    import json
    ssh_ports = []
    input_dict = json.loads(input_json)
    output_dict = [x for x in input_dict if x['id'] == id]
    for x in output_dict:
        for i in x['envs']:
            if i['id'] == env_id:
                for j in i['lServeurs']:
                    if j['sshServer'] == ssh_server:
                        ssh_ports.append(j['sshPort'])
    return ssh_ports
Usage:
input_json = """
[
{
"id": "talend",
"envs":
[
{"id":"DEV",
"lServeurs" :
[
{"sshServer":"jardev0", "sshPort":"20022","sshUser":"talend"},
{"sshServer":"jardev01","sshPort":"20022","sshUser":"talend"}
]
},
{"id": "PROD",
"lServeurs" :
[
{"sshServer": "jardprd01", "sshPort": "20023","sshUser":"talend"}
]
}
]
},
{
"id": "eprel",
"envs":
[
{"id": "DEV",
"lServeurs" :
[
{"sshServer": "jardev0", "sshPort": "20024", "sshUser":"eprel"},
{"sshServer": "jardev01", "sshPort": "20025", "sshUser":"eprel"}
]
},
{"id": "PROD",
"lServeurs" :
[
{"sshServer": "jardprd01", "sshPort": "20026","sshUser":"eprel"}
]
}
]
}
]
"""
get_ssh_port(id = 'talend', env_id = 'DEV', ssh_server = 'jardev0')
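Note that get_ssh_port reads input_json from the enclosing scope. A hedged variant passes the JSON text in explicitly, which makes the function easier to reuse and test:
import json

def get_ssh_port(input_json, id, env_id, ssh_server):
    entries = json.loads(input_json)
    return [server['sshPort']
            for entry in entries if entry['id'] == id
            for env in entry['envs'] if env['id'] == env_id
            for server in env['lServeurs'] if server['sshServer'] == ssh_server]

get_ssh_port(input_json, id='talend', env_id='DEV', ssh_server='jardev0')  # ['20022']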
Using nested list comprehensions (three nested iterators are rather clumsy, though):
import json
jsn = """[
{
"id": "talend",
"envs":
[
{"id":"DEV",
"lServeurs" :
[
{"sshServer":"jardev0", "sshPort":"20022","sshUser":"talend"},
{"sshServer":"jardev01","sshPort":"20022","sshUser":"talend"}
]
},
{"id": "PROD",
"lServeurs" :
[
{"sshServer": "jardprd01", "sshPort": "20023","sshUser":"talend"}
]
}
]
},
{
"id": "eprel",
"envs":
[
{"id": "DEV",
"lServeurs" :
[
{"sshServer": "jardev0", "sshPort": "20024", "sshUser":"eprel"},
{"sshServer": "jardev01", "sshPort": "20025", "sshUser":"eprel"}
]
},
{"id": "PROD",
"lServeurs" :
[
{"sshServer": "jardprd01", "sshPort": "20026","sshUser":"eprel"}
]
}
]
}
]"""
x = json.loads(jsn)
targetPorts = [server['sshPort']
               for envsKV in x
               for env in envsKV['envs']
               for server in env['lServeurs']
               if envsKV['id'] == "talend" and env['id'] == "DEV" and server['sshServer'] == "jardev0"]
print(targetPorts[0])
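If only the first match matters, a hedged alternative uses next() on a generator expression, so the search stops at the first hit and a default can be supplied:
target_port = next((server['sshPort']
                    for envsKV in x if envsKV['id'] == "talend"
                    for env in envsKV['envs'] if env['id'] == "DEV"
                    for server in env['lServeurs'] if server['sshServer'] == "jardev0"),
                   None)
print(target_port)  # '20022'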

Insert and delete geojson object based on conditions in Python

I have a Geojson as follows:
data = {
"type": "FeatureCollection",
"name": "entities",
"features": [
{
"type": "Feature",
"properties": {
"Layer": "0",
"SubClasses": "AcDbEntity:AcDbBlockReference",
"EntityHandle": "3C8",
"area": "141.81",
"type": "p",
"Text": "area:141.81;type:p"
},
"geometry": {
"type": "Polygon",
"coordinates": [
[
[
2721.1572741014097,
1454.3223948456648
],
[
2720.121847266826,
1454.3223948456648
],
[
2720.121847266826,
1452.6092152478227
],
[
2710.5679254269344,
1452.6092152478227
],
[
2721.1572741014097,
1430.1478385206133
],
[
2721.1572741014097,
1454.3223948456648
]
]
]
}
},
{
"type": "Feature",
"properties": {
"Layer": "0",
"SubClasses": "AcDbEntity:AcDbBlockReference",
"EntityHandle": "3CE",
"area": "44.79",
"type": "h",
"Text": "area:44.79;type:h"
},
"geometry": {
"type": "Polygon",
"coordinates": [
[
[
2710.723323781393,
1450.3320226620049
],
[
2720.0654518264787,
1450.3320226620049
],
[
2720.0654518264787,
1445.537183875705
],
[
2710.723323781393,
1445.537183875705
],
[
2710.723323781393,
1450.3320226620049
]
]
]
}
},
{
"type": "Feature",
"properties": {
"Layer": "0",
"SubClasses": "AcDbEntity:AcDbBlockReference",
"EntityHandle": "610",
"name": "706",
"area": "92.28",
"type": "o",
"Text": "name:706;area:92.28;type:o"
},
"geometry": {
"type": "Polygon",
"coordinates": [
[
[
2714.603212251531,
1462.7249212430308
],
[
2711.7289360797,
1462.7249212430308
],
[
2711.7289360797,
1464.852506681824
],
[
2705.7302059101926,
1460.6840827804538
],
[
2710.567925426934,
1454.3223948456637
],
[
2710.567925426934,
1453.838838298367
],
[
2714.603212251531,
1453.838838298367
],
[
2714.603212251531,
1462.7249212430308
]
]
]
}
}
]
}
I want to insert "name": "" if "name" does not exist in properties, and delete the "Text" member since it duplicates the other properties. How can I do that in Python?
Thanks a lot in advance!
Expected result:
data = {
"type": "FeatureCollection",
"name": "entities",
"features": [
{
"type": "Feature",
"properties": {
"Layer": "0",
"SubClasses": "AcDbEntity:AcDbBlockReference",
"EntityHandle": "3C8",
"name": "",
"area": "141.81",
"type": "p"
},
"geometry": {
"type": "Polygon",
"coordinates": [
[
[
2721.1572741014097,
1454.3223948456648
],
[
2720.121847266826,
1454.3223948456648
],
[
2720.121847266826,
1452.6092152478227
],
[
2710.5679254269344,
1452.6092152478227
],
[
2721.1572741014097,
1430.1478385206133
],
[
2721.1572741014097,
1454.3223948456648
]
]
]
}
},
{
"type": "Feature",
"properties": {
"Layer": "0",
"SubClasses": "AcDbEntity:AcDbBlockReference",
"EntityHandle": "3CE",
"name": "",
"area": "44.79",
"type": "h"
},
"geometry": {
"type": "Polygon",
"coordinates": [
[
[
2710.723323781393,
1450.3320226620049
],
[
2720.0654518264787,
1450.3320226620049
],
[
2720.0654518264787,
1445.537183875705
],
[
2710.723323781393,
1445.537183875705
],
[
2710.723323781393,
1450.3320226620049
]
]
]
}
},
{
"type": "Feature",
"properties": {
"Layer": "0",
"SubClasses": "AcDbEntity:AcDbBlockReference",
"EntityHandle": "610",
"name": "706",
"area": "92.28",
"type": "o"
},
"geometry": {
"type": "Polygon",
"coordinates": [
[
[
2714.603212251531,
1462.7249212430308
],
[
2711.7289360797,
1462.7249212430308
],
[
2711.7289360797,
1464.852506681824
],
[
2705.7302059101926,
1460.6840827804538
],
[
2710.567925426934,
1454.3223948456637
],
[
2710.567925426934,
1453.838838298367
],
[
2714.603212251531,
1453.838838298367
],
[
2714.603212251531,
1462.7249212430308
]
]
]
}
}
]
}
UPDATE:
My solution so far; it seems to work.
import json

features = data["features"]
for i in features:
    d = i["properties"]
    if "name" not in d:
        d["name"] = ""
    if i["properties"]["Text"] is not None:
        del i["properties"]["Text"]
I defined it as a function, but in some cases I get the following error. Does someone know how to fix it? Thanks.
Traceback (most recent call last):
File "<ipython-input-1-8e3095f67c57>", line 138, in <module>
modify_geojson(output_file)
File "<ipython-input-1-8e3095f67c57>", line 102, in modify_geojson
if i["properties"]["Text"] is not None:
KeyError: 'Text'
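The KeyError means some features simply have no "Text" key, so reading it before deleting fails. A minimal sketch of a guarded version of the same loop:
for i in data["features"]:
    d = i["properties"]
    d.setdefault("name", "")   # add "name": "" only when it is missing
    d.pop("Text", None)        # remove "Text" if present, do nothing otherwise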
In each properties object, 'Text' is only present once. Please explain where it's duplicated?

Update values in geojson file in Python

I have geojson file as follows:
{
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"properties": {},
"geometry": {
"type": "LineString",
"coordinates": [
[
57.45849609375,
57.36801461845934
],
[
57.10693359375,
56.31044317134597
],
[
59.205322265625,
56.20059291588374
],
[
59.4140625,
57.29091812634045
],
[
57.55737304687501,
57.36801461845934
]
]
}
},
{
"type": "Feature",
"properties": {},
"geometry": {
"type": "LineString",
"coordinates": [
[
59.40307617187499,
57.29685437021898
],
[
60.8203125,
57.314657355733274
],
[
60.74340820312499,
56.26776108757582
],
[
59.227294921875,
56.21281407174654
],
[
59.447021484375,
57.29091812634045
]
]
}
}
]
}
I want to replace LineString in "type": "LineString" with Polygon, and also replace the last point of each LineString with its first point so that it closes, if it has more than 3 points.
How can I do this in Python with geopandas or pandas? Thanks.
Here is expected output:
{
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"properties": {},
"geometry": {
"type": "Polygon",
"coordinates": [
[
57.45849609375,
57.36801461845934
],
[
57.10693359375,
56.31044317134597
],
[
59.205322265625,
56.20059291588374
],
[
59.4140625,
57.29091812634045
],
[
57.45849609375,
57.36801461845934
]
]
}
},
{
"type": "Feature",
"properties": {},
"geometry": {
"type": "Polygon",
"coordinates": [
[
59.40307617187499,
57.29685437021898
],
[
60.8203125,
57.314657355733274
],
[
60.74340820312499,
56.26776108757582
],
[
59.227294921875,
56.21281407174654
],
[
59.40307617187499,
57.29685437021898
]
]
}
}
]
}
Script to get type and coordinates of first LineString:
import json
from pprint import pprint

with open('data.geojson') as f:
    data = json.load(f)

pprint(data)
data["features"][0]["geometry"]['type']
data["features"][0]["geometry"]['coordinates']
You can achieve that with the json module:
import json

file_line = 'file.json'
file_poly = 'file_poly.json'

with open(file_line, 'r') as f:
    data = json.load(f)

for feature in data['features']:
    if feature['geometry']['type'] == 'LineString' and len(feature['geometry']['coordinates']) >= 3:
        feature['geometry']['type'] = 'Polygon'
        feature['geometry']['coordinates'].append(feature['geometry']['coordinates'][0])

with open(file_poly, 'w+') as f:
    json.dump(data, f, indent=2)
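Two caveats, offered as a hedged aside: the question's expected output replaces the last point rather than appending an extra one, and a strictly valid GeoJSON Polygon wraps its ring in one more list level (an array of linear rings). A sketch covering both:
for feature in data['features']:
    geom = feature['geometry']
    coords = geom['coordinates']
    if geom['type'] == 'LineString' and len(coords) > 3:
        coords[-1] = coords[0]          # close the ring by replacing the last point
        geom['type'] = 'Polygon'
        geom['coordinates'] = [coords]  # Polygon coordinates are a list of rings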

Convert Nested JSON to Excel using Python

I want to convert nested JSON to Excel file format using Python. I've nearly done it as per the requirements, but I want to achieve the Excel format shown below.
JSON
[
{
"url": "https://www.amazon.com/Best-Sellers-Appliances-Cooktops/zgbs/appliances/3741261",
"subCategory": [
],
"title": "Cooktops"
},
{
"url": "https://www.amazon.com/Best-Sellers-Appliances-Dishwashers/zgbs/appliances/3741271",
"subCategory": [
{
"url": "https://www.amazon.com/Best-Sellers-Appliances-Built-Dishwashers/zgbs/appliances/3741281",
"subCategory": [
],
"title": "Built-In Dishwashers"
},
{
"url": "https://www.amazon.com/Best-Sellers-Appliances-Portable-Countertop-Dishwashers/zgbs/appliances/3741301",
"subCategory": [
],
"title": "Portable & Countertop Dishwashers"
}
],
"title": "Dishwashers"
},
{
"url": "https://www.amazon.com/Best-Sellers-Appliances-Freezers/zgbs/appliances/3741331",
"subCategory": [
{
"url": "https://www.amazon.com/Best-Sellers-Appliances-Chest-Freezers/zgbs/appliances/3741341",
"subCategory": [
],
"title": "Chest Freezers"
},
{
"url": "https://www.amazon.com/Best-Sellers-Appliances-Upright-Freezers/zgbs/appliances/3741351",
"subCategory": [
],
"title": "Upright Freezers"
}
],
"title": "Freezers"
},
{
"url": "https://www.amazon.com/Best-Sellers-Appliances-Ice-Makers/zgbs/appliances/2399939011",
"subCategory": [
],
"title": "Ice Makers"
},
{
"url": "https://www.amazon.com/Best-Sellers-Appliances-Range-Hoods/zgbs/appliances/3741441",
"subCategory": [
],
"title": "Range Hoods"
},
{
"url": "https://www.amazon.com/Best-Sellers-Appliances-Ranges/zgbs/appliances/3741411",
"subCategory": [
{
"url": "https://www.amazon.com/Best-Sellers-Appliances-Drop-Ranges/zgbs/appliances/3741421",
"subCategory": [
],
"title": "Drop-In Ranges"
},
{
"url": "https://www.amazon.com/Best-Sellers-Appliances-Freestanding-Ranges/zgbs/appliances/3741431",
"subCategory": [
],
"title": "Freestanding Ranges"
},
{
"url": "https://www.amazon.com/Best-Sellers-Appliances-Slide-Ranges/zgbs/appliances/2399946011",
"subCategory": [
],
"title": "Slide-In Ranges"
}
],
"title": "Ranges"
},
{
"url": "https://www.amazon.com/Best-Sellers-Appliances-Refrigerators/zgbs/appliances/3741361",
"subCategory": [
],
"title": "Refrigerators"
},
{
"url": "https://www.amazon.com/Best-Sellers-Appliances-Wall-Ovens/zgbs/appliances/3741481",
"subCategory": [
{
"url": "https://www.amazon.com/Best-Sellers-Appliances-Combination-Microwave-Wall-Ovens/zgbs/appliances/3741491",
"subCategory": [
],
"title": "Combination Microwave & Wall Ovens"
},
{
"url": "https://www.amazon.com/Best-Sellers-Appliances-Double-Wall-Ovens/zgbs/appliances/3741501",
"subCategory": [
],
"title": "Double Wall Ovens"
},
{
"url": "https://www.amazon.com/Best-Sellers-Appliances-Single-Wall-Ovens/zgbs/appliances/3741511",
"subCategory": [
],
"title": "Single Wall Ovens"
}
],
"title": "Wall Ovens"
},
{
"url": "https://www.amazon.com/Best-Sellers-Appliances-Warming-Drawers/zgbs/appliances/2399955011",
"subCategory": [
],
"title": "Warming Drawers"
},
{
"url": "https://www.amazon.com/Best-Sellers-Appliances-Washers-Dryers/zgbs/appliances/2383576011",
"subCategory": [
{
"url": "https://www.amazon.com/Best-Sellers-Appliances-Clothes-Dryers/zgbs/appliances/13397481",
"subCategory": [
],
"title": "Dryers"
},
{
"url": "https://www.amazon.com/Best-Sellers-Appliances-Clothes-Washing-Machines/zgbs/appliances/13397491",
"subCategory": [
],
"title": "Washers"
},
{
"url": "https://www.amazon.com/Best-Sellers-Appliances-Combination-Washers-Dryers/zgbs/appliances/13755271",
"subCategory": [
],
"title": "All-in-One Combination Washers & Dryers"
},
{
"url": "https://www.amazon.com/Best-Sellers-Appliances-Stacked-Washer-Dryer-Units/zgbs/appliances/2399957011",
"subCategory": [
],
"title": "Stacked Washer & Dryer Units"
}
],
"title": "Washers & Dryers"
},
{
"url": "https://www.amazon.com/Best-Sellers-Appliances-Wine-Cellars/zgbs/appliances/3741521",
"subCategory": [
{
"url": "https://www.amazon.com/Best-Sellers-Appliances-Built-Wine-Cellars/zgbs/appliances/3741551",
"subCategory": [
],
"title": "Built-In Wine Cellars"
},
{
"url": "https://www.amazon.com/Best-Sellers-Appliances-Freestanding-Wine-Cellars/zgbs/appliances/3741541",
"subCategory": [
],
"title": "Freestanding Wine Cellars"
},
{
"url": "https://www.amazon.com/Best-Sellers-Appliances-Furniture-Style-Wine-Cellars/zgbs/appliances/3741561",
"subCategory": [
],
"title": "Furniture-Style Wine Cellars"
},
{
"url": "https://www.amazon.com/Best-Sellers-Appliances-Small-Wine-Cellars/zgbs/appliances/3741531",
"subCategory": [
],
"title": "Small Wine Cellars"
},
{
"url": "https://www.amazon.com/Best-Sellers-Appliances-Wine-Cellar-Cooling-Systems/zgbs/appliances/3741581",
"subCategory": [
],
"title": "Wine Cellar Cooling Systems"
},
{
"url": "https://www.amazon.com/Best-Sellers-Appliances-Wine-Rooms/zgbs/appliances/3741571",
"subCategory": [
],
"title": "Wine Rooms"
}
],
"title": "Wine Cellars"
},
{
"url": "https://www.amazon.com/Best-Sellers-Appliances-Home-Appliance-Warranties/zgbs/appliances/2242350011",
"subCategory": [
],
"title": "Appliance Warranties"
}
]
I'm traversing all subCategories like this:
row = 1

def TraverseJSONTree(jsonObject, count=0):
    global row
    title = jsonObject.get('title')
    url = jsonObject.get('url')
    print 'Title: ' + title + ' , Position: ' + str(count)
    worksheet.write_string(row, count, title)
    worksheet.write_string(row, 6, url)
    row += 1
    subCategories = jsonObject.get('subCategory', [])
    for category in subCategories:
        TraverseJSONTree(category, count + 1)

for jsonObject in json.loads(jsonArray):
    TraverseJSONTree(jsonObject)
Expected Result
row = 1

def TraverseJSONTree(jsonObject, main_title=None, count=0):
    global row
    if main_title is None:
        main_title = title = jsonObject.get('title')
    else:
        title = jsonObject.get('title')
    url = jsonObject.get('url')
    print 'Title: ' + title + ' , Position: ' + str(count)
    if main_title is not None:
        worksheet.write_string(row, 0, main_title)  # original top-level title goes in column 0
    worksheet.write_string(row, count, title)
    worksheet.write_string(row, 6, url)
    row += 1
    subCategories = jsonObject.get('subCategory', [])
    for category in subCategories:
        TraverseJSONTree(category, main_title, count + 1)

for jsonObject in json.loads(jsonArray):
    TraverseJSONTree(jsonObject)
This will return your expected output: it adds a check so that, when an item comes from a subcategory, the original top-level title is written in column 0 of the Excel sheet, and the rest remains the same.
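For completeness, a minimal xlsxwriter setup that the snippet above assumes (the workbook name is just an example):
import xlsxwriter

workbook = xlsxwriter.Workbook('categories.xlsx')  # example output name
worksheet = workbook.add_worksheet()

# ... run the traversal above, then finalize the file:
workbook.close()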
Modification:
The simplest way to do this would be to use the csv module. Say we have the whole JSON in the variable a:
import csv

fieldnames = ['Category1', 'Category1.1', 'url']
csvfile = open("category.csv", 'wb')
csvfilewriter = csv.DictWriter(csvfile, fieldnames=fieldnames, dialect='excel', delimiter=',')
csvfilewriter.writeheader()

for b in a:
    data = []
    data.append(b['title'])
    data.append("")
    data.append(b['url'])
    csvfilewriter.writerow(dict(zip(fieldnames, data)))
    for i in xrange(len(b['subCategory'])):
        data = []  # reset for each subcategory row
        data.append(b['title'])
        data.append(b['subCategory'][i]['title'])
        data.append(b['subCategory'][i]['url'])
        csvfilewriter.writerow(dict(zip(fieldnames, data)))
You will have the desired CSV in the same location. This works for only two category levels (in the data you gave there are only two, i.e. 1 and 1.1); if you need more levels, repeat the same pattern. (I know it's not the most efficient way; I couldn't think of a better one in such a short time.)
You can also use the pandas module to convert each dictionary:
import pandas as pd
pd.DataFrame.from_dict(dictionary_element)
Then do this for all the dictionaries in that JSON, merge them, and save the result to a CSV file.
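As a hedged sketch of that pandas route (Python 3, assuming the parsed JSON list is in a and that an Excel writer engine such as openpyxl is installed): flatten each category and its subcategories into rows, build one DataFrame, and write it to Excel with to_excel (a CSV via to_csv works the same way):
import pandas as pd

rows = []
for category in a:
    # top-level category row with an empty subcategory column
    rows.append({'Category1': category['title'], 'Category1.1': '', 'url': category['url']})
    for sub in category['subCategory']:
        rows.append({'Category1': category['title'],
                     'Category1.1': sub['title'],
                     'url': sub['url']})

df = pd.DataFrame(rows, columns=['Category1', 'Category1.1', 'url'])
df.to_excel('categories.xlsx', index=False)  # example output name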
