manipulating json in python using recursion

manipulating json in python using recursion - python

All,
I am trying to change the way some json looks by going through and formatting it in the following way:
1. flatten all of the fields lists
2. Then remove the fields lists and replace them with the name : flatten list
Example:
{
"name": "",
"fields": [{
"name": "keys",
"fields": [{
"node-name": "0/0/CPU0"
},
{
"interface-name": "TenGigE0/0/0/47"
},
{
"device-id": "ASR9K-H1902.corp.cisco.com"
}
]
},
{
"name": "content",
"fields": [{
"name": "lldp-neighbor",
"fields": [{
"receiving-interface-name": "TenGigE0/0/0/47"
},
{
"receiving-parent-interface-name": "Bundle-Ether403"
},
{
"device-id": "ASR9K-H1902.corp.cisco.com"
},
{
"chassis-id": "78ba.f975.a64f"
},
{
"port-id-detail": "Te0/1/0/4/0"
},
{
"header-version": 0
},
{
"hold-time": 120
},
{
"enabled-capabilities": "R"
},
{
"platform": ""
}
]
}]
}
]
}
Would turn into:
{
"": [{
"keys": [{
"node-name": "0/0/CPU0",
"interface-name": "TenGigE0/0/0/47",
"device-id": "ASR9K-H1902.corp.cisco.com"
}]
},
{
"content": [{
"lldp-neighbor": [{
"receiving-interface-name": "TenGigE0/0/0/47",
"receiving-parent-interface-name": "Bundle-Ether403",
"device-id": "ASR9K-H1902.corp.cisco.com",
"chassis-id": "78ba.f975.a64f",
"port-id-detail": "Te0/1/0/4/0",
"header-version": 0,
"hold-time": 120,
"enabled-capabilities": "R",
"platform": ""
}]
}]
}
]
}
I have tried the following to get the list flattened:
def _flatten_fields(self, fields_list):
c = {}
for b in [d for d in fields_list if bool(d)]:
c.update(b)
return c
This seems to work, but I can't figure out a way to get into the sub-levels using recursion. I am currently saving all flattened lists and names into a new dictionary; is there a way to do it by just manipulating the original dictionary?

This worked on the example you provided:
import json
def flatten(data):
    """Collapse nested ``name``/``fields`` records into plain nested dicts.

    A dict carrying both ``name`` and ``fields`` becomes
    ``{name: flatten(fields)}``. Any other dict is copied key for key.
    Any non-dict input is iterated and the flattened entries are merged
    into one dict (later keys overwrite earlier ones).

    Returns a new dict; ``data`` is not modified.
    """
    result = {}
    if isinstance(data, dict):
        if 'name' in data and 'fields' in data:
            result[data['name']] = flatten(data['fields'])
        else:
            # data.keys()[0] only works on Python 2 and raises on an empty
            # dict; update() copies every pair on both Python versions.
            result.update(data)
    else:
        for entry in data:
            result.update(flatten(entry))
    return result
# The print() call form works on Python 3 as well as Python 2.
print(json.dumps(flatten(data), indent=4))
Output
{
"": {
"keys": {
"node-name": "0/0/CPU0",
"interface-name": "TenGigE0/0/0/47",
"device-id": "ASR9K-H1902.corp.cisco.com"
},
"content": {
"lldp-neighbor": {
"receiving-interface-name": "TenGigE0/0/0/47",
"receiving-parent-interface-name": "Bundle-Ether403",
"header-version": 0,
"port-id-detail": "Te0/1/0/4/0",
"chassis-id": "78ba.f975.a64f",
"platform": "",
"device-id": "ASR9K-H1902.corp.cisco.com",
"hold-time": 120,
"enabled-capabilities": "R"
}
}
}
}
It doesn't have the extra list layers shown in your expected output, but I don't think you want those.

This worked on the example you provided:
def flatten_fields(fields_list):
    """Flatten a list of ``name``/``fields`` records into a one-element list.

    For every dict in ``fields_list``:
    * a ``fields`` key is flattened recursively and stored under the
      dict's ``name``;
    * the ``name`` key itself is not copied;
    * every other key is copied as is.

    Returns ``[merged_dict]``. The single-element list keeps the list
    layer at every level of the result.
    """
    c = {}
    for item in fields_list:
        for key in item:
            if key == "fields":
                c[item["name"]] = flatten_fields(item["fields"])
            elif key != "name":
                # No break here: stopping after the first plain key would
                # silently drop the remaining keys of a multi-key entry.
                c[key] = item[key]
    return [c]
But it works on a list of dictionaries, so you should call it like flatten_fields([data])[0].
The output is:
{
"": [{
"keys": [{
"node-name": "0/0/CP0",
"interface-name": "TenGigE0/0/0/47",
"device-id": "ASR9K-H1902.corp.cisco.com"
}],
"content": [{
"lldp-neighbor": [{
"chassis-id": "78ba.f975.a64f",
"receiving-parent-interface-name": "Bndle-Ether403",
"enabled-capabilities": "R",
"device-id": "ASR9K-H1902.corp.cisco.com",
"hold-time": 120,
"receiving-interface-name": "TenGigE0/0/0/47",
"platform": "",
"header-version": 0,
"port-id-detail": "Te0/1/0/4/0"
}]
}]
}]
}

Related

how do I access this json data in python?

Hi, I'm pretty new at coding and I am trying to create a program in Python that reads a JSON file and saves part of its data (not everything, just what I want) to another file. I googled how to parse the data, but there's something I don't understand.
that's a part of the json file:
`
{
"profileRevision": 548789,
"profileId": "campaign",
"profileChangesBaseRevision": 548789,
"profileChanges": [
{
"changeType": "fullProfileUpdate",
"profile": {
"_id": "2da4f079f8984cc48e84fc99dace495d",
"created": "2018-03-29T11:02:15.190Z",
"updated": "2022-10-31T17:34:43.284Z",
"rvn": 548789,
"wipeNumber": 9,
"accountId": "63881e614ef543b2932c70fed1196f34",
"profileId": "campaign",
"version": "refund_teddy_perks_september_2022",
"items": {
"8ec8f13f-6bf6-4933-a7db-43767a055e66": {
"templateId": "Quest:heroquest_loadout_constructor_2",
"attributes": {
"quest_state": "Claimed",
"creation_time": "min",
"last_state_change_time": "2019-05-18T16:09:12.750Z",
"completion_complete_pve03_diff26_loadout_constructor": 300,
"level": -1,
"item_seen": true,
"sent_new_notification": true,
"quest_rarity": "uncommon",
"xp_reward_scalar": 1
},
"quantity": 1
},
"6940c71b-c74b-4581-9f1e-c0a87e246884": {
"templateId": "Worker:workerbasic_sr_t01",
"attributes": {
"gender": "2",
"personality": "Homebase.Worker.Personality.IsDreamer",
"level": 1,
"item_seen": true,
"squad_slot_idx": -1,
"portrait": "WorkerPortrait:IconDef-WorkerPortrait-Dreamer-F02",
"building_slot_used": -1,
"set_bonus": "Homebase.Worker.SetBonus.IsMeleeDamageLow"
}
}
}
]
}
`
I can access profileChanges. I wrote this to create another json file with only the profileChanges things:
`
import json

# Read the source document; the with-block closes the file afterwards.
with open("file.json", 'r') as myjsonfile:
    jsondata = myjsonfile.read()
obj = json.loads(jsondata)

# Keep only the "profileChanges" part and show each entry.
ciso = obj['profileChanges']
for i in ciso:
    print(i)

# Write the extracted part to a second file as JSON.
with open("file2", "w") as outfile:
    json.dump(ciso, outfile, indent=1)
the issue I have is that I can't access "profile" (inside profileChanges) in the same way by parsing the new file and I have no idea on how to do it

Elements of parsed JSON are accessed by chaining list indexes and dict keys; please look at the example below:
a = [
    {
        "friends": [
            {
                "id": 0,
                "name": "Reba May"
            }
        ],
        "greeting": "Hello, Doris Gallagher! You have 2 unread messages.",
        "favoriteFruit": "strawberry"
    },
]
# `a` is a list and so is "friends": each needs an index before its keys.
b = a[0]['friends'][0]['id']  # b == 0

I've added a couple of closing braces to make your snippet valid json:
s = '''{
"profileRevision": 548789,
"profileId": "campaign",
"profileChangesBaseRevision": 548789,
"profileChanges": [
{
"changeType": "fullProfileUpdate",
"profile": {
"_id": "2da4f079f8984cc48e84fc99dace495d",
"created": "2018-03-29T11:02:15.190Z",
"updated": "2022-10-31T17:34:43.284Z",
"rvn": 548789,
"wipeNumber": 9,
"accountId": "63881e614ef543b2932c70fed1196f34",
"profileId": "campaign",
"version": "refund_teddy_perks_september_2022",
"items": {
"8ec8f13f-6bf6-4933-a7db-43767a055e66": {
"templateId": "Quest:heroquest_loadout_constructor_2",
"attributes": {
"quest_state": "Claimed",
"creation_time": "min",
"last_state_change_time": "2019-05-18T16:09:12.750Z",
"completion_complete_pve03_diff26_loadout_constructor": 300,
"level": -1,
"item_seen": true,
"sent_new_notification": true,
"quest_rarity": "uncommon",
"xp_reward_scalar": 1
},
"quantity": 1
},
"6940c71b-c74b-4581-9f1e-c0a87e246884": {
"templateId": "Worker:workerbasic_sr_t01",
"attributes": {
"gender": "2",
"personality": "Homebase.Worker.Personality.IsDreamer",
"level": 1,
"item_seen": true,
"squad_slot_idx": -1,
"portrait": "WorkerPortrait:IconDef-WorkerPortrait-Dreamer-F02",
"building_slot_used": -1,
"set_bonus": "Homebase.Worker.SetBonus.IsMeleeDamageLow"
}
}
}
}
}
]
}
'''
# Parse the JSON text, then walk down to the version of the first change.
d = json.loads(s)
first_change = d['profileChanges'][0]
print(first_change['profile']['version'])
This prints refund_teddy_perks_september_2022
Explanation:
d is a dict
d['profileChanges'] is a list of dicts
d['profileChanges'][0] is the first dict in the list
d['profileChanges'][0]['profile'] is a dict
d['profileChanges'][0]['profile']['version'] is the value of version key in the profile dict in the first entry of the profileChanges list.

Filter nested python dict by value

I have a python dictionary, where I don't exactly know, how deeply nested it is, but here is an example of such:
{
"name":"a_struct",
"type":"int",
"data":{
"type":"struct",
"elements":[
{
"data":[
{
"name":"test1",
"data_id":0,
"type":"uint8",
"wire_type":0,
"data":0
},
{
"name":"test2",
"data_id":2,
"type":"uint32",
"wire_type":2,
"data":0
},
{
"name":"test3",
"data_id":3,
"type":"int",
"wire_type":4,
"data":{
"type":"uint32",
"elements":[
]
}
},
{
"name":"test4",
"data_id":4,
"type":"uint32",
"wire_type":2,
"data":0
},
{
"name":"test5",
"data_id":5,
"type":"int",
"wire_type":4,
"data":{
"type":"uint32",
"elements":[
]
}
}
]
}
]
}
}
My goal is to filter out each dictionary whose name key does not contain one of the values ["test1", "test3", "test5"]. This should be applicable to dictionaries of arbitrary nesting depth.
So in that case, the result shall be a filtered dictionary:
{
"name":"a_struct",
"type":"int",
"data":{
"type":"struct",
"elements":[
{
"data":[
{
"name":"test1",
"data_id":0,
"type":"uint8",
"wire_type":0,
"data":0
},
{
"name":"test3",
"data_id":3,
"type":"int",
"wire_type":4,
"data":{
"type":"uint32",
"elements":[
]
}
},
{
"name":"test5",
"data_id":5,
"type":"int",
"wire_type":4,
"data":{
"type":"uint32",
"elements":[
]
}
}
]
}
]
}
}
I tried to use the dpath lib (https://pypi.org/project/dpath/), by providing a filter criteria like so:
def afilter(x):
    """Return True only for dicts whose "name" is one of the wanted tests.

    Non-dict values, dicts without a "name" key and dicts with any other
    name all yield False.
    """
    wanted = ("test1", "test3", "test5")
    # Single boolean expression so every path returns a real bool rather
    # than falling off the end with an implicit None.
    return isinstance(x, dict) and x.get("name") in wanted
# Search my_dict with the "**" glob, passing afilter as the filter callback.
result = dpath.util.search(my_dict, "**", afilter=afilter)
But I get a wrong result, so every other key, has been filtered out, which is not what I want:
{
"data":{
"elements":[
{
"data":[
{
"name":"test1",
"data_id":0,
"type":"uint8",
"wire_type":0,
"data":0
},
null,
{
"name":"test3",
"data_id":3,
"type":"int",
"wire_type":4,
"data":{
"type":"uint32",
"elements":[
]
}
},
null,
{
"name":"test5",
"data_id":5,
"type":"int",
"wire_type":4,
"data":{
"type":"uint32",
"elements":[
]
}
}
]
}
]
}
}
How to get this right?
PS: I'm not forced to use the dpath lib. So, the solution might be written in pure python.

You can recursively process your dictionary while filtering unneeded records:
def delete_keys(data, keys_to_keep):
    """Return a filtered copy of ``data``, a nested dict.

    Nested dicts are processed recursively. In every list stored under a
    ``"data"`` key, dict items are kept only when their ``"name"`` is in
    ``keys_to_keep``. Dict items of other lists are always kept and
    processed recursively. Non-dict list items and scalar values are
    copied unchanged.

    ``data`` itself is not modified.
    """
    res = {}
    for k, v in data.items():
        if isinstance(v, dict):
            res[k] = delete_keys(v, keys_to_keep)
        elif isinstance(v, list):
            kept = []
            for obj in v:
                if not isinstance(obj, dict):
                    # Scalars have neither .get() nor .items(); pass them
                    # through instead of raising AttributeError.
                    kept.append(obj)
                elif k != "data" or obj.get('name') in keys_to_keep:
                    kept.append(delete_keys(obj, keys_to_keep))
            res[k] = kept
        else:
            res[k] = v
    return res
# Names of the records that should survive the filtering.
keys_to_keep = {'test1', 'test3', 'test5'}
# NOTE(review): `data` is presumably the nested dictionary from the question.
print(delete_keys(data, keys_to_keep))
For your input, it gives:
{
"name": "a_struct",
"type": "int",
"data": {
"type": "struct",
"elements": [
{
"data": [
{
"name": "test1",
"data_id": 0,
"type": "uint8",
"wire_type": 0,
"data": 0,
},
{
"name": "test3",
"data_id": 3,
"type": "int",
"wire_type": 4,
"data": {"type": "uint32", "elements": []},
},
{
"name": "test5",
"data_id": 5,
"type": "int",
"wire_type": 4,
"data": {"type": "uint32", "elements": []},
},
]
}
],
},
}

Changing Key name in mongodb based on its value

I have a list of elements, element_list=['A','C'], and my document in mongodb looks like:
"product_id": {
"$oid": "AA"
},
"output": [
{
"product": {
"$oid": "A"
},
"value": 1
},
{
"product": {
"$oid": "B"
},
"value": 1
},
]
}
what I want is based on my element_list value the key should change like:
"product_id": {
"$oid": "AA"
},
"products": [
{
"product": {
"$oid": "A"
},
"value": 1
},
{
"Offer": {
"$oid": "B"
},
"value": 1
},
]
}
'B' is not present in element_list, that's why its key is Offer. How to automatically update multiple similar documents in python?

try
# The question decides the key by membership in element_list, not by the
# documents' own product_id values.
oids = set(element_list)
for product in data:
    new_products = []
    for output in product['output']:
        oid = output['product']['$oid']
        key = 'product' if oid in oids else 'Offer'
        # "value" stays a sibling of the (possibly renamed) key, matching
        # the desired document layout.
        new_products.append({key: {'$oid': oid}, 'value': output['value']})
    product['products'] = new_products
    del product['output']
print(data)

Manipulating data from json to reflect a single value from each entry

Setup:
This data set has 50 "issues", within these "issues" i have captured the data that I need to then put into my postgresql database. But when i get to "components" is where i have trouble. I am able to get a list of all "names" of "components" but only want to have 1 instance of "name" for each "issue", and some of them have 2. Some are empty and would like to return null for those.
Here is some sample data that should suffice:
{
"issues": [
{
"key": "1",
"fields": {
"components": [],
"customfield_1": null,
"customfield_2": null
}
},
{
"key": "2",
"fields": {
"components": [
{
"name": "Testing"
}
],
"customfield_1": null,
"customfield_2": null
}
},
{
"key": "3",
"fields": {
"components": [
{
"name": "Documentation"
},
{
"name": "Manufacturing"
}
],
"customfield_1": null,
"customfield_2": 5
}
}
]
}
I am looking to return (just for the component name piece):
['null', 'Testing', 'Documentation']
I set up the other data for entry into the db like so:
# One tuple per issue, in the column order expected by the database insert.
values = [
    (
        item['key'],
        # components list,
        item['fields']['customfield_1'],
        item['fields']['customfield_2'],
    )
    for item in data_story['issues']
]
I am wondering if there is a possible way to enter in the created components list where i have commented "components list" above
Just to recap, I want to have only 1 component name for each issue, null or not, and to be able to put it in the values variable with the rest of the data. Also, the first name in components will work for each "issue".

Here's what I would do, assuming that we are working with a data variable:
# First component name of each issue, or the string 'null' when it has none.
values = [
    x['fields']['components'][0]['name'] if x['fields']['components'] else 'null'
    for x in data['issues']
]
Let me know if you have any queries.

In a list comprehension, use a conditional expression (if/else).
example code is
# First component name of each issue; 'null' when the key is missing or empty.
results = []
for x in data['issues']:
    fields = x['fields']
    has_component = 'components' in fields and len(fields['components']) > 0
    results.append(fields['components'][0]['name'] if has_component else 'null')
full sample code is
import json
data = json.loads('''{ "issues": [
{
"key": "1",
"fields": {
"components": [],
"customfield_1": null,
"customfield_2": null
}
},
{
"key": "2",
"fields": {
"components": [
{
"name": "Testing"
}
],
"customfield_1": null,
"customfield_2": null
}
},
{
"key": "3",
"fields": {
"components": [
{
"name": "Documentation"
},
{
"name": "Manufacturing"
}
],
"customfield_1": null,
"customfield_2": 5
}
}
]
}''')


def _first_component(issue):
    """Return the issue's first component name, or 'null' if it has none."""
    fields = issue['fields']
    if 'components' in fields and len(fields['components']) > 0:
        return fields['components'][0]['name']
    return 'null'


results = [_first_component(x) for x in data['issues']]
print(results)
output is ['null', u'Testing', u'Documentation']

If you just want to delete all but one of the names from the list, then you can do that this way:
issues={
"issues": [
{
"key": "1",
"fields": {
"components": [],
"customfield_1": "null",
"customfield_2": "null"
}
},
{
"key": "2",
"fields": {
"components": [
{
"name": "Testing"
}
],
"customfield_1": "null",
"customfield_2": "null"
}
},
{
"key": "3",
"fields": {
"components": [
{
"name": "Documentation"
},
{
"name": "Manufacturing"
}
],
"customfield_1": "null",
"customfield_2": 5
}
}
]
}
Data^
# Collect the first component of every issue, or "null" when it has none.
componentlist = []
for i, issue in enumerate(issues["issues"]):
    x = issue["fields"]["components"]
    if not x:
        x = "null"
        componentlist.append(x)
    else:
        x = issues["issues"][i]["fields"]["components"][0]
        componentlist.append(x)
print(componentlist)
>>>['null', {'name': 'Testing'}, {'name': 'Documentation'}]
Or, if you just want the values, and not the dictionary keys:
else:
x=issues["issues"][i]["fields"]["components"][0]["name"]
componentlist.append(x)
['null', 'Testing', 'Documentation']

Reorder and return the whole of nested dictionary

I am trying to retain the whole contents of a nested dictionary but only with its contents reordered..
This is an example of my nested dictionaries (pardon the long example..) -
{
"pages": {
"rotatingTest": {
"elements": {
"apvfafwkbnjn2bjt": {
"name": "animRot_tilt40_v001",
"data": {
"description": "tilt testing",
"project": "TEST",
"created": "26/11/18 16:32",
},
"type": "AnimWidget",
"uid": "apvfafwkbnjn2bjt"
},
"p0pkje1hjcc9jukq": {
"name": "poseRot_positionD_v003",
"data": {
"description": "posing test for positionD",
"created": "10/01/18 14:16",
"project": "TEST",
},
"type": "PosedWidget",
"uid": "p0pkje1hjcc9jukq"
},
"k1gzzc5uy1ynqtnj": {
"name": "animRot_positionH_v001",
"data": {
"description": "rotational posing test for positionH",
"created": "13/06/18 14:19",
"project": "TEST",
},
"type": "AnimWidget",
"uid": "k1gzzc5uy1ynqtnj"
}
}
},
"panningTest": {
"elements": {
"7lyuri8g8u5ctwsa": {
"name": "posePan_positionZ_v001",
"data": {
"description": "panning test for posZ",
"created": "04/10/18 12:43",
"project": "TEST",
},
"type": "PosedWidget",
"uid": "7lyuri8g8u5ctwsa"
}
}
},
"zoomingTest": {
"elements": {
"prtn0i6ehudhz475": {
"name": "posZoom_positionH_v010",
"data": {
"description": "zoom test",
"created": "11/10/18 12:42",
"project": "TEST",
},
"type": "PosedWidget",
"uid": "prtn0i6ehudhz475"
}
}
}
},
"page_order": [
"rotatingTest",
"zoomingTest",
"panningTest"
]
}
and this is my code:
for k1, v1 in test_dict.get('pages', {}).items():
return (sorted(v1.get('elements').items(), key=lambda (k2,v2): v2['data']['created']))
In the code, keys such as the page_order, pages etc are missing...
Or are there any commands that will enable me to retain the 'whole' of the dictionary?
Appreciate in advance for any advice.

If you're using Python 3.7 or later, a dict will preserve insertion order. Otherwise, you need to use an OrderedDict. Additionally, you need to convert the date string to a datetime to get the correct sort order:
from datetime import datetime
def sortedPage(d):
    """Return a copy of the pages mapping with each page's elements sorted.

    ``d`` maps page names to page dicts that contain an ``'elements'``
    mapping. Elements are ordered by their ``data['created']`` timestamp,
    parsed with the format ``'%d/%m/%y %H:%M'``. Any other keys of a page
    are carried over unchanged. ``d`` itself is not modified.

    Raises ValueError if a ``created`` string does not match the format.
    """
    def created_at(pair):
        # pair is a (uid, element) item; parsing the date makes the order
        # chronological instead of lexical.
        return datetime.strptime(pair[1]['data']['created'], '%d/%m/%y %H:%M')

    return {
        k: {**v, 'elements': dict(sorted(v['elements'].items(), key=created_at))}
        for k, v in d.items()
    }
# Re-sort only the 'pages' entry; every other top-level key is copied as is.
# NOTE(review): `input` is presumably the nested dictionary from the question;
# as a name it shadows the built-in input().
output = {k: sortedPage(v) if k == 'pages' else v for k,v in input.items()}

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

manipulating json in python using recursion - python

Related

how do I access this json data in python?

Filter nested python dict by value

Changing Key name in mongodb based on its value

Manipulating data from json to reflect a single value from each entry

Reorder and return the whole of nested dictionary

Categories

Resources