I have a dictionary of unknown depth and structure. It might contain more dictionaries, lists of dictionaries, etc. It's created from deserializing some JSON input created by another system. There's a key, perhaps multiple keys with the same name, at various places in that dictionary. I'd like to get the values at each of those keys and ideally update them.
Given a dictionary structure like this:
{
"tags":{
"social-1":{
"email":True,
"twitter":True,
"facebook":True,
"linkedin":True,
"type":"social"
},
"primary":{
"type":"flexible",
"width":"auto",
"blocks":[
{
"type":"combo",
"data":{
"styles":{
"margin":"10",
"padding":"0",
"borderColor":"#000",
"borderWidth":"0",
"borderStyle":"solid",
"backgroundColor":"transparent",
"width":"auto"
},
"placeholder":True,
"headline":{
"visible":False
},
"subHeadline":{
"visible":False
},
"imageHolder":{
"visible":True,
"value":[
{
"url":None,
"caption":None,
"width":220,
"height":140,
"padding":10,
"alt":"",
"href":None,
"filePath":None,
"sizing":"original",
"source":"disk",
"displayWidth":200,
"displayHeight":140,
"displayPadding":{
"left":10,
"top":0,
"right":10,
"bottom":10
}
}
],
"smartSizing":True,
"captions":False,
"captionDefault":None
},
"content":{
"visible":True,
"value":"<p>Your text here.</p>"
},
"imagePosition":"left",
"textWrap":False,
"type":"combo"
}
},
{
"type":"image",
"data":{
"styles":{
"margin":"10",
"padding":"0",
"borderColor":"#000",
"borderWidth":"0",
"borderStyle":"solid",
"backgroundColor":"transparent",
"width":"auto"
},
"placeholder":False,
"imageHolder":[
{
"url":None,
"caption":None,
"width":0,
"height":140,
"padding":10,
"alt":"",
"href":None,
"filePath":None,
"sizing":"original",
"source":"disk",
"displayWidth":213,
"displayHeight":159,
"displayPadding":{
"left":10,
"top":10,
"right":5,
"bottom":10
}
},
{
"url":None,
"caption":None,
"width":0,
"height":140,
"padding":10,
"alt":"",
"href":None,
"filePath":None,
"displayWidth":213,
"displayHeight":159,
"source":"disk",
"sizing":"original",
"displayPadding":{
"left":5,
"top":10,
"right":5,
"bottom":10
}
},
{
"url":None,
"caption":None,
"width":0,
"height":140,
"padding":10,
"alt":"",
"href":None,
"filePath":None,
"displayWidth":213,
"displayHeight":159,
"source":"disk",
"sizing":"original",
"displayPadding":{
"left":5,
"top":10,
"right":10,
"bottom":10
}
}
],
"orientation":"horizontal",
"smartSizing":True,
"captions":False,
"captionDefault":None,
"type":"image"
}
}
]
}
}
}
How would I examine and update the values of the imageHolder keys?
You can use a recursive function descending into list and dictionary values:
def get_all(data, key):
sub_iter = []
if isinstance(data, dict):
if key in data:
yield data[key]
sub_iter = data.itervalues()
if isinstance(data, list):
sub_iter = data
for x in sub_iter:
for y in get_all(x, key):
yield y
Recursively walk the tree structure to find the elements you care about. E.g.:
def _process_items(items, handler):
    """Descend into each dict or list found in the list *items*."""
    for item in items:
        if isinstance(item, dict):
            process(item, handler)
        elif isinstance(item, list):
            _process_items(item, handler)


def process(blob, handler=None):
    """Walk a nested dict and call *handler* for every 'imageHolder' entry.

    blob: the dict to walk. Nested dicts are visited, and so are dicts
        stored inside (possibly nested) lists.
    handler: optional callable invoked as ``handler(parent, key, value)``
        for each 'imageHolder' key found. It may assign ``parent[key]`` to
        update the entry in place. When omitted, the tree is walked without
        doing anything.

    The value that was present before the handler ran is the one that is
    descended into afterwards.
    """
    for k, v in blob.items():
        if k == 'imageHolder' and handler is not None:
            handler(blob, k, v)
        if isinstance(v, dict):
            process(v, handler)
        elif isinstance(v, list):
            # JSON arrays can hold further objects, so they must be
            # searched too or keys below a list are never reached.
            _process_items(v, handler)
That if isinstance(v,dict): line might be replaced with various alternatives, depending on exactly what you expect your input to be -- e.g., you could do duck-typing with something like if hasattr(v, 'items').
You need recursion:
def search_key(mykey, mydict, new_value='foo'):
    """Find every *mykey* entry in a nested structure and overwrite it.

    mykey: the dictionary key to look for.
    mydict: the structure to search; dicts, lists and tuples are descended
        into, anything else is ignored.
    new_value: the value written over each match (defaults to 'foo').

    Children are searched before the current dict is updated, so matches
    nested inside a matching value are reported and replaced first. Each
    old value is printed before it is replaced. Dicts are modified in place
    and nothing is returned.
    """
    if isinstance(mydict, dict):
        for value in mydict.values():
            search_key(mykey, value, new_value)
        if mykey in mydict:
            # Wrap in a 1-tuple so an old value that is itself a tuple is
            # formatted as one item instead of being unpacked by '%'.
            print('found old_value=%r' % (mydict[mykey],))
            mydict[mykey] = new_value  # update
    elif isinstance(mydict, (list, tuple)):
        for value in mydict:
            search_key(mykey, value, new_value)
# Define the structure first, then search it; calling search_key before
# mydict exists would raise NameError.
mydict = {...}  # your long json structure
search_key('imageHolder', mydict)
Related
I have already tried numerous approaches, but unfortunately have not arrived at any useful result. I have the following problem: I have a very deep and arbitrarily nested dictionary.
d = {
"aaa":{
"bbb":"xyz",
"ccc":{
"description":"xyz",
"data":"abc"
},
"description":"xyz"
},
"xxx":{
"description":"xyz",
"bbb":{
"ccc":{
"ddd":{
"description":"xyz"
},
"aaa":{
"description":{
"hhh": "xyz"
}
},
"zzz":{
"description":"xyz"
}
}
}
},
"lll":{
"description":"xyz",
"bbb":{
"ccc":{
"hhh":{
"description":"xyz"
},
"ooo":{
"description":"xyz",
"aaa":{
"ddd":{
"description":"xyz"
}
},
"zzz":{
"ddd":{
"description":"xyz"
}
}
},
"zzz":{
"description":"xyz"
}
}
}
}
}
Now I want to search all levels of the dictionary and check if the keys "aaa" & "zzz" occur in this level. If this is the case, I want to output the keys with the respective values in a list with tuples.
[('aaa: {'ddd':{'description':'xyz'}', 'zzz: {'description':'xyz'}'),
('aaa:{'ddd':{'description':'xyz'}}', 'zzz:{'description':'xyz'})
]
I know that with
d.keys()
I can print all keys in one layer.
I know that with this function I can go through all keys and values in the dictionary
def recursive_items(dictionary):
    """Yield ``(key, value)`` for every non-dict value in a nested dict.

    Values that are dicts are descended into instead of being yielded, so
    only leaf entries are produced. Keys of the intermediate dicts are not
    reported. Lists are treated as leaves and are not searched.
    """
    for key, value in dictionary.items():
        # isinstance (rather than an exact type check) also descends into
        # dict subclasses such as OrderedDict.
        if isinstance(value, dict):
            yield from recursive_items(value)
        else:
            yield (key, value)
However, I am having trouble linking the two and putting the output into a list of tuples.
Try (d is dictionary from your question):
def find(d, keys=("aaa", "zzz")):
    """Yield a tuple of ``(key, value)`` pairs for each dict holding all *keys*.

    The search covers *d* itself and everything nested inside it through
    dict values and list elements. A dict qualifies only when every name in
    *keys* is present at that same level; the pairs are emitted in the
    order given by *keys*. Objects that are neither dict nor list yield
    nothing.
    """
    if isinstance(d, list):
        children = d
    elif isinstance(d, dict):
        none_missing = not any(name not in d for name in keys)
        if none_missing:
            yield tuple((name, d[name]) for name in keys)
        children = d.values()
    else:
        return
    for child in children:
        yield from find(child, keys)
print(list(find(d)))
Prints:
[
(
("aaa", {"description": {"hhh": "xyz"}}),
("zzz", {"description": "xyz"})),
(
("aaa", {"ddd": {"description": "xyz"}}),
("zzz", {"ddd": {"description": "xyz"}}),
),
]
I have created a var that is equal to t.json. The JSON file is as follows:
{
"groups": {
"customerduy": {
"nonprod": {
"name": "customerduynonprod",
"id": "529646781943",
"owner": "cloudops#coerce.com",
"manager_email": ""
},
"prod": {
"name": "phishing_duyaccountprod",
"id": "241683454720",
"owner": "cloudops#coerce.com",
"manager_email": ""
}
},
"customerduyprod": {
"nonprod": {
"name": "phishing_duyaccountnonprod",
"id": "638968214142",
"owner": "cloudops#coerce.com",
"manager_email": ""
}
},
"ciasuppliergenius": {
"prod": {
"name": "ciasuppliergeniusprod",
"id": "220753788760",
"owner": "cia_developers#coerce.com",
"manager_email": "jarks#coerce.com"
}
}
}
}
My goal was to parse this JSON file, get the value for "owner", and output it to a new var. Example below:
t.json = group_map
group_id_aws = group(
group.upper(),
"accounts",
template,
owner = group_map['groups']['prod'],
manager_description = "Groups for teams to access their product accounts.",
The error I keep getting is: KeyError: 'prod'
Owner occurs 4 times, so here is how to get all of them.
import json
# Load the JSON document from disk.
with open("C:\\test\\test.json") as f:
    data = json.load(f)

# Look up each of the 4 owner entries by its full key path.
owner_1 = data["groups"]["customerduy"]["nonprod"]["owner"]
owner_2 = data["groups"]["customerduy"]["prod"]["owner"]
owner_3 = data["groups"]["customerduyprod"]["nonprod"]["owner"]
owner_4 = data["groups"]["ciasuppliergenius"]["prod"]["owner"]

# Print the owners, one per line.
print(owner_1, owner_2, owner_3, owner_4, sep="\n")
the result:
cloudops#coerce.com
cloudops#coerce.com
cloudops#coerce.com
cia_developers#coerce.com
You get a key error since the key 'prod' is not in 'groups'
What you have is
group_map['groups']['customerduy']['prod']
group_map['groups']['ciasuppliergenius']['prod']
So you will have to extract the 'owner' from each element in the tree:
def s(d, t):
    """Yield every value stored under key *t* anywhere inside *d*.

    d: the structure to search. Dicts have each key compared with *t* and
        each value searched in turn; lists have each element searched;
        anything else yields nothing.
    t: the key to look for.

    A matching value is yielded before its own contents are searched.
    """
    # Explicit type checks replace a bare ``except: pass`` around the
    # recursion: that handler also caught GeneratorExit at the yield, which
    # made closing the generator early raise RuntimeError.
    if isinstance(d, dict):
        for k, v in d.items():
            if t == k:
                yield v
            yield from s(v, t)
    elif isinstance(d, list):
        for item in d:
            yield from s(item, t)
print(','.join(s(j,'owner')))
If your JSON is loaded in variable data, you can use a recursive function
that deals with the two containers types (dict and list) that can occur
in a JSON file, recursively:
def find_all_values_for_key(d, key, result=None):
    """Collect every value stored under *key* anywhere inside *d*.

    d: the structure to search; dicts and lists are descended into,
        anything else is ignored.
    key: the dictionary key to look for.
    result: optional list that matches are appended to. A new list is
        created when it is omitted.

    Returns the list holding the matches (the same object as *result* when
    one was passed in), in the order they were encountered.
    """
    if result is None:
        result = []
    if isinstance(d, dict):
        if key in d:
            result.append(d[key])
        # Keep searching after a hit: sibling values (and the matched value
        # itself) may hold further occurrences of the key.
        for v in d.values():
            find_all_values_for_key(v, key, result)
    elif isinstance(d, list):
        for elem in d:
            find_all_values_for_key(elem, key, result)
    return result
owners = []
find_all_values_for_key(data, 'owner', owners)
print(f'{owners=}')
which gives:
owners=['cloudops#coerce.com', 'cloudops#coerce.com', 'cloudops#coerce.com', 'cia_developers#coerce.com']
This way you don't have to bother with the names of intermediate keys, or in general the structure of your JSON file.
You don't have any lists in your example, but it is trivial to recurse through
them to any dict with an owner key that might "lurk" somewhere nested
under a list element, so it is better to deal with potential future changes
to the JSON.
I have a dictionary which contains the following json elements.
myjsonDictionary = \
{
"Teams": {
"TeamA": {
"#oid": "123.0.0.1",
"dataRequestList": {
"state": {
"#default": "0",
"#oid": "2"
}
},
"TeamSub": {
"#oid": "3",
"dataRequestList": {
"state": {
"#default": "0",
"#oid": "2"
}
}
}
},
# ....many nested layers
}
}
I have the following issue and am currently very confused on how to solve this problem.
I want to be able to parse this dictionary and get the concatenation of the "#oid" value and the respective "#oid" when I request the "key" such as "TeamA" or "TeamSub".
I have a function, gettheiDLevelConcatoid(myjsonDictionary, key), which takes in the dictionary and a key.
I can call this function like this:
gettheiDLevelConcatoid(myjsonDictionary, key) where "key" is like "TeamA"
And the expected output should be "123.0.0.1.2". Note the 2 appended to the 123.0.0.1.
gettheiDLevelConcatoid(myjsonDictionary, key) where "key" is like TeamSub
Output is "123.0.0.1.3.2". Note the "3.2" added to the "123.0.0.1".
My current implementation:
def gettheiDLevelConcatoid(myjsonDictionary, key)
for item in myjsonDictionary:
if (item == key):
#not sure what to do
I am so lost on how to implement a generic method or approach for this.
With recursive traversal for specific keys:
def get_team_idlvel_oid_pair(d, search_key):
    """Return the dotted '#oid' string for the team named *search_key*.

    d: a dict whose keys starting with 'Team' map to team dicts.
    search_key: the team key to look for, at this level or nested below.

    The result is the team's own '#oid' followed by the '#oid' of its
    ``dataRequestList -> state`` entry. It is prefixed with the '#oid' of
    the dict that directly contains the team when that dict has one.
    Returns None when no such team is found.

    Raises KeyError if the matching team lacks '#oid' or the
    ``dataRequestList -> state -> '#oid'`` path.
    """
    for k, v in d.items():
        if not k.startswith('Team') or not isinstance(v, dict):
            continue
        if k == search_key:
            prefix = d['#oid'] + '.' if '#oid' in d else ''
            return '{}{}.{}'.format(
                prefix, v['#oid'], v['dataRequestList']['state']['#oid'])
        # Test the child's own keys (k_) for nested teams.
        if any(k_.startswith('Team') for k_ in v):
            found = get_team_idlvel_oid_pair(v, search_key)
            # Only stop on a hit, so sibling teams still get searched.
            if found is not None:
                return found
    return None
print(get_team_idlvel_oid_pair(myjsonDictionary['Teams'], 'TeamA'))
print(get_team_idlvel_oid_pair(myjsonDictionary['Teams'], 'TeamSub'))
Sample output:
123.0.0.1.2
123.0.0.1.3.2
I have the following .json file:
{
"level1_one":"1",
"level1_two":{
"level2_one":"2",
"level2_two":{
"level3_one":"bottom"
}
}
}
I need to give it nested and flattened representations like so:
{
"level1_two": {
"level2_two": {
"level3_one": "bottom"
},
"level2_one": "2"
},
"level1_one": "1"
}
and
{
"level2_one": "2",
"level3_one": "bottom",
"level1_one": "1"
}
I know I can easily achieve the nested form by simply doing this:
def json_parser(filename):
    """Load the JSON document at *filename* and print the parsed object.

    filename: path of the JSON file to read.

    Nothing is returned. Errors from opening the file or from parsing its
    contents propagate to the caller.
    """
    # The context manager closes the file even if parsing fails.
    with open(filename, encoding="utf-8") as f:
        data = json.load(f)
    print(data)
The problem is that it needs to be done using recursion in both cases. I tried almost everything I could find around here, but without much success. How can I do this?
You can try this:
s = {
"level1_one":"1",
"level1_two":{
"level2_one":"2",
"level2_two":{
"level3_one":"bottom"
}
}
}
def flatten(s):
    """Yield ``(key, value)`` pairs for every non-dict value nested in *s*.

    Dict values are expanded recursively in place of being yielded, so the
    keys of intermediate dicts do not appear in the output.
    """
    for name, value in s.items():
        if isinstance(value, dict):
            yield from flatten(value)
        else:
            yield (name, value)
new_data = dict(list(flatten(s)))
Output:
{'level2_one': '2', 'level3_one': 'bottom', 'level1_one': '1'}
Something like this
def flatten_dict(d):
    """Return a one-level dict of all the non-dict values nested in *d*.

    Nested dicts are merged into the result in place of their own key.
    When the same key occurs more than once, the value seen last wins.
    """
    flat = {}
    for key, value in d.items():
        if isinstance(value, dict):
            flat.update(flatten_dict(value))
        else:
            flat[key] = value
    return flat
this returns
{'level1_one': '1', 'level2_one': '2', 'level3_one': 'bottom'}
for the example you posted
I've got one dict from an api:
initial_dict = {
"content": {
"text":
},
"meta": {
"title": "something",
"created": "2016-03-04 15:30",
"author": "Pete",
"extra": {
"a": 123,
"b": 456
}
}
}
and I need to map this to another dict:
new_dict = {
"content_text": initial_dict['content']['text'],
"meta_title": initial_dict['meta']['title'],
"meta_extras": {
"time_related": {
initial_dict['meta']['created']
},
"by": initial_dict['meta']['author']
}
}
The problem is that not all fields are always in the initial_dict. I can of course wrap the whole creation of new_dict into a try/except, but then it would fail if one of the initial fields doesn't exist.
Is there no other way than creating a try/except for each and every field I add to the new_dict? In reality the dict is way bigger than this (about 400 key/value pairs), so this will become a mess quite fast.
Isn't there a better and more pythonic way of doing this?
How about using dict.get? Instead of throwing an error, this returns None if the key isn't in the dictionary.
new_dict = {
"content_text": initial_dict['content'].get('text'),
"meta_title": initial_dict['meta'].get('title'),
"meta_extras": {
"time_related": {
initial_dict['meta'].get('created')
},
"by": initial_dict['meta'].get('author')
}
}
If this goes deeper than one level, you can do some_dict.get('key1', {}).get('key2') as was suggested in the comments.
Converting the original dict to a defaultdict is also an option, which allows you to keep using the [] notation (more practical than having to chain get methods):
from collections import defaultdict
def to_defaultdict(d):
    """Return a copy of *d* as a defaultdict whose missing keys give None.

    Values that are dicts are converted the same way, recursively. All
    other values, including lists and whatever they contain, are kept
    unchanged.
    """
    converted = defaultdict(lambda: None)
    for key, value in d.items():
        if isinstance(value, dict):
            converted[key] = to_defaultdict(value)
        else:
            converted[key] = value
    return converted
initial_dict = to_defaultdict(initial_dict)
You can then filter out the None values:
def filter_dict(d):
    """Return a plain-dict copy of *d* without the entries whose value is None.

    Dict values are filtered recursively. Other falsy values such as 0 or
    an empty string are kept.
    """
    kept = {}
    for k, v in d.items():
        if v is None:
            continue
        kept[k] = filter_dict(v) if isinstance(v, dict) else v
    return kept
new_dict = filter_dict(new_dict)