How to remove all empty fields in a nested dict? - python

I have a dict whose field values may themselves be dicts or arrays (lists). How can I remove all empty fields from it?
"Empty field" means a field's value is empty array([]), None, or empty dict(all sub-fields are empty).
Example:
Input:
{
"fruit": [
{"apple": 1},
{"banana": None}
],
"veg": [],
"result": {
"apple": 1,
"banana": None
}
}
Output:
{
"fruit": [
{"apple": 1}
],
"result": {
"apple": 1
}
}

Use a recursive function that returns a new dictionary:
def clean_empty(d):
    """Return a pruned copy of ``d`` with every falsy value removed, recursively.

    Dicts and lists are rebuilt from their cleaned children; any child whose
    cleaned form is falsy (None, empty container, 0, "", False) is left out.
    Anything that is neither a dict nor a list is returned as-is.
    """
    if isinstance(d, dict):
        pruned = {}
        for key, value in d.items():
            cleaned = clean_empty(value)
            if cleaned:
                pruned[key] = cleaned
        return pruned
    if isinstance(d, list):
        kept = []
        for element in d:
            cleaned = clean_empty(element)
            if cleaned:
                kept.append(cleaned)
        return kept
    return d
The {..} construct is a dictionary comprehension; it'll only include keys from the original dictionary if v is true, e.g. not empty. Similarly the [..] construct builds a list.
The nested (.. for ..) construct is a generator expression that allows the code to compactly filter empty objects after recursing.
Another way of constructing such a function is to use the @singledispatch decorator; you then write multiple functions, one per object type:
from functools import singledispatch
@singledispatch
def clean_empty(obj):
    """Return ``obj`` unchanged.

    Fallback implementation, used for every type that has no more specific
    handler registered below (i.e. anything that is not a dict or a list).
    """
    return obj


@clean_empty.register
def _dicts(d: dict):
    """Return a new dict holding the cleaned values of ``d`` that are truthy."""
    # Clean first, filter second: a value may only become empty after recursing.
    items = ((k, clean_empty(v)) for k, v in d.items())
    return {k: v for k, v in items if v}


@clean_empty.register
def _lists(l: list):
    """Return a new list holding the cleaned elements of ``l`` that are truthy."""
    items = map(clean_empty, l)
    return [v for v in items if v]
The above @singledispatch version does exactly the same thing as the first function, but the isinstance() tests are now taken care of by the decorator implementation, based on the type annotations of the registered functions. I also put the nested iterators (the generator expression and map() function) into a separate variable to improve readability further.
Note that any values set to numeric 0 (integer 0, float 0.0) will also be cleared. You can retain numeric 0 values with if v or v == 0.
Demo of the first function:
>>> sample = {
... "fruit": [
... {"apple": 1},
... {"banana": None}
... ],
... "veg": [],
... "result": {
... "apple": 1,
... "banana": None
... }
... }
>>> def clean_empty(d):
... if isinstance(d, dict):
... return {
... k: v
... for k, v in ((k, clean_empty(v)) for k, v in d.items())
... if v
... }
... if isinstance(d, list):
... return [v for v in map(clean_empty, d) if v]
... return d
...
>>> clean_empty(sample)
{'fruit': [{'apple': 1}], 'result': {'apple': 1}}

If you want a full-featured, yet succinct approach to handling real-world data structures which are often nested, and can even contain cycles and other kinds of containers, I recommend looking at the remap utility from the boltons utility package.
After pip install boltons or copying iterutils.py into your project, just do:
from boltons.iterutils import remap
data = {'veg': [], 'fruit': [{'apple': 1}, {'banana': None}], 'result': {'apple': 1, 'banana': None}}
drop_falsey = lambda path, key, value: bool(value)
clean = remap(data, visit=drop_falsey)
print(clean)
# Output:
{'fruit': [{'apple': 1}], 'result': {'apple': 1}}
This page has many more examples, including ones working with much larger objects from Github's API.
It's pure-Python, so it works everywhere, and is fully tested in Python 2.7 and 3.3+. Best of all, I wrote it for exactly cases like this, so if you find a case it doesn't handle, you can bug me to fix it right here.

@mojoken - How about this to overcome the boolean problem:
def clean_empty(d):
    """Return a pruned copy of ``d`` that drops falsy values but keeps booleans.

    Works like the plain truthiness-based version, except that ``True`` and
    ``False`` always survive. Values that are neither dict nor list are
    returned unchanged.
    """
    if isinstance(d, list):
        survivors = []
        for element in d:
            cleaned = clean_empty(element)
            if isinstance(cleaned, bool) or cleaned:
                survivors.append(cleaned)
        return survivors
    if isinstance(d, dict):
        survivors = {}
        for key, value in d.items():
            cleaned = clean_empty(value)
            if isinstance(cleaned, bool) or cleaned:
                survivors[key] = cleaned
        return survivors
    return d

def not_empty(o):
    """Return True when ``o`` should be kept.

    This is the single place that defines what "empty" means: ``None`` and
    zero-length dicts/lists are empty; every other value (including 0,
    False and "") is kept.
    """
    if o is None:
        return False
    if isinstance(o, (dict, list)):
        return len(o) > 0
    return True


def remove_empty(o):
    """Return a copy of ``o`` with all empty fields removed, recursively.

    Only dicts and lists are recursed into; any other value is returned
    unchanged.
    """
    if isinstance(o, dict):
        # Clean children before testing them: a container that holds only
        # empty values becomes empty itself and must be dropped as well.
        cleaned = ((k, remove_empty(v)) for k, v in o.items())
        return {k: v for k, v in cleaned if not_empty(v)}
    if isinstance(o, list):
        return [v for v in map(remove_empty, o) if not_empty(v)]
    return o

def remove_empty_fields(data_):
    """
    Recursively remove all empty fields from a nested
    dict/list structure, modifying it in place. Note, a
    non-empty field could turn into an empty one after
    its children are deleted.
    A field is empty when its value equals "", None, [] or {}.
    :param data_: A dict or list (anything else is returned untouched).
    :return: The same object that was passed in, after cleaning.
    """
    empty_values = ("", None, [], {})
    if isinstance(data_, dict):
        # Iterate over a snapshot: deleting keys while looping over the
        # live items() view raises RuntimeError on Python 3.
        for key, value in list(data_.items()):
            # Dive into a deeper level.
            if isinstance(value, (dict, list)):
                value = remove_empty_fields(value)
            # Delete the field if it's empty.
            if value in empty_values:
                del data_[key]
    elif isinstance(data_, list):
        # Walk backwards so that popping does not shift pending indices.
        for index in reversed(range(len(data_))):
            value = data_[index]
            # Dive into a deeper level.
            if isinstance(value, (dict, list)):
                value = remove_empty_fields(value)
            # Delete the field if it's empty.
            if value in empty_values:
                data_.pop(index)
    return data_

Related

Python Creating new dict from specific keys in other dict (nested)

(Please note I searched and couldn't find an answer for this type of nested, with dict and lists, and with keeping keys names and values).
I'm trying to create a new dict from existing dict with specific keys-value pairs that I need.
Example/origin dict:
{
"test1":{
"test2":[
]
},
"test3":[
],
"test4":{
"test5":0,
"what":{
"in":"2",
"out":"4"
}
},
"test12":[
{
"in2":"a",
"out2":"b"
},
{
"in2":"a33",
"out2":"b33"
}
],
"test9":255
}
I want to select keys for example: ['test1'], ['test4'], ['test12']['in2']
in such way that the result dict will be:
{
"test1":{
"test2":[
]
},
"test4":{
"test5":0,
"what":{
"in":"2",
"out":"4"
}
},
"test12":[
{
"in2":"a"
},
{
"in2":"a33"
}
]
}
I'm aware its possible to do manually, i want to see the pythonic way :)
Thanks!!!
Try a dictionary comprehension with isinstance list:
>>> {k: ([{'in2': i['in2']} for i in v] if isinstance(v, list) else v) for k, v in dct.items() if not isinstance(v, int) and v}
{'test1': {'test2': []},
'test4': {'test5': 0, 'what': {'in': '2', 'out': '4'}},
'test12': [{'in2': 'a'}, {'in2': 'a33'}]}
>>>
I don't think there is one "pythonic" way to do what you want here as there is an infinite number of possible values for your nested dict.
Here is a start of answer that you can adapt to your need !
import copy
def _transform(source_dict: dict, keys_to_keep: list):
dict_copy = copy.deepcopy(source_dict) # no side-effects
for key, value in source_dict.items():
if key not in keys_to_keep:
dict_copy.pop(key)
elif isinstance(value, dict):
dict_copy[key] = _transform(value, keys_to_keep)
elif isinstance(value, list):
dict_copy[key] = [
_transform(el, keys_to_keep) if isinstance(el, dict) else el for el in value
]
return dict_copy

How to find dictionary that contains a key in nested dictionary via recursion? [duplicate]

for k, v in d.iteritems():
if type(v) is dict:
for t, c in v.iteritems():
print "{0} : {1}".format(t, c)
I'm trying to loop through a dictionary and print out all key value pairs where the value is not a nested dictionary. If the value is a dictionary I want to go into it and print out its key value pairs...etc. Any help?
EDIT
How about this? It still only prints one thing.
def printDict(d):
for k, v in d.iteritems():
if type(v) is dict:
printDict(v)
else:
print "{0} : {1}".format(k, v)
Full Test Case
Dictionary:
{u'xml': {u'config': {u'portstatus': {u'status': u'good'}, u'target': u'1'},
u'port': u'11'}}
Result:
xml : {u'config': {u'portstatus': {u'status': u'good'}, u'target': u'1'}, u'port': u'11'}
As said by Niklas, you need recursion, i.e. you want to define a function to print your dict, and if the value is a dict, you want to call your print function using this new dict.
Something like :
def myprint(d):
for k, v in d.items():
if isinstance(v, dict):
myprint(v)
else:
print("{0} : {1}".format(k, v))
There are potential problems if you write your own recursive implementation or the iterative equivalent with stack. See this example:
dic = {}
dic["key1"] = {}
dic["key1"]["key1.1"] = "value1"
dic["key2"] = {}
dic["key2"]["key2.1"] = "value2"
dic["key2"]["key2.2"] = dic["key1"]
dic["key2"]["key2.3"] = dic
In the normal sense, nested dictionary will be a n-nary tree like data structure. But the definition doesn't exclude the possibility of a cross edge or even a back edge (thus no longer a tree). For instance, here key2.2 holds to the dictionary from key1, key2.3 points to the entire dictionary(back edge/cycle). When there is a back edge(cycle), the stack/recursion will run infinitely.
root<-------back edge
/ \ |
_key1 __key2__ |
/ / \ \ |
|->key1.1 key2.1 key2.2 key2.3
| / | |
| value1 value2 |
| |
cross edge----------|
If you print this dictionary with this implementation from Scharron
def myprint(d):
for k, v in d.items():
if isinstance(v, dict):
myprint(v)
else:
print "{0} : {1}".format(k, v)
You would see this error:
> RuntimeError: maximum recursion depth exceeded while calling a Python object
The same goes with the implementation from senderle.
Similarly, you get an infinite loop with this implementation from Fred Foo:
def myprint(d):
stack = list(d.items())
while stack:
k, v = stack.pop()
if isinstance(v, dict):
stack.extend(v.items())
else:
print("%s: %s" % (k, v))
However, Python actually detects cycles in nested dictionary:
print dic
{'key2': {'key2.1': 'value2', 'key2.3': {...},
'key2.2': {'key1.1': 'value1'}}, 'key1': {'key1.1': 'value1'}}
"{...}" is where a cycle is detected.
As requested by Moondra this is a way to avoid cycles (DFS):
def myprint(d):
    """Print every non-dict ``key: value`` pair reachable from ``d`` (DFS).

    Each dict object is expanded at most once, so back edges (cycles) cannot
    cause an infinite loop and a dict shared between several parents is
    printed a single time.
    """
    # Track dicts by identity, not by key name: the same key may legitimately
    # appear at several nesting levels, and dicts themselves are unhashable.
    visited = {id(d)}
    stack = list(d.items())
    while stack:
        k, v = stack.pop()
        if isinstance(v, dict):
            if id(v) not in visited:
                visited.add(id(v))
                stack.extend(v.items())
        else:
            print("%s: %s" % (k, v))
Since a dict is iterable, you can apply the classic nested container iterable formula to this problem with only a couple of minor changes. Here's a Python 2 version (see below for 3):
import collections
def nested_dict_iter(nested):
for key, value in nested.iteritems():
if isinstance(value, collections.Mapping):
for inner_key, inner_value in nested_dict_iter(value):
yield inner_key, inner_value
else:
yield key, value
Test:
list(nested_dict_iter({'a':{'b':{'c':1, 'd':2},
'e':{'f':3, 'g':4}},
'h':{'i':5, 'j':6}}))
# output: [('g', 4), ('f', 3), ('c', 1), ('d', 2), ('i', 5), ('j', 6)]
In Python 2, It might be possible to create a custom Mapping that qualifies as a Mapping but doesn't contain iteritems, in which case this will fail. The docs don't indicate that iteritems is required for a Mapping; on the other hand, the source gives Mapping types an iteritems method. So for custom Mappings, inherit from collections.Mapping explicitly just in case.
In Python 3, there are a number of improvements to be made. As of Python 3.3, abstract base classes live in collections.abc. They remain in collections too for backwards compatibility, but it's nicer having our abstract base classes together in one namespace. So this imports abc from collections. Python 3.3 also adds yield from, which is designed for just these sorts of situations. This is not empty syntactic sugar; it may lead to faster code and more sensible interactions with coroutines.
from collections import abc
def nested_dict_iter(nested):
for key, value in nested.items():
if isinstance(value, abc.Mapping):
yield from nested_dict_iter(value)
else:
yield key, value
Alternative iterative solution:
def myprint(d):
    """Print every non-dict ``key: value`` pair in ``d`` using an explicit stack.

    Nested dicts are expanded instead of printed. Because items are popped
    from the end of the stack, output order is last-in, first-out.
    """
    # items() returns a view on Python 3; it must be materialised into a list
    # to support pop() and extend().
    stack = list(d.items())
    while stack:
        k, v = stack.pop()
        if isinstance(v, dict):
            stack.extend(v.items())
        else:
            print("%s: %s" % (k, v))
Slightly different version I wrote that keeps track of the keys along the way to get there
def print_dict(v, prefix=''):
if isinstance(v, dict):
for k, v2 in v.items():
p2 = "{}['{}']".format(prefix, k)
print_dict(v2, p2)
elif isinstance(v, list):
for i, v2 in enumerate(v):
p2 = "{}[{}]".format(prefix, i)
print_dict(v2, p2)
else:
print('{} = {}'.format(prefix, repr(v)))
On your data, it'll print
data['xml']['config']['portstatus']['status'] = u'good'
data['xml']['config']['target'] = u'1'
data['xml']['port'] = u'11'
It's also easy to modify it to track the prefix as a tuple of keys rather than a string if you need it that way.
Here is pythonic way to do it. This function will allow you to loop through key-value pair in all the levels. It does not save the whole thing to the memory but rather walks through the dict as you loop through it
def recursive_items(dictionary):
    """Lazily yield ``(key, value)`` for every entry at every nesting level.

    A nested dict is yielded as a value first and its own entries follow
    immediately afterwards (pre-order). Nothing is collected in memory.
    """
    for key, value in dictionary.items():
        yield (key, value)
        # isinstance also descends into dict subclasses such as OrderedDict.
        if isinstance(value, dict):
            yield from recursive_items(value)
a = {'a': {1: {1: 2, 3: 4}, 2: {5: 6}}}
for key, value in recursive_items(a):
print(key, value)
Prints
a {1: {1: 2, 3: 4}, 2: {5: 6}}
1 {1: 2, 3: 4}
1 2
3 4
2 {5: 6}
5 6
An alternative solution that also works with lists, based on Scharron's solution:
def myprint(d):
    """Print every leaf of a nested dict/list structure as ``key : value``.

    ``d`` may be a dict or a list. For list elements the "key" is the
    element's index. Nested dicts and lists are recursed into, not printed.
    """
    # Treat a list like a mapping from index to element so both container
    # kinds share one loop.
    pairs = d.items() if isinstance(d, dict) else enumerate(d)
    for k, v in pairs:
        if isinstance(v, (dict, list)):
            myprint(v)
        else:
            print("{0} : {1}".format(k, v))
I am using the following code to print all the values of a nested dictionary, taking into account where the value could be a list containing dictionaries. This was useful to me when parsing a JSON file into a dictionary and needing to quickly check whether any of its values are None.
d = {
"user": 10,
"time": "2017-03-15T14:02:49.301000",
"metadata": [
{"foo": "bar"},
"some_string"
]
}
def print_nested(d):
    """Print every leaf value found in an arbitrarily nested structure.

    Dicts contribute their values (keys are ignored); any other iterable
    contributes its elements. Strings and bytes are printed whole rather
    than being iterated character by character.
    """
    if isinstance(d, dict):
        for v in d.values():
            print_nested(v)
    elif hasattr(d, '__iter__') and not isinstance(d, (str, bytes)):
        for item in d:
            print_nested(item)
    else:
        print(d)
print_nested(d)
Output:
10
2017-03-15T14:02:49.301000
bar
some_string
Your question has already been answered well, but I recommend using isinstance(d, collections.abc.Mapping) instead of isinstance(d, dict). It works for dict(), collections.OrderedDict(), and collections.UserDict(). (The old collections.Mapping alias was removed in Python 3.10.)
The generally correct version is:
from collections.abc import Mapping


def myprint(d):
    """Print every non-mapping ``key : value`` pair in ``d``, recursively.

    Any ``Mapping`` (dict, OrderedDict, UserDict, ...) is recursed into.
    """
    for k, v in d.items():
        # The ABC lives in collections.abc; the collections.Mapping alias
        # no longer exists on Python 3.10+.
        if isinstance(v, Mapping):
            myprint(v)
        else:
            print("{0} : {1}".format(k, v))
Iterative solution as an alternative:
def traverse_nested_dict(d):
    """Yield ``(key, value)`` for every non-dict value in ``d``, depth-first.

    Iterative equivalent of the recursive generator: a stack of item
    iterators replaces the call stack, one iterator per dict currently
    being walked.
    """
    iters = [iter(d.items())]
    while iters:
        it = iters.pop()
        try:
            k, v = next(it)
        except StopIteration:
            # This dict is exhausted; resume its parent on the next pass.
            continue
        # Not exhausted yet: put it back so it is resumed later.
        iters.append(it)
        if isinstance(v, dict):
            iters.append(iter(v.items()))
        else:
            yield k, v


d = {"a": 1, "b": 2, "c": {"d": 3, "e": {"f": 4}}}
for k, v in traverse_nested_dict(d):
    print(k, v)
Here's a modified version of Fred Foo's answer for Python 2. In the original response, only the deepest level of nesting is output. If you output the keys as lists, you can keep the keys for all levels, although to reference them you need to reference a list of lists.
Here's the function:
try:
    from collections.abc import Mapping  # Python 3.3+
except ImportError:  # Python 2
    from collections import Mapping


def NestIter(nested):
    """Yield ``(key_path, value)`` for every non-mapping value in ``nested``.

    ``key_path`` is a nested list: ``[key]`` for a top-level value and
    ``[key, inner_path]`` for a value inside a sub-mapping, e.g.
    ``['a', ['b', ['c']]]`` for ``nested['a']['b']['c']``.
    """
    # items() exists on both Python 2 and 3, unlike iteritems().
    for key, value in nested.items():
        if isinstance(value, Mapping):
            for inner_key, inner_value in NestIter(value):
                yield [key, inner_key], inner_value
        else:
            yield [key], value
To reference the keys:
for keys, vals in mynested:
print(mynested[keys[0]][keys[1][0]][keys[1][1][0]])
for a three-level dictionary.
You need to know the number of levels before to access multiple keys and the number of levels should be constant (it may be possible to add a small bit of script to check the number of nesting levels when iterating through values, but I haven't yet looked at this).
I find this approach a bit more flexible, here you just providing generator function that emits key, value pairs and can be easily extended to also iterate over lists.
def traverse(value, key=None):
if isinstance(value, dict):
for k, v in value.items():
yield from traverse(v, k)
else:
yield key, value
Then you can write your own myprint function, then would print those key value pairs.
def myprint(d):
for k, v in traverse(d):
print(f"{k} : {v}")
A test:
myprint({
'xml': {
'config': {
'portstatus': {
'status': 'good',
},
'target': '1',
},
'port': '11',
},
})
Output:
status : good
target : 1
port : 11
I tested this on Python 3.6.
Nested dictionaries looping using isinstance() and yield function.
isinstance() is a function that returns True when the given object is an instance of the given type and False otherwise; in the code below, a value that is a dict makes the check true, so the function goes on to iterate over it.
yield is used to return a value from a function without destroying the state of its local variables; when the function is resumed, execution continues from the last yield statement. Any function that contains a yield keyword is termed a generator.
students= {'emp1': {'name': 'Bob', 'job': 'Mgr'},
'emp2': {'name': 'Kim', 'job': 'Dev','emp3': {'namee': 'Saam', 'j0ob': 'Deev'}},
'emp4': {'name': 'Sam', 'job': 'Dev'}}
def nested_dict_pairs_iterator(dict_obj):
for key, value in dict_obj.items():
# Check if value is of dict type
if isinstance(value, dict):
# If value is dict then iterate over all its values
for pair in nested_dict_pairs_iterator(value):
yield (key, *pair)
else:
# If value is not dict type then yield the value
yield (key, value)
for pair in nested_dict_pairs_iterator(students):
print(pair)
For a ready-made solution install ndicts
pip install ndicts
Import a NestedDict in your script
from ndicts.ndicts import NestedDict
Initialize
dictionary = {
u'xml': {
u'config': {
u'portstatus': {u'status': u'good'},
u'target': u'1'
},
u'port': u'11'
}
}
nd = NestedDict(dictionary)
Iterate
for key, value in nd.items():
print(key, value)
While the original solution from @Scharron is beautiful and simple, it cannot handle lists very well:
def myprint(d):
for k, v in d.items():
if isinstance(v, dict):
myprint(v)
else:
print("{0} : {1}".format(k, v))
So this code can be slightly modified like this to handle list in elements:
def myprint(d):
    """Print every leaf of ``d`` as ``key : value``, descending into dicts.

    Lists are handled element by element: dict elements are recursed into,
    and any other element is printed under the key that holds the list.
    """
    for k, v in d.items():
        if isinstance(v, dict):
            myprint(v)
        elif isinstance(v, list):
            for i in v:
                if isinstance(i, dict):
                    myprint(i)
                else:
                    # Non-dict elements have no .items(); print them directly.
                    print("{0} : {1}".format(k, i))
        else:
            print("{0} : {1}".format(k, v))
These answers work for only 2 levels of sub-dictionaries. For more try this:
nested_dict = {'dictA': {'key_1': 'value_1', 'key_1A': 'value_1A','key_1Asub1': {'Asub1': 'Asub1_val', 'sub_subA1': {'sub_subA1_key':'sub_subA1_val'}}},
'dictB': {'key_2': 'value_2'},
1: {'key_3': 'value_3', 'key_3A': 'value_3A'}}
def print_dict(dictionary):
dictionary_array = [dictionary]
for sub_dictionary in dictionary_array:
if type(sub_dictionary) is dict:
for key, value in sub_dictionary.items():
print("key=", key)
print("value", value)
if type(value) is dict:
dictionary_array.append(value)
print_dict(nested_dict)
You can print recursively with a dictionary comprehension:
def print_key_pairs(d):
    """Print ``key: value`` for every non-dict value in ``d``, recursively.

    Returns None; the function exists only for its printing side effect.
    """
    # A plain loop instead of a comprehension: the work is a side effect,
    # so there is no point in building a throwaway dict of None values.
    for k, v in d.items():
        if isinstance(v, dict):
            print_key_pairs(v)
        else:
            print(f'{k}: {v}')
For your test case this is the output:
>>> print_key_pairs({u'xml': {u'config': {u'portstatus': {u'status': u'good'}, u'target': u'1'}, u'port': u'11'}})
status: good
target: 1
port: 11
Returns a tuple of each key and value and the key contains the full path
from collections import abc
from typing import Any, Iterator, Mapping, Tuple


def traverse_dict(nested: Mapping, parent_key="", keys_to_not_traverse_further=tuple()) -> Iterator[Tuple[str, Any]]:
    """Yield ``(path, value)`` pairs for the leaves of a nested mapping.

    Each key is joined with its parent path using a dot as separator, so
    with the default ``parent_key=""`` every path starts with a dot
    (e.g. ``.author.lastname``).
    A mapping stored under a key listed in `keys_to_not_traverse_further`
    is not descended into; it is yielded whole as the value instead.
    """
    for key, value in nested.items():
        if isinstance(value, abc.Mapping) and key not in keys_to_not_traverse_further:
            yield from traverse_dict(value, f"{parent_key}.{key}", keys_to_not_traverse_further)
        else:
            yield f"{parent_key}.{key}", value
Let's test it
my_dict = {
"isbn": "123-456-222",
"author": {"lastname": "Doe", "firstname": "Jane"},
"editor": {"lastname": "Smith", "firstname": "Jane"},
"title": "The Ultimate Database Study Guide",
"category": ["Non-Fiction", "Technology"],
"first": {
"second": {"third": {"fourth": {"blah": "yadda"}}},
"fifth": {"sixth": "seventh"},
},
}
for k, v in traverse_dict(my_dict):
print(k, v)
Returns
.isbn 123-456-222
.author.lastname Doe
.author.firstname Jane
.editor.lastname Smith
.editor.firstname Jane
.title The Ultimate Database Study Guide
.category ['Non-Fiction', 'Technology']
.first.second.third.fourth.blah yadda
.first.fifth.sixth seventh
If you don't care about some child dicts e.g names in this case then
use the keys_to_not_traverse_further
for k, v in traverse_dict(my_dict, parent_key="", keys_to_not_traverse_further=("author","editor")):
print(k, v)
Returns
.isbn 123-456-222
.author {'lastname': 'Doe', 'firstname': 'Jane'}
.editor {'lastname': 'Smith', 'firstname': 'Jane'}
.title The Ultimate Database Study Guide
.category ['Non-Fiction', 'Technology']
.first.second.third.fourth.blah yadda
.first.fifth.sixth seventh

Pythonic way to retrieve single level from nested dictionary

I am trying to retrieve a single level from a nested dictionary. So for instance, given the dictionary below, I would expect to see the following results for the first and second level.
nested_dict = {
'foo':'bar',
'baz':{
'foo':'baz'
}}
# level 0
{'foo':'bar'}
# level 1
{'foo':'baz'}
I can retrieve the first level using a dict comprehension:
{k:v for (k,v) in nested_dict.items() if type(v) is not dict}
>>> {'foo':'bar'}
Or retrieve a specified level using a recursion:
def get_level(nested_dict, level):
if level == 0:
return {k:v for (k,v) in nested_dict.items() if type(v) is not dict}
else:
this_level = {}
for (k,v) in nested_dict.items():
if type(v) is dict:
this_level.update(v)
return get_level(this_level, level - 1)
get_level(nested_dict, 1)
>>> {'foo':'baz'}
I am now wondering if there is a more Pythonic/clean/out of the box way to retrieve the levels of nested dictionaries (as I already did above), if necessary with help of a package.
As stated in the comments, I'm assuming you don't have duplicates:
nested_dict = {
'foo':'bar',
'baz':{
'foo':'baz',
'fee': {
'foo': 'fee'
}
},
'baz2':{
'foo2':'baz2'
}
}
def get_level(dct, level):
if level == 0:
yield from ((k, v) for k, v in dct.items() if not isinstance(v, dict))
else:
yield from ((kk, vv) for v in dct.values() if isinstance(v, dict) for kk, vv in get_level(v, level-1))
print(dict(get_level(nested_dict, 1)))
Prints:
{'foo': 'baz', 'foo2': 'baz2'}
print(dict(get_level(nested_dict, 2)))
Prints:
{'foo': 'fee'}

Python nested dictionary update value where any nested key matches

I have a nested dictionary where every element can be of any type including a list or
dictionary.
I'm looking for a method to update any key at any depth with a particular value.
(So the replacement occurs if the target value is not a list or dictionary)
e.g
{
'a': 1,
'b': 2,
'c': [{'a': 2, 'b': 3}],
'd': [{'d_d': {'a': 1, 'b': 2}}],
'e': {'a': 4},
}
would become
{
'a': 'xx',
'b': 2,
'c': [{'a': 'xx', 'b': 3}],
'd': [{'d_d': {'a': 'xx', 'b': 2}}],
'e': {'a': 'xx'},
}
where the function takes a dictionary, key and new value like so
update_nested(dict, key='a', value='xx')
Let's look at which parts you need and how to implement them:
iterate over a dictionary
there are three different methods that help you to iterate over a dictionary:
dict.keys()
iterating over all keys in the dict. e.g.
# dict.keys() yields each key of the dict in turn.
for key in {"Hello": 10, "World": 20}.keys():
    print(key)
# output: Hello\nWorld
dict.items()
iterating over all (key, value) tuples in the dict. for k, v in d.items()
dict.values()
iterating over all values in the dict. for v in d.values()
Handle nested dictionaries
When you have nested structures, a good concept to use is recursion.
In short: You call the same function in itself with different parameters.
putting both concepts together.
Iterate over the array
If the key is the same as the one you are searching for: Replace the value
If the value is a dict: Call the function again with the value as dict parameter
If the value is a list: Iterate over all items and check if they are dicts. If so handle them like above
Possible final code:
def update_nested(in_dict, key, value):
for k, v in in_dict.items():
if key == k:
in_dict[k] = value
elif isinstance(v, dict):
update_nested(v, key, value)
elif isinstance(v, list):
for o in v:
if isinstance(o, dict):
update_nested(o, key, value)
Extra note:
You should never use a builtin name/type as a variable name. In your case dict. This will override the builtin type and can lead to unexpected behavior.
You can use a recursive function that checks on instances of dict and list:
def nested_update(obj, key, value):
if isinstance(obj, dict):
for k, v in obj.items():
if isinstance(v, (dict, list)):
nested_update(v, key, value)
elif k == key:
obj[k] = value
elif isinstance(obj, list):
for item in obj:
nested_update(item, key, value)
I had a similar problem to update imported JSON values in varying nested dictionaries.
Using @Uli Sotschok's solution, this is my working code:
def changejsonimage(key, value, dictionary):
for k,v in dictionary.items():
if isinstance(v, str) and 'images' in v: #if string and has 'images' in the value
dictionary[k] = 'InlineImage(tpl,"' + v + '")' #change the value
elif isinstance(v, dict): #any value that is a dictionary, loop back into the fn
changejsonimage(key,value,v)
This function updates at any depth and remove all None values so keys can be deleted.
def deep_update(main_dict, update_dict):
main_dict.update([(k, deep_update(main_dict[k], v) if isinstance(main_dict.get(k), dict) and isinstance(v, dict) else v) for k, v in update_dict.items()])
return dict((k, v) for k, v in main_dict.items() if v is not None)

Cleaner way to unpack nested dictionaries

I am receiving data in batches from an API in JSON format. I wish to store only the values, in a list.
The raw data looks like this and will always look like this, i.e: all {...} will look like the first example:
data = content.get('data')
>>> [{'a':1, 'b':{'c':2, 'd':3}, 'e':4}, {...}, {...}, ...]
The nested dictionary is making this harder; I need this unpacked as well.
Here is what I have, which works but it feels so bad:
unpacked = []
data = content.get('data')
for d in data:
item = []
for k, v in d.items():
if k == 'b':
for val in v.values():
item.append(val)
else:
item.append(v)
unpacked.append(item)
Output:
>>> [[1,2,3,4], [...], [...], ...]
How can I improve this?
You could use a recursive function and some type tests:
data = [{'a':1, 'b':{'c':2, 'd':3}, 'e':4}, {'f':5,'g':6}]
def extract_nested_values(it):
    """Lazily yield every leaf value of a nested list/dict structure.

    Lists contribute their elements and dicts contribute their values
    (keys are ignored), both recursively. Anything else is a leaf and is
    yielded as-is.
    """
    if isinstance(it, list):
        children = it
    elif isinstance(it, dict):
        children = it.values()
    else:
        yield it
        return
    for child in children:
        yield from extract_nested_values(child)
print(list(extract_nested_values(data)))
# [1, 2, 3, 4, 5, 6]
Note that it outputs a flat generator, not a list of lists.
Assuming your dictionaries do not contain inner lists, you could define a simple routine to unpack a nested dictionary, and iterate through each item in data using a loop.
def unpack(data):
for k, v in data.items():
if isinstance(v, dict):
yield from unpack(v)
else:
yield v
Note that this function is as simple as it is thanks to the magic of yield from. Now, let's call it with some data.
data = [{'a':1, 'b':{'c':2, 'd':3}, 'e':4}, {'f':5,'g':6}] # Data "borrowed" from Kaushik NP
result = [list(unpack(x)) for x in data]
print(result)
[[2, 3, 1, 4], [5, 6]]
Note the lack of order in your result, because of the arbitrary order of dictionaries.
For completeness, based on the excellent answer of Eric Duminil, here is a function that returns the maximum depth of a nested dict or list:
def depth(it, count=0):
    """Maximum nesting depth of a nested dict or list (zero-based).

    # Arguments
        it: a nested dict or list.
        count: depth already accumulated for `it`; used by the recursion,
            callers normally leave it at 0.

    # Returns
        Numeric value: `count` when `it` holds no dict or list, otherwise
        the largest depth reached through any of its nested children.
    """
    if isinstance(it, dict):
        children = it.values()
    elif isinstance(it, list):
        children = it
    else:
        return count
    # Take the deepest branch over ALL nested children; returning on the
    # first nested child would ignore deeper siblings.
    return max(
        (depth(v, count + 1) for v in children if isinstance(v, (list, dict))),
        default=count,
    )
In the Python tradition, it is zero-based.
Other answers (especially @COLDSPEED's) have already covered the situation, but here is slightly different code based on the old adage "it's better to ask forgiveness than permission", which I tend to prefer to type checking:
def unpack(data):
try:
for value in data.values():
yield from unpack(value)
except AttributeError:
yield data
data = [{'a':1, 'b':{'c':2, 'd':3}, 'e':4}]
unpacked = [list(unpack(item)) for item in data]
Doing recursively :
def traverse(d):
for key,val in d.items():
if isinstance(val, dict):
traverse(val)
else:
l.append(val)
out=[]
for d in data:
l=[]
traverse(d)
out.append(l)
print(out)
#driver values :
IN : data = [{'a':1, 'b':{'c':2, 'd':3}, 'e':4}, {'f':5,'g':6}]
OUT : out = [[1, 2, 3, 4], [5, 6]]
EDIT : A better way to do this is using yield so as not to have to rely on global variables as in the first method.
def traverse(d):
for key,val in d.items():
if isinstance(val, dict):
yield from traverse(val)
else:
yield val
out = [list(traverse(d)) for d in data]

Categories

Resources