My list is being deleted along with my dictionary - python

I wrote the following code and i cannot understand why my dictionary is being deleted alone with my list.
Really hope you could help me with it.. iam kind of stuck here.
This is my code :
course_dict = {'I': 3, 'love': 3, 'self.py!': 2}
save_dict = {}
def inverse_dict(Recived_dict):
global save_dict
list_counter = 0
new_dict = {}
my_list = []
current_value = list(Recived_dict.values())[0]
for key, value in Recived_dict.items():
if value == current_value:
my_list.append(key)
new_dict[value] = my_list
save_dict = new_dict
else:
if list_counter == 0:
del my_list[0:]
list_counter =1
my_list.append(key)
new_dict[value] = my_list
print(new_dict)
inverse_dict(course_dict)

Since you have values that are not unique, you need a way to aggregate the original keys that correspond to the original values. You can start with a dictionary with the new keys and blank lists for the values, and then append the new values:
course_dict = {'I': 3, 'love': 3, 'self.py!': 2}
def inverse_dict(Recived_dict):
new_dict = {v: [] for v in Recived_dict.values()}
for k, v in Recived_dict.items():
new_dict[v].append(k)
print(new_dict)
inverse_dict(course_dict)
{3: ['I', 'love'], 2: ['self.py!']}
Assuming you will want to be able to use this new dictionary, your function should probably return the inverted dictionary:
def inverse_dict(Recived_dict):
new_dict = {v: [] for v in Recived_dict.values()}
for k, v in Recived_dict.items():
new_dict[v].append(k)
return new_dict
course_dict = {'I': 3, 'love': 3, 'self.py!': 2}
save_dict = inverse_dict(course_dict)
print(save_dict)
{3: ['I', 'love'], 2: ['self.py!']}

defaultdict will make this much easier.
from collections import defaultdict
a = {3: 'foo', 5: 'foo', 4: 'bar'}
d = defaultdict(list)
for k, v in a.items():
d[v].append(k)
Result:
defaultdict(<class 'list'>, {'foo': [3, 5], 'bar': [4]})

Related

How can I remove an dictionary element in list?

There are some dictionary in list:
a = [{1: 2}, {1: 3}, {2: 5}, {2: 3}]
All of the keys and values of this dictionary are int type, some dicts have the same key, I want to remain those dictionaries who has the same key and the value is bigger than others, how can I do it?
example:
I want to obtain this list:
[{1: 3}, {2: 5}]
Simple approach using collections.defaultdict:
from collections import defaultdict
a = [{1: 2}, {1: 3}, {2: 5}, {2: 3}]
temp = defaultdict(lambda: float("-inf"))
for item in a:
[(i, v)] = item.items()
temp[i] = max(v, temp[i])
res = [{i: v} for i, v in temp.items()]
print(res)
Output
[{1: 3}, {2: 5}]
In the code the expression float("-inf") acts an unbounded lower value for comparison, this means that:
float("-inf") < x
for any value of x, unless x is float("nan") or float("-inf") itself. Read more in here.
After each first call setting temp[i] for a given i, the value is going to be the first appearing v.
Assuming that each dictionary has only one key/value pair, here is a possible solution. Essentially, create a new dictionary to keep track of the maximum values for each key. Then, turn this into the final list.
def remove_duplicates(dicts):
merged = {}
for d in dicts:
key, value = list(d.items())[0]
if key not in merged:
merged[key] = value
else:
merged[key] = max(
merged[key], value
)
return [
{key: value}
for key, value in merged.items()
]
a = [{1: 2}, {1: 3}, {2: 5}, {2: 3}]
output = {}
for d in a:
k = list(d.keys())[0]
v = list(d.values())[0]
if k not in output.keys():
output[k] = v
else:
if output[k] < v:
output[k] = v
output = [{k:v} for k, v in output.items()]
print(output)
from itertools import groupby
a = [{1: 2}, {1: 3}, {2: 5}, {2: 3}]
[{k, max(y[1] for y in g)} # build up the result by recreating the list of maps by taking the keys in the group and map them to the biggest value in the group
for k, g
in groupby((sorted( # group everything together, so that those touples with the same key (=index 0) are in the same group
((k, v) for m in a for k, v in m.items()) # transform the list of maps to a generator of tuples
, key=lambda x: x[0])), key=lambda x: x[0])]
So here I first transform the list of maps to a list of tuples (which makes much more sense, but that is just me), group them together by the key, and afterwards create new maps for each key, by taking the biggest value inside each group.
Same logic as BrownieInMotion's approach, but with cleaner code using list comprehension for the result.
a = [{1: 2}, {1: 3}, {2: 5}, {2: 3}]
temp = {}
for item in a:
i, v = list(*item.items())
if i not in temp:
temp[i] = v
else:
temp[i] = max(v, temp[i])
print([{i: v} for i, v in temp.items()])
The unpacking usage list(*item.items()) restricts to dictionaries of single key. If you want to remove the restriction, you can go ahead and use list(item.items())[0].

Extract varying levels of nested key value pairs from dictionary [duplicate]

Suppose you have a dictionary like:
{'a': 1,
'c': {'a': 2,
'b': {'x': 5,
'y' : 10}},
'd': [1, 2, 3]}
How would you go about flattening that into something like:
{'a': 1,
'c_a': 2,
'c_b_x': 5,
'c_b_y': 10,
'd': [1, 2, 3]}
Basically the same way you would flatten a nested list, you just have to do the extra work for iterating the dict by key/value, creating new keys for your new dictionary and creating the dictionary at final step.
import collections
def flatten(d, parent_key='', sep='_'):
items = []
for k, v in d.items():
new_key = parent_key + sep + k if parent_key else k
if isinstance(v, collections.MutableMapping):
items.extend(flatten(v, new_key, sep=sep).items())
else:
items.append((new_key, v))
return dict(items)
>>> flatten({'a': 1, 'c': {'a': 2, 'b': {'x': 5, 'y' : 10}}, 'd': [1, 2, 3]})
{'a': 1, 'c_a': 2, 'c_b_x': 5, 'd': [1, 2, 3], 'c_b_y': 10}
For Python >= 3.3, change the import to from collections.abc import MutableMapping to avoid a deprecation warning and change collections.MutableMapping to just MutableMapping.
Or if you are already using pandas, You can do it with json_normalize() like so:
import pandas as pd
d = {'a': 1,
'c': {'a': 2, 'b': {'x': 5, 'y' : 10}},
'd': [1, 2, 3]}
df = pd.json_normalize(d, sep='_')
print(df.to_dict(orient='records')[0])
Output:
{'a': 1, 'c_a': 2, 'c_b_x': 5, 'c_b_y': 10, 'd': [1, 2, 3]}
There are two big considerations that the original poster needs to consider:
Are there keyspace clobbering issues? For example, {'a_b':{'c':1}, 'a':{'b_c':2}} would result in {'a_b_c':???}. The below solution evades the problem by returning an iterable of pairs.
If performance is an issue, does the key-reducer function (which I hereby refer to as 'join') require access to the entire key-path, or can it just do O(1) work at every node in the tree? If you want to be able to say joinedKey = '_'.join(*keys), that will cost you O(N^2) running time. However if you're willing to say nextKey = previousKey+'_'+thisKey, that gets you O(N) time. The solution below lets you do both (since you could merely concatenate all the keys, then postprocess them).
(Performance is not likely an issue, but I'll elaborate on the second point in case anyone else cares: In implementing this, there are numerous dangerous choices. If you do this recursively and yield and re-yield, or anything equivalent which touches nodes more than once (which is quite easy to accidentally do), you are doing potentially O(N^2) work rather than O(N). This is because maybe you are calculating a key a then a_1 then a_1_i..., and then calculating a then a_1 then a_1_ii..., but really you shouldn't have to calculate a_1 again. Even if you aren't recalculating it, re-yielding it (a 'level-by-level' approach) is just as bad. A good example is to think about the performance on {1:{1:{1:{1:...(N times)...{1:SOME_LARGE_DICTIONARY_OF_SIZE_N}...}}}})
Below is a function I wrote flattenDict(d, join=..., lift=...) which can be adapted to many purposes and can do what you want. Sadly it is fairly hard to make a lazy version of this function without incurring the above performance penalties (many python builtins like chain.from_iterable aren't actually efficient, which I only realized after extensive testing of three different versions of this code before settling on this one).
from collections import Mapping
from itertools import chain
from operator import add
_FLAG_FIRST = object()
def flattenDict(d, join=add, lift=lambda x:(x,)):
results = []
def visit(subdict, results, partialKey):
for k,v in subdict.items():
newKey = lift(k) if partialKey==_FLAG_FIRST else join(partialKey,lift(k))
if isinstance(v,Mapping):
visit(v, results, newKey)
else:
results.append((newKey,v))
visit(d, results, _FLAG_FIRST)
return results
To better understand what's going on, below is a diagram for those unfamiliar with reduce(left), otherwise known as "fold left". Sometimes it is drawn with an initial value in place of k0 (not part of the list, passed into the function). Here, J is our join function. We preprocess each kn with lift(k).
[k0,k1,...,kN].foldleft(J)
/ \
... kN
/
J(k0,J(k1,J(k2,k3)))
/ \
/ \
J(J(k0,k1),k2) k3
/ \
/ \
J(k0,k1) k2
/ \
/ \
k0 k1
This is in fact the same as functools.reduce, but where our function does this to all key-paths of the tree.
>>> reduce(lambda a,b:(a,b), range(5))
((((0, 1), 2), 3), 4)
Demonstration (which I'd otherwise put in docstring):
>>> testData = {
'a':1,
'b':2,
'c':{
'aa':11,
'bb':22,
'cc':{
'aaa':111
}
}
}
from pprint import pprint as pp
>>> pp(dict( flattenDict(testData) ))
{('a',): 1,
('b',): 2,
('c', 'aa'): 11,
('c', 'bb'): 22,
('c', 'cc', 'aaa'): 111}
>>> pp(dict( flattenDict(testData, join=lambda a,b:a+'_'+b, lift=lambda x:x) ))
{'a': 1, 'b': 2, 'c_aa': 11, 'c_bb': 22, 'c_cc_aaa': 111}
>>> pp(dict( (v,k) for k,v in flattenDict(testData, lift=hash, join=lambda a,b:hash((a,b))) ))
{1: 12416037344,
2: 12544037731,
11: 5470935132935744593,
22: 4885734186131977315,
111: 3461911260025554326}
Performance:
from functools import reduce
def makeEvilDict(n):
return reduce(lambda acc,x:{x:acc}, [{i:0 for i in range(n)}]+range(n))
import timeit
def time(runnable):
t0 = timeit.default_timer()
_ = runnable()
t1 = timeit.default_timer()
print('took {:.2f} seconds'.format(t1-t0))
>>> pp(makeEvilDict(8))
{7: {6: {5: {4: {3: {2: {1: {0: {0: 0,
1: 0,
2: 0,
3: 0,
4: 0,
5: 0,
6: 0,
7: 0}}}}}}}}}
import sys
sys.setrecursionlimit(1000000)
forget = lambda a,b:''
>>> time(lambda: dict(flattenDict(makeEvilDict(10000), join=forget)) )
took 0.10 seconds
>>> time(lambda: dict(flattenDict(makeEvilDict(100000), join=forget)) )
[1] 12569 segmentation fault python
... sigh, don't think that one is my fault...
[unimportant historical note due to moderation issues]
Regarding the alleged duplicate of Flatten a dictionary of dictionaries (2 levels deep) of lists
That question's solution can be implemented in terms of this one by doing sorted( sum(flatten(...),[]) ). The reverse is not possible: while it is true that the values of flatten(...) can be recovered from the alleged duplicate by mapping a higher-order accumulator, one cannot recover the keys. (edit: Also it turns out that the alleged duplicate owner's question is completely different, in that it only deals with dictionaries exactly 2-level deep, though one of the answers on that page gives a general solution.)
If you're using pandas there is a function hidden in pandas.io.json._normalize1 called nested_to_record which does this exactly.
from pandas.io.json._normalize import nested_to_record
flat = nested_to_record(my_dict, sep='_')
1 In pandas versions 0.24.x and older use pandas.io.json.normalize (without the _)
Here is a kind of a "functional", "one-liner" implementation. It is recursive, and based on a conditional expression and a dict comprehension.
def flatten_dict(dd, separator='_', prefix=''):
return { prefix + separator + k if prefix else k : v
for kk, vv in dd.items()
for k, v in flatten_dict(vv, separator, kk).items()
} if isinstance(dd, dict) else { prefix : dd }
Test:
In [2]: flatten_dict({'abc':123, 'hgf':{'gh':432, 'yu':433}, 'gfd':902, 'xzxzxz':{"432":{'0b0b0b':231}, "43234":1321}}, '.')
Out[2]:
{'abc': 123,
'gfd': 902,
'hgf.gh': 432,
'hgf.yu': 433,
'xzxzxz.432.0b0b0b': 231,
'xzxzxz.43234': 1321}
Not exactly what the OP asked, but lots of folks are coming here looking for ways to flatten real-world nested JSON data which can have nested key-value json objects and arrays and json objects inside the arrays and so on. JSON doesn't include tuples, so we don't have to fret over those.
I found an implementation of the list-inclusion comment by #roneo to the answer posted by #Imran :
https://github.com/ScriptSmith/socialreaper/blob/master/socialreaper/tools.py#L8
import collections
def flatten(dictionary, parent_key=False, separator='.'):
"""
Turn a nested dictionary into a flattened dictionary
:param dictionary: The dictionary to flatten
:param parent_key: The string to prepend to dictionary's keys
:param separator: The string used to separate flattened keys
:return: A flattened dictionary
"""
items = []
for key, value in dictionary.items():
new_key = str(parent_key) + separator + key if parent_key else key
if isinstance(value, collections.MutableMapping):
items.extend(flatten(value, new_key, separator).items())
elif isinstance(value, list):
for k, v in enumerate(value):
items.extend(flatten({str(k): v}, new_key).items())
else:
items.append((new_key, value))
return dict(items)
Test it:
flatten({'a': 1, 'c': {'a': 2, 'b': {'x': 5, 'y' : 10}}, 'd': [1, 2, 3] })
>> {'a': 1, 'c.a': 2, 'c.b.x': 5, 'c.b.y': 10, 'd.0': 1, 'd.1': 2, 'd.2': 3}
Annd that does the job I need done: I throw any complicated json at this and it flattens it out for me.
All credits to https://github.com/ScriptSmith .
Code:
test = {'a': 1, 'c': {'a': 2, 'b': {'x': 5, 'y' : 10}}, 'd': [1, 2, 3]}
def parse_dict(init, lkey=''):
ret = {}
for rkey,val in init.items():
key = lkey+rkey
if isinstance(val, dict):
ret.update(parse_dict(val, key+'_'))
else:
ret[key] = val
return ret
print(parse_dict(test,''))
Results:
$ python test.py
{'a': 1, 'c_a': 2, 'c_b_x': 5, 'd': [1, 2, 3], 'c_b_y': 10}
I am using python3.2, update for your version of python.
This is not restricted to dictionaries, but every mapping type that implements .items(). Further ist faster as it avoides an if condition. Nevertheless credits go to Imran:
def flatten(d, parent_key=''):
items = []
for k, v in d.items():
try:
items.extend(flatten(v, '%s%s_' % (parent_key, k)).items())
except AttributeError:
items.append(('%s%s' % (parent_key, k), v))
return dict(items)
How about a functional and performant solution in Python3.5?
from functools import reduce
def _reducer(items, key, val, pref):
if isinstance(val, dict):
return {**items, **flatten(val, pref + key)}
else:
return {**items, pref + key: val}
def flatten(d, pref=''):
return(reduce(
lambda new_d, kv: _reducer(new_d, *kv, pref),
d.items(),
{}
))
This is even more performant:
def flatten(d, pref=''):
return(reduce(
lambda new_d, kv: \
isinstance(kv[1], dict) and \
{**new_d, **flatten(kv[1], pref + kv[0])} or \
{**new_d, pref + kv[0]: kv[1]},
d.items(),
{}
))
In use:
my_obj = {'a': 1, 'c': {'a': 2, 'b': {'x': 5, 'y': 10}}, 'd': [1, 2, 3]}
print(flatten(my_obj))
# {'d': [1, 2, 3], 'cby': 10, 'cbx': 5, 'ca': 2, 'a': 1}
If you are a fan of pythonic oneliners:
my_dict={'a': 1,'c': {'a': 2,'b': {'x': 5,'y' : 10}},'d': [1, 2, 3]}
list(pd.json_normalize(my_dict).T.to_dict().values())[0]
returns:
{'a': 1, 'c.a': 2, 'c.b.x': 5, 'c.b.y': 10, 'd': [1, 2, 3]}
You can leave the [0] from the end, if you have a list of dictionaries and not just a single dictionary.
My Python 3.3 Solution using generators:
def flattenit(pyobj, keystring=''):
if type(pyobj) is dict:
if (type(pyobj) is dict):
keystring = keystring + "_" if keystring else keystring
for k in pyobj:
yield from flattenit(pyobj[k], keystring + k)
elif (type(pyobj) is list):
for lelm in pyobj:
yield from flatten(lelm, keystring)
else:
yield keystring, pyobj
my_obj = {'a': 1, 'c': {'a': 2, 'b': {'x': 5, 'y': 10}}, 'd': [1, 2, 3]}
#your flattened dictionary object
flattened={k:v for k,v in flattenit(my_obj)}
print(flattened)
# result: {'c_b_y': 10, 'd': [1, 2, 3], 'c_a': 2, 'a': 1, 'c_b_x': 5}
Utilizing recursion, keeping it simple and human readable:
def flatten_dict(dictionary, accumulator=None, parent_key=None, separator="."):
if accumulator is None:
accumulator = {}
for k, v in dictionary.items():
k = f"{parent_key}{separator}{k}" if parent_key else k
if isinstance(v, dict):
flatten_dict(dictionary=v, accumulator=accumulator, parent_key=k)
continue
accumulator[k] = v
return accumulator
Call is simple:
new_dict = flatten_dict(dictionary)
or
new_dict = flatten_dict(dictionary, separator="_")
if we want to change the default separator.
A little breakdown:
When the function is first called, it is called only passing the dictionary we want to flatten. The accumulator parameter is here to support recursion, which we see later. So, we instantiate accumulator to an empty dictionary where we will put all of the nested values from the original dictionary.
if accumulator is None:
accumulator = {}
As we iterate over the dictionary's values, we construct a key for every value. The parent_key argument will be None for the first call, while for every nested dictionary, it will contain the key pointing to it, so we prepend that key.
k = f"{parent_key}{separator}{k}" if parent_key else k
In case the value v the key k is pointing to is a dictionary, the function calls itself, passing the nested dictionary, the accumulator (which is passed by reference, so all changes done to it are done on the same instance) and the key k so that we can construct the concatenated key. Notice the continue statement. We want to skip the next line, outside of the if block, so that the nested dictionary doesn't end up in the accumulator under key k.
if isinstance(v, dict):
flatten_dict(dict=v, accumulator=accumulator, parent_key=k)
continue
So, what do we do in case the value v is not a dictionary? Just put it unchanged inside the accumulator.
accumulator[k] = v
Once we're done we just return the accumulator, leaving the original dictionary argument untouched.
NOTE
This will work only with dictionaries that have strings as keys. It will work with hashable objects implementing the __repr__ method, but will yield unwanted results.
Simple function to flatten nested dictionaries. For Python 3, replace .iteritems() with .items()
def flatten_dict(init_dict):
res_dict = {}
if type(init_dict) is not dict:
return res_dict
for k, v in init_dict.iteritems():
if type(v) == dict:
res_dict.update(flatten_dict(v))
else:
res_dict[k] = v
return res_dict
The idea/requirement was:
Get flat dictionaries with no keeping parent keys.
Example of usage:
dd = {'a': 3,
'b': {'c': 4, 'd': 5},
'e': {'f':
{'g': 1, 'h': 2}
},
'i': 9,
}
flatten_dict(dd)
>> {'a': 3, 'c': 4, 'd': 5, 'g': 1, 'h': 2, 'i': 9}
Keeping parent keys is simple as well.
I was thinking of a subclass of UserDict to automagically flat the keys.
class FlatDict(UserDict):
def __init__(self, *args, separator='.', **kwargs):
self.separator = separator
super().__init__(*args, **kwargs)
def __setitem__(self, key, value):
if isinstance(value, dict):
for k1, v1 in FlatDict(value, separator=self.separator).items():
super().__setitem__(f"{key}{self.separator}{k1}", v1)
else:
super().__setitem__(key, value)
‌
The advantages it that keys can be added on the fly, or using standard dict instanciation, without surprise:
‌
>>> fd = FlatDict(
... {
... 'person': {
... 'sexe': 'male',
... 'name': {
... 'first': 'jacques',
... 'last': 'dupond'
... }
... }
... }
... )
>>> fd
{'person.sexe': 'male', 'person.name.first': 'jacques', 'person.name.last': 'dupond'}
>>> fd['person'] = {'name': {'nickname': 'Bob'}}
>>> fd
{'person.sexe': 'male', 'person.name.first': 'jacques', 'person.name.last': 'dupond', 'person.name.nickname': 'Bob'}
>>> fd['person.name'] = {'civility': 'Dr'}
>>> fd
{'person.sexe': 'male', 'person.name.first': 'jacques', 'person.name.last': 'dupond', 'person.name.nickname': 'Bob', 'person.name.civility': 'Dr'}
This is similar to both imran's and ralu's answer. It does not use a generator, but instead employs recursion with a closure:
def flatten_dict(d, separator='_'):
final = {}
def _flatten_dict(obj, parent_keys=[]):
for k, v in obj.iteritems():
if isinstance(v, dict):
_flatten_dict(v, parent_keys + [k])
else:
key = separator.join(parent_keys + [k])
final[key] = v
_flatten_dict(d)
return final
>>> print flatten_dict({'a': 1, 'c': {'a': 2, 'b': {'x': 5, 'y' : 10}}, 'd': [1, 2, 3]})
{'a': 1, 'c_a': 2, 'c_b_x': 5, 'd': [1, 2, 3], 'c_b_y': 10}
The answers above work really well. Just thought I'd add the unflatten function that I wrote:
def unflatten(d):
ud = {}
for k, v in d.items():
context = ud
for sub_key in k.split('_')[:-1]:
if sub_key not in context:
context[sub_key] = {}
context = context[sub_key]
context[k.split('_')[-1]] = v
return ud
Note: This doesn't account for '_' already present in keys, much like the flatten counterparts.
Davoud's solution is very nice but doesn't give satisfactory results when the nested dict also contains lists of dicts, but his code be adapted for that case:
def flatten_dict(d):
items = []
for k, v in d.items():
try:
if (type(v)==type([])):
for l in v: items.extend(flatten_dict(l).items())
else:
items.extend(flatten_dict(v).items())
except AttributeError:
items.append((k, v))
return dict(items)
def flatten(unflattened_dict, separator='_'):
flattened_dict = {}
for k, v in unflattened_dict.items():
if isinstance(v, dict):
sub_flattened_dict = flatten(v, separator)
for k2, v2 in sub_flattened_dict.items():
flattened_dict[k + separator + k2] = v2
else:
flattened_dict[k] = v
return flattened_dict
I actually wrote a package called cherrypicker recently to deal with this exact sort of thing since I had to do it so often!
I think the following code would give you exactly what you're after:
from cherrypicker import CherryPicker
dct = {
'a': 1,
'c': {
'a': 2,
'b': {
'x': 5,
'y' : 10
}
},
'd': [1, 2, 3]
}
picker = CherryPicker(dct)
picker.flatten().get()
You can install the package with:
pip install cherrypicker
...and there's more docs and guidance at https://cherrypicker.readthedocs.io.
Other methods may be faster, but the priority of this package is to make such tasks easy. If you do have a large list of objects to flatten though, you can also tell CherryPicker to use parallel processing to speed things up.
here's a solution using a stack. No recursion.
def flatten_nested_dict(nested):
stack = list(nested.items())
ans = {}
while stack:
key, val = stack.pop()
if isinstance(val, dict):
for sub_key, sub_val in val.items():
stack.append((f"{key}_{sub_key}", sub_val))
else:
ans[key] = val
return ans
Using generators:
def flat_dic_helper(prepand,d):
if len(prepand) > 0:
prepand = prepand + "_"
for k in d:
i = d[k]
if isinstance(i, dict):
r = flat_dic_helper(prepand + k,i)
for j in r:
yield j
else:
yield (prepand + k,i)
def flat_dic(d):
return dict(flat_dic_helper("",d))
d = {'a': 1, 'c': {'a': 2, 'b': {'x': 5, 'y' : 10}}, 'd': [1, 2, 3]}
print(flat_dic(d))
>> {'a': 1, 'c_a': 2, 'c_b_x': 5, 'd': [1, 2, 3], 'c_b_y': 10}
Here's an algorithm for elegant, in-place replacement. Tested with Python 2.7 and Python 3.5. Using the dot character as a separator.
def flatten_json(json):
if type(json) == dict:
for k, v in list(json.items()):
if type(v) == dict:
flatten_json(v)
json.pop(k)
for k2, v2 in v.items():
json[k+"."+k2] = v2
Example:
d = {'a': {'b': 'c'}}
flatten_json(d)
print(d)
unflatten_json(d)
print(d)
Output:
{'a.b': 'c'}
{'a': {'b': 'c'}}
I published this code here along with the matching unflatten_json function.
If you want to flat nested dictionary and want all unique keys list then here is the solution:
def flat_dict_return_unique_key(data, unique_keys=set()):
if isinstance(data, dict):
[unique_keys.add(i) for i in data.keys()]
for each_v in data.values():
if isinstance(each_v, dict):
flat_dict_return_unique_key(each_v, unique_keys)
return list(set(unique_keys))
I always prefer access dict objects via .items(), so for flattening dicts I use the following recursive generator flat_items(d). If you like to have dict again, simply wrap it like this: flat = dict(flat_items(d))
def flat_items(d, key_separator='.'):
"""
Flattens the dictionary containing other dictionaries like here: https://stackoverflow.com/questions/6027558/flatten-nested-python-dictionaries-compressing-keys
>>> example = {'a': 1, 'c': {'a': 2, 'b': {'x': 5, 'y' : 10}}, 'd': [1, 2, 3]}
>>> flat = dict(flat_items(example, key_separator='_'))
>>> assert flat['c_b_y'] == 10
"""
for k, v in d.items():
if type(v) is dict:
for k1, v1 in flat_items(v, key_separator=key_separator):
yield key_separator.join((k, k1)), v1
else:
yield k, v
def flatten_nested_dict(_dict, _str=''):
'''
recursive function to flatten a nested dictionary json
'''
ret_dict = {}
for k, v in _dict.items():
if isinstance(v, dict):
ret_dict.update(flatten_nested_dict(v, _str = '_'.join([_str, k]).strip('_')))
elif isinstance(v, list):
for index, item in enumerate(v):
if isinstance(item, dict):
ret_dict.update(flatten_nested_dict(item, _str= '_'.join([_str, k, str(index)]).strip('_')))
else:
ret_dict['_'.join([_str, k, str(index)]).strip('_')] = item
else:
ret_dict['_'.join([_str, k]).strip('_')] = v
return ret_dict
Using dict.popitem() in straightforward nested-list-like recursion:
def flatten(d):
if d == {}:
return d
else:
k,v = d.popitem()
if (dict != type(v)):
return {k:v, **flatten(d)}
else:
flat_kv = flatten(v)
for k1 in list(flat_kv.keys()):
flat_kv[k + '_' + k1] = flat_kv[k1]
del flat_kv[k1]
return {**flat_kv, **flatten(d)}
If you do not mind recursive functions, here is a solution. I have also taken the liberty to include an exclusion-parameter in case there are one or more values you wish to maintain.
Code:
def flatten_dict(dictionary, exclude = [], delimiter ='_'):
flat_dict = dict()
for key, value in dictionary.items():
if isinstance(value, dict) and key not in exclude:
flatten_value_dict = flatten_dict(value, exclude, delimiter)
for k, v in flatten_value_dict.items():
flat_dict[f"{key}{delimiter}{k}"] = v
else:
flat_dict[key] = value
return flat_dict
Usage:
d = {'a':1, 'b':[1, 2], 'c':3, 'd':{'a':4, 'b':{'a':7, 'b':8}, 'c':6}, 'e':{'a':1,'b':2}}
flat_d = flatten_dict(dictionary=d, exclude=['e'], delimiter='.')
print(flat_d)
Output:
{'a': 1, 'b': [1, 2], 'c': 3, 'd.a': 4, 'd.b.a': 7, 'd.b.b': 8, 'd.c': 6, 'e': {'a': 1, 'b': 2}}
Variation of this Flatten nested dictionaries, compressing keys with max_level and custom reducer.
def flatten(d, max_level=None, reducer='tuple'):
if reducer == 'tuple':
reducer_seed = tuple()
reducer_func = lambda x, y: (*x, y)
else:
raise ValueError(f'Unknown reducer: {reducer}')
def impl(d, pref, level):
return reduce(
lambda new_d, kv:
(max_level is None or level < max_level)
and isinstance(kv[1], dict)
and {**new_d, **impl(kv[1], reducer_func(pref, kv[0]), level + 1)}
or {**new_d, reducer_func(pref, kv[0]): kv[1]},
d.items(),
{}
)
return impl(d, reducer_seed, 0)
I tried some of the solutions on this page - though not all - but those I tried failed to handle the nested list of dict.
Consider a dict like this:
d = {
'owner': {
'name': {'first_name': 'Steven', 'last_name': 'Smith'},
'lottery_nums': [1, 2, 3, 'four', '11', None],
'address': {},
'tuple': (1, 2, 'three'),
'tuple_with_dict': (1, 2, 'three', {'is_valid': False}),
'set': {1, 2, 3, 4, 'five'},
'children': [
{'name': {'first_name': 'Jessica',
'last_name': 'Smith', },
'children': []
},
{'name': {'first_name': 'George',
'last_name': 'Smith'},
'children': []
}
]
}
}
Here's my makeshift solution:
def flatten_dict(input_node: dict, key_: str = '', output_dict: dict = {}):
if isinstance(input_node, dict):
for key, val in input_node.items():
new_key = f"{key_}.{key}" if key_ else f"{key}"
flatten_dict(val, new_key, output_dict)
elif isinstance(input_node, list):
for idx, item in enumerate(input_node):
flatten_dict(item, f"{key_}.{idx}", output_dict)
else:
output_dict[key_] = input_node
return output_dict
which produces:
{
owner.name.first_name: Steven,
owner.name.last_name: Smith,
owner.lottery_nums.0: 1,
owner.lottery_nums.1: 2,
owner.lottery_nums.2: 3,
owner.lottery_nums.3: four,
owner.lottery_nums.4: 11,
owner.lottery_nums.5: None,
owner.tuple: (1, 2, 'three'),
owner.tuple_with_dict: (1, 2, 'three', {'is_valid': False}),
owner.set: {1, 2, 3, 4, 'five'},
owner.children.0.name.first_name: Jessica,
owner.children.0.name.last_name: Smith,
owner.children.1.name.first_name: George,
owner.children.1.name.last_name: Smith,
}
A makeshift solution and it's not perfect.
NOTE:
it doesn't keep empty dicts such as the address: {} k/v pair.
it won't flatten dicts in nested tuples - though it would be easy to add using the fact that python tuples act similar to lists.
You can use recursion in order to flatten your dictionary.
import collections
def flatten(
nested_dict,
seperator='.',
name=None,
):
flatten_dict = {}
if not nested_dict:
return flatten_dict
if isinstance(
nested_dict,
collections.abc.MutableMapping,
):
for key, value in nested_dict.items():
if name is not None:
flatten_dict.update(
flatten(
nested_dict=value,
seperator=seperator,
name=f'{name}{seperator}{key}',
),
)
else:
flatten_dict.update(
flatten(
nested_dict=value,
seperator=seperator,
name=key,
),
)
else:
flatten_dict[name] = nested_dict
return flatten_dict
if __name__ == '__main__':
nested_dict = {
1: 'a',
2: {
3: 'c',
4: {
5: 'e',
},
6: [1, 2, 3, 4, 5, ],
},
}
print(
flatten(
nested_dict=nested_dict,
),
)
Output:
{
"1":"a",
"2.3":"c",
"2.4.5":"e",
"2.6":[1, 2, 3, 4, 5]
}

remove element from dictionary in python

I am trying to remove the element that have minimum value in the dictionary to try to arrange the rest to the ascending order;
dict={}
for line in file:
line = line.split()
dict.update({line[0]:line[1]})
while dict.items():
min = float(100)
for x in dict:
print(dict[x])
g=float(dict[x])
if (g<min):
min=float(dict[x])
print(min)
for name, avg in dict.items():
if float(avg) == min:
print(name)
fileout.write(name+ '\t' + str(avg) + '\n')
del dict[name]
print(dict)
It would be easier to build a new dictionary from the previous one. By using sorted, dict.items() and passing key as lambda i: i[1] we can build a sorted dictionary. Finally we want to get all bar the first value and so we need to slice the result of sorted
d = {'a': 3, 'b': 1, 'c': 2}
new_d = dict(sorted(d.items(), key=lambda i: i[1])[1:])
print(new_d)
# {'c': 2, 'a': 3}
If you have to mutate the first object then we can just clear and rebuild it with dict.update.
d = {'a': 3, 'b': 1, 'c': 2}
new_d = sorted(d.items(), key=lambda i: i[1])[1:]
d.clear()
d.update(new_d)
print(d)
# {'c': 2, 'a': 3}

Need help finishing this code to flatten a dictionary [duplicate]

Suppose you have a dictionary like:
{'a': 1,
'c': {'a': 2,
'b': {'x': 5,
'y' : 10}},
'd': [1, 2, 3]}
How would you go about flattening that into something like:
{'a': 1,
'c_a': 2,
'c_b_x': 5,
'c_b_y': 10,
'd': [1, 2, 3]}
Basically the same way you would flatten a nested list, you just have to do the extra work for iterating the dict by key/value, creating new keys for your new dictionary and creating the dictionary at final step.
import collections
def flatten(d, parent_key='', sep='_'):
items = []
for k, v in d.items():
new_key = parent_key + sep + k if parent_key else k
if isinstance(v, collections.MutableMapping):
items.extend(flatten(v, new_key, sep=sep).items())
else:
items.append((new_key, v))
return dict(items)
>>> flatten({'a': 1, 'c': {'a': 2, 'b': {'x': 5, 'y' : 10}}, 'd': [1, 2, 3]})
{'a': 1, 'c_a': 2, 'c_b_x': 5, 'd': [1, 2, 3], 'c_b_y': 10}
For Python >= 3.3, change the import to from collections.abc import MutableMapping to avoid a deprecation warning and change collections.MutableMapping to just MutableMapping.
Or if you are already using pandas, You can do it with json_normalize() like so:
import pandas as pd
d = {'a': 1,
'c': {'a': 2, 'b': {'x': 5, 'y' : 10}},
'd': [1, 2, 3]}
df = pd.json_normalize(d, sep='_')
print(df.to_dict(orient='records')[0])
Output:
{'a': 1, 'c_a': 2, 'c_b_x': 5, 'c_b_y': 10, 'd': [1, 2, 3]}
There are two big considerations that the original poster needs to consider:
Are there keyspace clobbering issues? For example, {'a_b':{'c':1}, 'a':{'b_c':2}} would result in {'a_b_c':???}. The below solution evades the problem by returning an iterable of pairs.
If performance is an issue, does the key-reducer function (which I hereby refer to as 'join') require access to the entire key-path, or can it just do O(1) work at every node in the tree? If you want to be able to say joinedKey = '_'.join(*keys), that will cost you O(N^2) running time. However if you're willing to say nextKey = previousKey+'_'+thisKey, that gets you O(N) time. The solution below lets you do both (since you could merely concatenate all the keys, then postprocess them).
(Performance is not likely an issue, but I'll elaborate on the second point in case anyone else cares: In implementing this, there are numerous dangerous choices. If you do this recursively and yield and re-yield, or anything equivalent which touches nodes more than once (which is quite easy to accidentally do), you are doing potentially O(N^2) work rather than O(N). This is because maybe you are calculating a key a then a_1 then a_1_i..., and then calculating a then a_1 then a_1_ii..., but really you shouldn't have to calculate a_1 again. Even if you aren't recalculating it, re-yielding it (a 'level-by-level' approach) is just as bad. A good example is to think about the performance on {1:{1:{1:{1:...(N times)...{1:SOME_LARGE_DICTIONARY_OF_SIZE_N}...}}}})
Below is a function I wrote flattenDict(d, join=..., lift=...) which can be adapted to many purposes and can do what you want. Sadly it is fairly hard to make a lazy version of this function without incurring the above performance penalties (many python builtins like chain.from_iterable aren't actually efficient, which I only realized after extensive testing of three different versions of this code before settling on this one).
from collections import Mapping
from itertools import chain
from operator import add
_FLAG_FIRST = object()
def flattenDict(d, join=add, lift=lambda x:(x,)):
results = []
def visit(subdict, results, partialKey):
for k,v in subdict.items():
newKey = lift(k) if partialKey==_FLAG_FIRST else join(partialKey,lift(k))
if isinstance(v,Mapping):
visit(v, results, newKey)
else:
results.append((newKey,v))
visit(d, results, _FLAG_FIRST)
return results
To better understand what's going on, below is a diagram for those unfamiliar with reduce(left), otherwise known as "fold left". Sometimes it is drawn with an initial value in place of k0 (not part of the list, passed into the function). Here, J is our join function. We preprocess each kn with lift(k).
[k0,k1,...,kN].foldleft(J)
/ \
... kN
/
J(k0,J(k1,J(k2,k3)))
/ \
/ \
J(J(k0,k1),k2) k3
/ \
/ \
J(k0,k1) k2
/ \
/ \
k0 k1
This is in fact the same as functools.reduce, but where our function does this to all key-paths of the tree.
>>> reduce(lambda a,b:(a,b), range(5))
((((0, 1), 2), 3), 4)
Demonstration (which I'd otherwise put in docstring):
>>> testData = {
'a':1,
'b':2,
'c':{
'aa':11,
'bb':22,
'cc':{
'aaa':111
}
}
}
from pprint import pprint as pp
>>> pp(dict( flattenDict(testData) ))
{('a',): 1,
('b',): 2,
('c', 'aa'): 11,
('c', 'bb'): 22,
('c', 'cc', 'aaa'): 111}
>>> pp(dict( flattenDict(testData, join=lambda a,b:a+'_'+b, lift=lambda x:x) ))
{'a': 1, 'b': 2, 'c_aa': 11, 'c_bb': 22, 'c_cc_aaa': 111}
>>> pp(dict( (v,k) for k,v in flattenDict(testData, lift=hash, join=lambda a,b:hash((a,b))) ))
{1: 12416037344,
2: 12544037731,
11: 5470935132935744593,
22: 4885734186131977315,
111: 3461911260025554326}
Performance:
from functools import reduce
def makeEvilDict(n):
return reduce(lambda acc,x:{x:acc}, [{i:0 for i in range(n)}]+range(n))
import timeit
def time(runnable):
t0 = timeit.default_timer()
_ = runnable()
t1 = timeit.default_timer()
print('took {:.2f} seconds'.format(t1-t0))
>>> pp(makeEvilDict(8))
{7: {6: {5: {4: {3: {2: {1: {0: {0: 0,
1: 0,
2: 0,
3: 0,
4: 0,
5: 0,
6: 0,
7: 0}}}}}}}}}
import sys
sys.setrecursionlimit(1000000)
forget = lambda a,b:''
>>> time(lambda: dict(flattenDict(makeEvilDict(10000), join=forget)) )
took 0.10 seconds
>>> time(lambda: dict(flattenDict(makeEvilDict(100000), join=forget)) )
[1] 12569 segmentation fault python
... sigh, don't think that one is my fault...
[unimportant historical note due to moderation issues]
Regarding the alleged duplicate of Flatten a dictionary of dictionaries (2 levels deep) of lists
That question's solution can be implemented in terms of this one by doing sorted( sum(flatten(...),[]) ). The reverse is not possible: while it is true that the values of flatten(...) can be recovered from the alleged duplicate by mapping a higher-order accumulator, one cannot recover the keys. (edit: Also it turns out that the alleged duplicate owner's question is completely different, in that it only deals with dictionaries exactly 2-level deep, though one of the answers on that page gives a general solution.)
If you're using pandas there is a function hidden in pandas.io.json._normalize1 called nested_to_record which does this exactly.
from pandas.io.json._normalize import nested_to_record
flat = nested_to_record(my_dict, sep='_')
1 In pandas versions 0.24.x and older use pandas.io.json.normalize (without the _)
Here is a kind of a "functional", "one-liner" implementation. It is recursive, and based on a conditional expression and a dict comprehension.
def flatten_dict(dd, separator='_', prefix=''):
return { prefix + separator + k if prefix else k : v
for kk, vv in dd.items()
for k, v in flatten_dict(vv, separator, kk).items()
} if isinstance(dd, dict) else { prefix : dd }
Test:
In [2]: flatten_dict({'abc':123, 'hgf':{'gh':432, 'yu':433}, 'gfd':902, 'xzxzxz':{"432":{'0b0b0b':231}, "43234":1321}}, '.')
Out[2]:
{'abc': 123,
'gfd': 902,
'hgf.gh': 432,
'hgf.yu': 433,
'xzxzxz.432.0b0b0b': 231,
'xzxzxz.43234': 1321}
Not exactly what the OP asked, but lots of folks are coming here looking for ways to flatten real-world nested JSON data which can have nested key-value json objects and arrays and json objects inside the arrays and so on. JSON doesn't include tuples, so we don't have to fret over those.
I found an implementation of the list-inclusion comment by #roneo to the answer posted by #Imran :
https://github.com/ScriptSmith/socialreaper/blob/master/socialreaper/tools.py#L8
import collections
def flatten(dictionary, parent_key=False, separator='.'):
"""
Turn a nested dictionary into a flattened dictionary
:param dictionary: The dictionary to flatten
:param parent_key: The string to prepend to dictionary's keys
:param separator: The string used to separate flattened keys
:return: A flattened dictionary
"""
items = []
for key, value in dictionary.items():
new_key = str(parent_key) + separator + key if parent_key else key
if isinstance(value, collections.MutableMapping):
items.extend(flatten(value, new_key, separator).items())
elif isinstance(value, list):
for k, v in enumerate(value):
items.extend(flatten({str(k): v}, new_key).items())
else:
items.append((new_key, value))
return dict(items)
Test it:
flatten({'a': 1, 'c': {'a': 2, 'b': {'x': 5, 'y' : 10}}, 'd': [1, 2, 3] })
>> {'a': 1, 'c.a': 2, 'c.b.x': 5, 'c.b.y': 10, 'd.0': 1, 'd.1': 2, 'd.2': 3}
Annd that does the job I need done: I throw any complicated json at this and it flattens it out for me.
All credits to https://github.com/ScriptSmith .
Code:
test = {'a': 1, 'c': {'a': 2, 'b': {'x': 5, 'y' : 10}}, 'd': [1, 2, 3]}
def parse_dict(init, lkey=''):
ret = {}
for rkey,val in init.items():
key = lkey+rkey
if isinstance(val, dict):
ret.update(parse_dict(val, key+'_'))
else:
ret[key] = val
return ret
print(parse_dict(test,''))
Results:
$ python test.py
{'a': 1, 'c_a': 2, 'c_b_x': 5, 'd': [1, 2, 3], 'c_b_y': 10}
I am using python3.2, update for your version of python.
This is not restricted to dictionaries, but every mapping type that implements .items(). Further ist faster as it avoides an if condition. Nevertheless credits go to Imran:
def flatten(d, parent_key=''):
items = []
for k, v in d.items():
try:
items.extend(flatten(v, '%s%s_' % (parent_key, k)).items())
except AttributeError:
items.append(('%s%s' % (parent_key, k), v))
return dict(items)
How about a functional and performant solution in Python3.5?
from functools import reduce
def _reducer(items, key, val, pref):
if isinstance(val, dict):
return {**items, **flatten(val, pref + key)}
else:
return {**items, pref + key: val}
def flatten(d, pref=''):
return(reduce(
lambda new_d, kv: _reducer(new_d, *kv, pref),
d.items(),
{}
))
This is even more performant:
def flatten(d, pref=''):
return(reduce(
lambda new_d, kv: \
isinstance(kv[1], dict) and \
{**new_d, **flatten(kv[1], pref + kv[0])} or \
{**new_d, pref + kv[0]: kv[1]},
d.items(),
{}
))
In use:
my_obj = {'a': 1, 'c': {'a': 2, 'b': {'x': 5, 'y': 10}}, 'd': [1, 2, 3]}
print(flatten(my_obj))
# {'d': [1, 2, 3], 'cby': 10, 'cbx': 5, 'ca': 2, 'a': 1}
If you are a fan of pythonic oneliners:
my_dict={'a': 1,'c': {'a': 2,'b': {'x': 5,'y' : 10}},'d': [1, 2, 3]}
list(pd.json_normalize(my_dict).T.to_dict().values())[0]
returns:
{'a': 1, 'c.a': 2, 'c.b.x': 5, 'c.b.y': 10, 'd': [1, 2, 3]}
You can leave the [0] from the end, if you have a list of dictionaries and not just a single dictionary.
My Python 3.3 Solution using generators:
def flattenit(pyobj, keystring=''):
if type(pyobj) is dict:
if (type(pyobj) is dict):
keystring = keystring + "_" if keystring else keystring
for k in pyobj:
yield from flattenit(pyobj[k], keystring + k)
elif (type(pyobj) is list):
for lelm in pyobj:
yield from flatten(lelm, keystring)
else:
yield keystring, pyobj
my_obj = {'a': 1, 'c': {'a': 2, 'b': {'x': 5, 'y': 10}}, 'd': [1, 2, 3]}
#your flattened dictionary object
flattened={k:v for k,v in flattenit(my_obj)}
print(flattened)
# result: {'c_b_y': 10, 'd': [1, 2, 3], 'c_a': 2, 'a': 1, 'c_b_x': 5}
Utilizing recursion, keeping it simple and human readable:
def flatten_dict(dictionary, accumulator=None, parent_key=None, separator="."):
if accumulator is None:
accumulator = {}
for k, v in dictionary.items():
k = f"{parent_key}{separator}{k}" if parent_key else k
if isinstance(v, dict):
flatten_dict(dictionary=v, accumulator=accumulator, parent_key=k)
continue
accumulator[k] = v
return accumulator
Call is simple:
new_dict = flatten_dict(dictionary)
or
new_dict = flatten_dict(dictionary, separator="_")
if we want to change the default separator.
A little breakdown:
When the function is first called, it is called only passing the dictionary we want to flatten. The accumulator parameter is here to support recursion, which we see later. So, we instantiate accumulator to an empty dictionary where we will put all of the nested values from the original dictionary.
if accumulator is None:
accumulator = {}
As we iterate over the dictionary's values, we construct a key for every value. The parent_key argument will be None for the first call, while for every nested dictionary, it will contain the key pointing to it, so we prepend that key.
k = f"{parent_key}{separator}{k}" if parent_key else k
In case the value v the key k is pointing to is a dictionary, the function calls itself, passing the nested dictionary, the accumulator (which is passed by reference, so all changes done to it are done on the same instance) and the key k so that we can construct the concatenated key. Notice the continue statement. We want to skip the next line, outside of the if block, so that the nested dictionary doesn't end up in the accumulator under key k.
if isinstance(v, dict):
flatten_dict(dict=v, accumulator=accumulator, parent_key=k)
continue
So, what do we do in case the value v is not a dictionary? Just put it unchanged inside the accumulator.
accumulator[k] = v
Once we're done we just return the accumulator, leaving the original dictionary argument untouched.
NOTE
This will work only with dictionaries that have strings as keys. It will work with hashable objects implementing the __repr__ method, but will yield unwanted results.
Simple function to flatten nested dictionaries. For Python 3, replace .iteritems() with .items()
def flatten_dict(init_dict):
res_dict = {}
if type(init_dict) is not dict:
return res_dict
for k, v in init_dict.iteritems():
if type(v) == dict:
res_dict.update(flatten_dict(v))
else:
res_dict[k] = v
return res_dict
The idea/requirement was:
Get flat dictionaries with no keeping parent keys.
Example of usage:
dd = {'a': 3,
'b': {'c': 4, 'd': 5},
'e': {'f':
{'g': 1, 'h': 2}
},
'i': 9,
}
flatten_dict(dd)
>> {'a': 3, 'c': 4, 'd': 5, 'g': 1, 'h': 2, 'i': 9}
Keeping parent keys is simple as well.
I was thinking of a subclass of UserDict to automagically flat the keys.
class FlatDict(UserDict):
def __init__(self, *args, separator='.', **kwargs):
self.separator = separator
super().__init__(*args, **kwargs)
def __setitem__(self, key, value):
if isinstance(value, dict):
for k1, v1 in FlatDict(value, separator=self.separator).items():
super().__setitem__(f"{key}{self.separator}{k1}", v1)
else:
super().__setitem__(key, value)
‌
The advantages it that keys can be added on the fly, or using standard dict instanciation, without surprise:
‌
>>> fd = FlatDict(
... {
... 'person': {
... 'sexe': 'male',
... 'name': {
... 'first': 'jacques',
... 'last': 'dupond'
... }
... }
... }
... )
>>> fd
{'person.sexe': 'male', 'person.name.first': 'jacques', 'person.name.last': 'dupond'}
>>> fd['person'] = {'name': {'nickname': 'Bob'}}
>>> fd
{'person.sexe': 'male', 'person.name.first': 'jacques', 'person.name.last': 'dupond', 'person.name.nickname': 'Bob'}
>>> fd['person.name'] = {'civility': 'Dr'}
>>> fd
{'person.sexe': 'male', 'person.name.first': 'jacques', 'person.name.last': 'dupond', 'person.name.nickname': 'Bob', 'person.name.civility': 'Dr'}
This is similar to both imran's and ralu's answer. It does not use a generator, but instead employs recursion with a closure:
def flatten_dict(d, separator='_'):
final = {}
def _flatten_dict(obj, parent_keys=[]):
for k, v in obj.iteritems():
if isinstance(v, dict):
_flatten_dict(v, parent_keys + [k])
else:
key = separator.join(parent_keys + [k])
final[key] = v
_flatten_dict(d)
return final
>>> print flatten_dict({'a': 1, 'c': {'a': 2, 'b': {'x': 5, 'y' : 10}}, 'd': [1, 2, 3]})
{'a': 1, 'c_a': 2, 'c_b_x': 5, 'd': [1, 2, 3], 'c_b_y': 10}
The answers above work really well. Just thought I'd add the unflatten function that I wrote:
def unflatten(d):
ud = {}
for k, v in d.items():
context = ud
for sub_key in k.split('_')[:-1]:
if sub_key not in context:
context[sub_key] = {}
context = context[sub_key]
context[k.split('_')[-1]] = v
return ud
Note: This doesn't account for '_' already present in keys, much like the flatten counterparts.
Davoud's solution is very nice but doesn't give satisfactory results when the nested dict also contains lists of dicts, but his code be adapted for that case:
def flatten_dict(d):
items = []
for k, v in d.items():
try:
if (type(v)==type([])):
for l in v: items.extend(flatten_dict(l).items())
else:
items.extend(flatten_dict(v).items())
except AttributeError:
items.append((k, v))
return dict(items)
def flatten(unflattened_dict, separator='_'):
flattened_dict = {}
for k, v in unflattened_dict.items():
if isinstance(v, dict):
sub_flattened_dict = flatten(v, separator)
for k2, v2 in sub_flattened_dict.items():
flattened_dict[k + separator + k2] = v2
else:
flattened_dict[k] = v
return flattened_dict
I actually wrote a package called cherrypicker recently to deal with this exact sort of thing since I had to do it so often!
I think the following code would give you exactly what you're after:
from cherrypicker import CherryPicker
dct = {
'a': 1,
'c': {
'a': 2,
'b': {
'x': 5,
'y' : 10
}
},
'd': [1, 2, 3]
}
picker = CherryPicker(dct)
picker.flatten().get()
You can install the package with:
pip install cherrypicker
...and there's more docs and guidance at https://cherrypicker.readthedocs.io.
Other methods may be faster, but the priority of this package is to make such tasks easy. If you do have a large list of objects to flatten though, you can also tell CherryPicker to use parallel processing to speed things up.
here's a solution using a stack. No recursion.
def flatten_nested_dict(nested):
stack = list(nested.items())
ans = {}
while stack:
key, val = stack.pop()
if isinstance(val, dict):
for sub_key, sub_val in val.items():
stack.append((f"{key}_{sub_key}", sub_val))
else:
ans[key] = val
return ans
Using generators:
def flat_dic_helper(prepand,d):
if len(prepand) > 0:
prepand = prepand + "_"
for k in d:
i = d[k]
if isinstance(i, dict):
r = flat_dic_helper(prepand + k,i)
for j in r:
yield j
else:
yield (prepand + k,i)
def flat_dic(d):
return dict(flat_dic_helper("",d))
d = {'a': 1, 'c': {'a': 2, 'b': {'x': 5, 'y' : 10}}, 'd': [1, 2, 3]}
print(flat_dic(d))
>> {'a': 1, 'c_a': 2, 'c_b_x': 5, 'd': [1, 2, 3], 'c_b_y': 10}
Here's an algorithm for elegant, in-place replacement. Tested with Python 2.7 and Python 3.5. Using the dot character as a separator.
def flatten_json(json):
if type(json) == dict:
for k, v in list(json.items()):
if type(v) == dict:
flatten_json(v)
json.pop(k)
for k2, v2 in v.items():
json[k+"."+k2] = v2
Example:
d = {'a': {'b': 'c'}}
flatten_json(d)
print(d)
unflatten_json(d)
print(d)
Output:
{'a.b': 'c'}
{'a': {'b': 'c'}}
I published this code here along with the matching unflatten_json function.
If you want to flat nested dictionary and want all unique keys list then here is the solution:
def flat_dict_return_unique_key(data, unique_keys=set()):
if isinstance(data, dict):
[unique_keys.add(i) for i in data.keys()]
for each_v in data.values():
if isinstance(each_v, dict):
flat_dict_return_unique_key(each_v, unique_keys)
return list(set(unique_keys))
I always prefer access dict objects via .items(), so for flattening dicts I use the following recursive generator flat_items(d). If you like to have dict again, simply wrap it like this: flat = dict(flat_items(d))
def flat_items(d, key_separator='.'):
"""
Flattens the dictionary containing other dictionaries like here: https://stackoverflow.com/questions/6027558/flatten-nested-python-dictionaries-compressing-keys
>>> example = {'a': 1, 'c': {'a': 2, 'b': {'x': 5, 'y' : 10}}, 'd': [1, 2, 3]}
>>> flat = dict(flat_items(example, key_separator='_'))
>>> assert flat['c_b_y'] == 10
"""
for k, v in d.items():
if type(v) is dict:
for k1, v1 in flat_items(v, key_separator=key_separator):
yield key_separator.join((k, k1)), v1
else:
yield k, v
def flatten_nested_dict(_dict, _str=''):
'''
recursive function to flatten a nested dictionary json
'''
ret_dict = {}
for k, v in _dict.items():
if isinstance(v, dict):
ret_dict.update(flatten_nested_dict(v, _str = '_'.join([_str, k]).strip('_')))
elif isinstance(v, list):
for index, item in enumerate(v):
if isinstance(item, dict):
ret_dict.update(flatten_nested_dict(item, _str= '_'.join([_str, k, str(index)]).strip('_')))
else:
ret_dict['_'.join([_str, k, str(index)]).strip('_')] = item
else:
ret_dict['_'.join([_str, k]).strip('_')] = v
return ret_dict
Using dict.popitem() in straightforward nested-list-like recursion:
def flatten(d):
if d == {}:
return d
else:
k,v = d.popitem()
if (dict != type(v)):
return {k:v, **flatten(d)}
else:
flat_kv = flatten(v)
for k1 in list(flat_kv.keys()):
flat_kv[k + '_' + k1] = flat_kv[k1]
del flat_kv[k1]
return {**flat_kv, **flatten(d)}
If you do not mind recursive functions, here is a solution. I have also taken the liberty to include an exclusion-parameter in case there are one or more values you wish to maintain.
Code:
def flatten_dict(dictionary, exclude = [], delimiter ='_'):
flat_dict = dict()
for key, value in dictionary.items():
if isinstance(value, dict) and key not in exclude:
flatten_value_dict = flatten_dict(value, exclude, delimiter)
for k, v in flatten_value_dict.items():
flat_dict[f"{key}{delimiter}{k}"] = v
else:
flat_dict[key] = value
return flat_dict
Usage:
d = {'a':1, 'b':[1, 2], 'c':3, 'd':{'a':4, 'b':{'a':7, 'b':8}, 'c':6}, 'e':{'a':1,'b':2}}
flat_d = flatten_dict(dictionary=d, exclude=['e'], delimiter='.')
print(flat_d)
Output:
{'a': 1, 'b': [1, 2], 'c': 3, 'd.a': 4, 'd.b.a': 7, 'd.b.b': 8, 'd.c': 6, 'e': {'a': 1, 'b': 2}}
Variation of this Flatten nested dictionaries, compressing keys with max_level and custom reducer.
def flatten(d, max_level=None, reducer='tuple'):
if reducer == 'tuple':
reducer_seed = tuple()
reducer_func = lambda x, y: (*x, y)
else:
raise ValueError(f'Unknown reducer: {reducer}')
def impl(d, pref, level):
return reduce(
lambda new_d, kv:
(max_level is None or level < max_level)
and isinstance(kv[1], dict)
and {**new_d, **impl(kv[1], reducer_func(pref, kv[0]), level + 1)}
or {**new_d, reducer_func(pref, kv[0]): kv[1]},
d.items(),
{}
)
return impl(d, reducer_seed, 0)
I tried some of the solutions on this page - though not all - but those I tried failed to handle the nested list of dict.
Consider a dict like this:
d = {
'owner': {
'name': {'first_name': 'Steven', 'last_name': 'Smith'},
'lottery_nums': [1, 2, 3, 'four', '11', None],
'address': {},
'tuple': (1, 2, 'three'),
'tuple_with_dict': (1, 2, 'three', {'is_valid': False}),
'set': {1, 2, 3, 4, 'five'},
'children': [
{'name': {'first_name': 'Jessica',
'last_name': 'Smith', },
'children': []
},
{'name': {'first_name': 'George',
'last_name': 'Smith'},
'children': []
}
]
}
}
Here's my makeshift solution:
def flatten_dict(input_node: dict, key_: str = '', output_dict: dict = {}):
if isinstance(input_node, dict):
for key, val in input_node.items():
new_key = f"{key_}.{key}" if key_ else f"{key}"
flatten_dict(val, new_key, output_dict)
elif isinstance(input_node, list):
for idx, item in enumerate(input_node):
flatten_dict(item, f"{key_}.{idx}", output_dict)
else:
output_dict[key_] = input_node
return output_dict
which produces:
{
owner.name.first_name: Steven,
owner.name.last_name: Smith,
owner.lottery_nums.0: 1,
owner.lottery_nums.1: 2,
owner.lottery_nums.2: 3,
owner.lottery_nums.3: four,
owner.lottery_nums.4: 11,
owner.lottery_nums.5: None,
owner.tuple: (1, 2, 'three'),
owner.tuple_with_dict: (1, 2, 'three', {'is_valid': False}),
owner.set: {1, 2, 3, 4, 'five'},
owner.children.0.name.first_name: Jessica,
owner.children.0.name.last_name: Smith,
owner.children.1.name.first_name: George,
owner.children.1.name.last_name: Smith,
}
A makeshift solution and it's not perfect.
NOTE:
it doesn't keep empty dicts such as the address: {} k/v pair.
it won't flatten dicts in nested tuples - though it would be easy to add using the fact that python tuples act similar to lists.
You can use recursion in order to flatten your dictionary.
import collections
def flatten(
nested_dict,
seperator='.',
name=None,
):
flatten_dict = {}
if not nested_dict:
return flatten_dict
if isinstance(
nested_dict,
collections.abc.MutableMapping,
):
for key, value in nested_dict.items():
if name is not None:
flatten_dict.update(
flatten(
nested_dict=value,
seperator=seperator,
name=f'{name}{seperator}{key}',
),
)
else:
flatten_dict.update(
flatten(
nested_dict=value,
seperator=seperator,
name=key,
),
)
else:
flatten_dict[name] = nested_dict
return flatten_dict
if __name__ == '__main__':
nested_dict = {
1: 'a',
2: {
3: 'c',
4: {
5: 'e',
},
6: [1, 2, 3, 4, 5, ],
},
}
print(
flatten(
nested_dict=nested_dict,
),
)
Output:
{
"1":"a",
"2.3":"c",
"2.4.5":"e",
"2.6":[1, 2, 3, 4, 5]
}

How can I find dict keys for matching values in two dicts?

I have two dictionaries mapping IDs to values. For simplicity, lets say those are the dictionaries:
d_source = {'a': 1, 'b': 2, 'c': 3, '3': 3}
d_target = {'A': 1, 'B': 2, 'C': 3, '1': 1}
As named, the dictionaries are not symmetrical.
I would like to get a dictionary of keys from dictionaries d_source and d_target whose values match. The resulting dictionary would have d_source keys as its own keys, and d_target keys as that keys value (in either a list, tuple or set format).
This would be The expected returned value for the above example should be the following list:
{'a': ('1', 'A'),
'b': ('B',),
'c': ('C',),
'3': ('C',)}
There are two somewhat similar questions, but those solutions can't be easily applied to my question.
Some characteristics of the data:
Source would usually be smaller than target. Having roughly few thousand sources (tops) and a magnitude more targets.
Duplicates in the same dict (both d_source and d_target) are not too likely on values.
matches are expected to be found for (a rough estimate) not more than 50% than d_source items.
All keys are integers.
What is the best (performance wise) solution to this problem?
Modeling data into other datatypes for improved performance is totally ok, even when using third party libraries (i'm thinking numpy)
All answers have O(n^2) efficiency which isn't very good so I thought of answering myself.
I use 2(source_len) + 2(dict_count)(dict_len) memory and I have O(2n) efficiency which is the best you can get here I believe.
Here you go:
from collections import defaultdict
d_source = {'a': 1, 'b': 2, 'c': 3, '3': 3}
d_target = {'A': 1, 'B': 2, 'C': 3, '1': 1}
def merge_dicts(source_dict, *rest):
flipped_rest = defaultdict(list)
for d in rest:
while d:
k, v = d.popitem()
flipped_rest[v].append(k)
return {k: tuple(flipped_rest.get(v, ())) for k, v in source_dict.items()}
new_dict = merge_dicts(d_source, d_target)
By the way, I'm using a tuple in order not to link the resulting lists together.
As you've added specifications for the data, here's a closer matching solution:
d_source = {'a': 1, 'b': 2, 'c': 3, '3': 3}
d_target = {'A': 1, 'B': 2, 'C': 3, '1': 1}
def second_merge_dicts(source_dict, *rest):
"""Optimized for ~50% source match due to if statement addition.
Also uses less memory.
"""
unique_values = set(source_dict.values())
flipped_rest = defaultdict(list)
for d in rest:
while d:
k, v = d.popitem()
if v in unique_values:
flipped_rest[v].append(k)
return {k: tuple(flipped_rest.get(v, ())) for k, v in source_dict.items()}
new_dict = second_merge_dicts(d_source, d_target)
from collections import defaultdict
from pprint import pprint
d_source = {'a': 1, 'b': 2, 'c': 3, '3': 3}
d_target = {'A': 1, 'B': 2, 'C': 3, '1': 1}
d_result = defaultdict(list)
{d_result[a].append(b) for a in d_source for b in d_target if d_source[a] == d_target[b]}
pprint(d_result)
Output:
{'3': ['C'],
'a': ['A', '1'],
'b': ['B'],
'c': ['C']}
Timing results:
from collections import defaultdict
from copy import deepcopy
from random import randint
from timeit import timeit
def Craig_match(source, target):
result = defaultdict(list)
{result[a].append(b) for a in source for b in target if source[a] == target[b]}
return result
def Bharel_match(source_dict, *rest):
flipped_rest = defaultdict(list)
for d in rest:
while d:
k, v = d.popitem()
flipped_rest[v].append(k)
return {k: tuple(flipped_rest.get(v, ())) for k, v in source_dict.items()}
def modified_Bharel_match(source_dict, *rest):
"""Optimized for ~50% source match due to if statement addition.
Also uses less memory.
"""
unique_values = set(source_dict.values())
flipped_rest = defaultdict(list)
for d in rest:
while d:
k, v = d.popitem()
if v in unique_values:
flipped_rest[v].append(k)
return {k: tuple(flipped_rest.get(v, ())) for k, v in source_dict.items()}
# generate source, target such that:
# a) ~10% duplicate values in source and target
# b) 2000 unique source keys, 20000 unique target keys
# c) a little less than 50% matches source value to target value
# d) numeric keys and values
source = {}
for k in range(2000):
source[k] = randint(0, 1800)
target = {}
for k in range(20000):
if k < 1000:
target[k] = randint(0, 2000)
else:
target[k] = randint(2000, 19000)
best_time = {}
approaches = ('Craig', 'Bharel', 'modified_Bharel')
for a in approaches:
best_time[a] = None
for _ in range(3):
for approach in approaches:
test_source = deepcopy(source)
test_target = deepcopy(target)
statement = 'd=' + approach + '_match(test_source,test_target)'
setup = 'from __main__ import test_source, test_target, ' + approach + '_match'
t = timeit(stmt=statement, setup=setup, number=1)
if not best_time[approach] or (t < best_time[approach]):
best_time[approach] = t
for approach in approaches:
print(approach, ':', '%0.5f' % best_time[approach])
Output:
Craig : 7.29259
Bharel : 0.01587
modified_Bharel : 0.00682
Here is another solution. There are a lot of ways to do this
for key1 in d1:
for key2 in d2:
if d1[key1] == d2[key2]:
stuff
Note that you can use any name for key1 and key2.
This maybe "cheating" in some regards, although if you are looking for the matching values of the keys regardless of the case sensitivity then you might be able to do:
import sets
aa = {'a': 1, 'b': 2, 'c':3}
bb = {'A': 1, 'B': 2, 'd': 3}
bbl = {k.lower():v for k,v in bb.items()}
result = {k:k.upper() for k,v in aa.iteritems() & bbl.viewitems()}
print( result )
Output:
{'a': 'A', 'b': 'B'}
The bbl declaration changes the bb keys into lowercase (it could be either aa, or bb).
* I only tested this on my phone, so just throwing this idea out there I suppose... Also, you've changed your question radically since I began composing my answer, so you get what you get.
It is up to you to determine the best solution. Here is a solution:
def dicts_to_tuples(*dicts):
result = {}
for d in dicts:
for k,v in d.items():
result.setdefault(v, []).append(k)
return [tuple(v) for v in result.values() if len(v) > 1]
d1 = {'a': 1, 'b': 2, 'c':3}
d2 = {'A': 1, 'B': 2}
print dicts_to_tuples(d1, d2)

Categories

Resources