python construct nested dict dynamically - python

I want to create a nested dict in python dynamically. By saying that:
given
tuple1 = ('A','B'),
tuple2 = ('A','C'),
dict = {}
I'd like to have dict like dict = {'A': {'B':1}} after adding tuple1 to dict;
then dict = {'A': {'B' : 1, 'C' : 1}} after adding tuple2 to dict
This is what I have tried: I found the following code, which creates a nested dict recursively, but I'm not sure how to add nodes dynamically and also increment a node's value by 1.
def incr_dict(dct, tpl):
    """Wrap *dct* in one dict level per element of *tpl*.

    The last element of *tpl* becomes the innermost key and the first
    element the outermost one.  An empty *tpl* returns *dct* unchanged.
    """
    # Walking the keys back to front replaces the original tail recursion.
    for key in reversed(tpl):
        dct = {key: dct}
    return dct


dct = {}
tpl = ('a', 'b', 'c')
dct = incr_dict(dct, tpl)
print(dct)

At the end of the below code, you will have a dict d which is {'A': {'B': 1, 'C': 1}}; note that the outermost loop isn't strictly necessary, but it saved me some typing in this instance.
tuple1 = ('A', 'B')
tuple2 = ('A', 'C')
d = {}
# Each tuple is (outer key, inner key); the inner key's counter starts at 0
# and is bumped once per occurrence.
for outer, inner in (tuple1, tuple2):
    counts = d.setdefault(outer, {})
    counts[inner] = counts.get(inner, 0) + 1

Related

How to split a dictionary based on similarity of values into two separate dictionaries

I have a dict:
dictionary = {'1': 'a',
'2': 'a',
'3': 'b',
'4': 'b'}
and I want to split it such that one dict contains all items with value == 'a' and the other one all items with value == 'b':
dict_a = {'1': 'a',
'2': 'a'}
dict_b = {'3': 'b',
'4': 'b'}
How can I do this in a short way?
My approach
dict_a = {}
dict_b = {}
for key, value in dictionary.items():
if value == 'a':
dict_a = dict(key, value)
else:
dict_b = dict(key, value)
does not work.
Try this instead:
dict_a={}
dict_b={}
for key, value in dictionary.items():
if value == 'a':
dict_a[key] = value
else:
dict_b[key] = value
In your original code, you were recreating dict_a or dict_b every time you went through the loop.
If you're not absolutely sure that your starting dict contains ONLY "a" or "b" values, use this instead:
dict_a={}
dict_b={}
for key, value in dictionary.items():
if value == 'a':
dict_a[key] = value
elif value == 'b':
dict_b[key] = value
else:
pass
You could also use filter() if you want a shorter solution:
dict_a = dict(filter(lambda kv: kv[1] == 'a', dictionary.items()))
dict_b = dict(filter(lambda kv: kv[1] == 'b', dictionary.items()))
Note that this could be slower than a manual for loop if you have to do it more than once.
In any case, if you already know that every single value in dict_a is going to be "a" and every single value in dict_b is going to be "b" this seems like a waste of space, you don't need dictionaries at all:
# Only the keys are collected; the list a key lands in implies its value.
keys_a = []
keys_b = []
for key, value in dictionary.items():
    # Partition on the value: testing the key against 'a' would send every
    # key of the sample dictionary ('1'..'4') to keys_b.
    if value == 'a':
        keys_a.append(key)
    else:
        keys_b.append(key)
Or with filter():
keys_a = list(filter(lambda k: dictionary[k] == 'a', dictionary.keys()))
keys_b = list(filter(lambda k: dictionary[k] == 'b', dictionary.keys()))
If the values in your initial dict are not under your control, or are not limited to a and b, you can try the following:
def combine_similar_values(input_dict):
    """Split *input_dict* into one dict per distinct value.

    Values are compared by their ``str()`` form, so values with the same
    string representation end up in the same group.

    Returns a list of dicts, in order of first appearance of each value;
    an empty input gives an empty list.
    """
    groups = {}
    for key, value in input_dict.items():
        # setdefault creates the group the first time a value is seen.
        groups.setdefault(str(value), {})[key] = value
    # Materialise the view so callers really get a list (indexable, reusable).
    return list(groups.values())
combine_similar_values will return a list of dictionaries, where each dictionary holds the items that share the same value.

Unflatten nested Python dictionary

What would be the cleanest way to convert this
{"a.b.c[0].key1": 1, "a.b.c[1].key2": 2, "a.b.c[3].key3": 3}
Into this
{"a": {"b": {"c": [{"key1": 1}, {"key2": 2}, None, {"key3": 3}]}}}
the dictionary keys may be anything.
the length of the list may vary.
the depth of the dictionary may vary.
if there are missing values in the list the value must be None.
if values are repeated the last one declared is the one that counts.
I came up with the following working example.
Was wondering if we could find a better solution for our community.
def unflatten(data):
    """Expand a flat dict with path-like keys into nested dicts and lists.

    A key such as ``"a.b.c[0].key1"`` is split into name components
    (separated by dots) and integer list indices (``[n]``).  Missing list
    positions are filled with ``None``; when two keys address the same
    place, the one seen later wins.

    Returns ``None`` when *data* is not a dict and ``{}`` for an empty dict.
    """
    import re  # local import so the function works without file-level imports

    if not isinstance(data, dict):
        return None

    token_rgx = re.compile(r'\.?([^.\[\]]+)|\[(\d+)\]')

    def fetch(container, slot):
        """Return the child stored at *slot*, or None when there is none."""
        if isinstance(container, list):
            return container[slot] if slot < len(container) else None
        return container.get(slot)

    def store(container, slot, item):
        """Assign *item* at *slot*, padding lists with None as needed."""
        if isinstance(container, list):
            container.extend([None] * (slot + 1 - len(container)))
        container[slot] = item

    # The real root hangs under the "" key so that it can be either a dict
    # or a list, depending on how the flat keys start.
    holder = {}
    for flat_key, value in data.items():
        container, slot = holder, ""
        for name, index in token_rgx.findall(flat_key):
            wanted = list if index else dict
            child = fetch(container, slot)
            # Create the intermediate container when the slot is empty (or
            # holds something of the wrong kind) instead of descending into it.
            if not isinstance(child, wanted):
                child = wanted()
                store(container, slot, child)
            container = child
            slot = int(index) if index else name
        store(container, slot, value)
    return holder.get("", {})
You can use recursion:
from itertools import groupby
import re

d = {"a.b.c[0].key1": 1, "a.b.c[1].key2": 2, "a.b.c[3].key3": 3}

# "name[3]" -> ("name", "3"); anything else is treated as a plain dict key.
_LIST_KEY = re.compile(r'^(.*)\[(\d+)\]$')


def _resolve(tails):
    """Turn the rows below one key into a leaf value or a nested structure.

    A row of length 1 holds just a value, so it is a leaf; when several
    leaves compete, the last one wins.  Otherwise the rows are grouped again.
    """
    leaves = [row[0] for row in tails if len(row) == 1]
    if leaves:
        return leaves[-1]
    return group_data(tails)


def group_data(data):
    """Recursively group rows of the form ``[key1, key2, ..., value]``.

    Rows are bucketed by their first component.  Components written as
    ``name[n]`` become position ``n`` of a list stored under ``name``;
    list positions that are never mentioned are filled with ``None``,
    counting from index 0.
    """
    def head(row):
        return row[0]

    final_result = {}
    list_items = {}  # list name -> {index: resolved value}
    # groupby only merges adjacent rows, hence the sort on the same key.
    for first, rows in groupby(sorted(data, key=head), key=head):
        tails = [row[1:] for row in rows]
        match = _LIST_KEY.match(first)
        if match:
            name, index = match.group(1), int(match.group(2))
            list_items.setdefault(name, {})[index] = _resolve(tails)
        else:
            final_result[first] = _resolve(tails)
    for name, items in list_items.items():
        final_result[name] = [items.get(i) for i in range(max(items) + 1)]
    return final_result


print(group_data([[*a.split('.'), b] for a, b in d.items()]))
Output:
{'a': {'b': {'c': [{'key1': 1}, {'key2': 2}, None, {'key3': 3}]}}}
A recursive function would probably be much easier to work with and more elegant.
This is partly pseudocode, but it may help you get thinking.
I haven't tested it, but I'm pretty sure it should work so long as you don't have any lists that are directly elements of other lists. So you can have dicts of dicts, dicts of lists, and lists of dicts, but not lists of lists.
def unflatten(data):
resultDict = {}
for e in data:
insertElement(e.split("."), data[e], resultDict)
return resultDict
def insertElement(path, value, subDict):
if (path[0] is of the form "foo[n]"):
key, index = parseListNotation(path[0])
if (key not in subDict):
subDict[key] = []
if (index >= subDict[key].len()):
subDict[key].expandUntilThisSize(index)
if (subDict[key][index] == None):
subDict[key][index] = {}
subDict[key][index] = insertElement(path.pop(0), value, subDict[key][index])
else:
key = path[0]
if (path.length == 1):
subDict[key] = value
else:
if (key not in subDict):
subDict[key] = {}
subDict[key] = insertElement(path.pop(0), value, subDict[key])
return subDict;
The idea is to build the dictionary from the inside, out. E.g.:
For the first element, first create the dictionary
{key1: 1},
Then assign that to an element of a new dictionary
{c : [None]}, c[0] = {key1: 1}
Then assign that dictionary to the next element b in a new dict, like
- {b: {c : [{key1: 1}]}
Assign that result to a in a new dict
- {a: {b: {c : [{key1: 1}]}}
And lastly return that full dictionary, to use to add the next value.
If you're not familiar with recursive functions, I'd recommend practicing with some simpler ones, and then writing one that does what you want but for input that's only dictionaries.
General path of a dictionary-only recursive function:
Given a path that's a list of attributes of nested dictionaries ( [a, b, c, key1] in your example, if c weren't a list):
Start (path, value):
If there's only one item in your path, build a dictionary setting
that key to your value, and you're done.
If there's more than one, build a dictionary using the first
element as a key, and set the value as the output of Start(path.remove(0), value)
Here is another variation on how to achieve the desired results. Not as pretty as I would like though, so I expect there is a much more elegant way. Probably more regex than is really necessary if you spent a bit more time on this, and also seems like the break approach to handling the final key is probably just an indicator that the loop logic could be improved to eliminate that sort of manual intervention. That said, hopefully this is helpful in the process of refining your approach here.
import re
def unflatten(data):
    """Expand dotted keys with optional ``name[n]`` parts into nested data.

    Each flat key is split on dots.  A part of the form ``name[n]`` selects
    position ``n`` of a list stored under ``name``; list positions that are
    never addressed stay ``None``.  Several flat keys may contribute to the
    same list element or sub-dict.

    The result is printed (as before) and also returned.
    """
    results = {}
    list_rgx = re.compile(r'[^\[\]]+\[\d+\]')
    idx_rgx = re.compile(r'\d+(?=\])')
    key_rgx = re.compile(r'[^\[]+')
    for text, value in data.items():
        cur = results
        keys = text.split('.')
        idx = None
        for i, key in enumerate(keys):
            stop = (i == len(keys) - 1)
            if idx is not None:
                # cur is the list picked by the previous part: step into its
                # idx-th element, creating it if needed but never replacing
                # a dict that an earlier flat key already filled in.
                while len(cur) <= idx:
                    cur.append(None)
                if not isinstance(cur[idx], dict):
                    cur[idx] = {}
                cur = cur[idx]
                idx = None
            if re.match(list_rgx, key):
                idx = int(re.search(idx_rgx, key).group())
                key = re.search(key_rgx, key).group()
                if not isinstance(cur.get(key), list):
                    cur[key] = []
                if stop:
                    # The flat key ends in "name[n]": the value itself is
                    # the list element.
                    items = cur[key]
                    items.extend([None] * (idx + 1 - len(items)))
                    items[idx] = value
                    break
            elif stop:
                cur[key] = value
                break
            elif not isinstance(cur.get(key), dict):
                cur[key] = {}
            cur = cur.get(key)
    print(results)
    return results
Output:
d = {"a.b.c[0].key1": 1, "a.b.c[1].key2": 2, "a.b.c[3].key3": 3}
unflatten(d)
# {'a': {'b': {'c': [{'key1': 1}, {'key2': 2}, None, {'key3': 3}]}}}

Accessing value which is nested inside 2 keys

Suppose I have the following dict:
L = {'A': {'root[1]': 'firstvalue', 'root[2]': 'secondvalue'}, 'B': {'root[3]': 'thirdvalue', 'root[4]': 'Fourthvalue'}}
How can I access the values of the keys root[1], root[2], root[3], root[4] (indexes of root[] is dynamic) in Python 2.7.
Try :
>>> L = {'A': {'root[1]': 'firstvalue', 'root[2]': 'secondvalue'}, 'B': {'root[3]': 'thirdvalue', 'root[4]': 'Fourthvalue'}}
>>> L['A']['root[1]']
'firstvalue'
>>> L['A']['root[2]']
'secondvalue'
>>> L['B']['root[3]']
'thirdvalue'
>>> L['B']['root[4]']
'Fourthvalue'
>>>
Something like this:
for (key, value) in L.items():
for (another_key, real_value) in value.items():
print(another_key, real_value)
To access the values from a dict nested inside the dict, I used the following steps:
L = {'A': {'root[1]': 'firstvalue', 'root[2]': 'secondvalue'}, 'B': {'root[3]': 'thirdvalue', 'root[4]': 'Fourthvalue'}}
Solution:
# Look up the inner dict stored under "A"; the empty-dict default means a
# missing key gives an empty result instead of failing on None.values().
F = L.get("A", {})
# For the sample data, F == {'root[1]': 'firstvalue', 'root[2]': 'secondvalue'}
G = list(F.values())
Output:
G = ['firstvalue', 'secondvalue']

Convert a list into a nested dictionary

For example I have
x = ['a','b','c']
I need to convert it to:
y['a']['b']['c'] = ''
Is that possible?
For the background, I have a config file which contains dotted notation that points to a place in some json data. I'd like to use the dotted notation string to access that specific data in the json file. For example, in the config:
path_to_data = "user.name.first_name"
I'd like my script to recognize that as:
json_data["user"]["name"]["first_name"]
so I can get the value of the first_name field. I converted the original string into a list, and now I don't know how to convert it to a nested dict.
EDIT: There is an existing data structure that I need to apply the dict with. Let's say:
m = {'a': {'b': {'c': 'lolcat'}}}
so that
m['a']['b']['c']
gives me 'lolcat'. If I get the right dictionary structure (as some of the replies did), I would still need to apply this to the existing dictionary 'm'.
So, again, I get this from a config file:
c = 'a.b.c'
That I converted to a list, thinking this will make things easier:
x = ['a','b','c']
Now I have a json-like data structure:
m = {'a': {'b': {'c': 'lolcat'}}}
So the nested dict generated from 'x' should be able to traverse 'm' so that
m['a']['b']['c']
gets me the cat.
from functools import reduce  # reduce is not a builtin on Python 3

li = ['a', 'b', 'c']
# Fold from the innermost value ('') outwards: '' -> {'c': ''} -> {'b': ...} -> {'a': ...}
d = reduce(lambda inner, key: {key: inner}, reversed(li + ['']))
print(d)
print(d['a']['b']['c'])
I guess you also want to include a value in the end. This works for that too:
def get_value(d, l):
    """Return the value found by following the keys of *l* through *d*.

    Raises KeyError when a key along the path is missing.
    """
    for key in l:
        d = d[key]
    return d


def add_keys(d, l, c=None):
    """Store *c* in *d* under the nested key path *l*, in place.

    Intermediate dicts are created as needed; existing sub-dicts along the
    path are kept, so sibling entries added earlier survive.  Raises
    IndexError when *l* is empty.
    """
    leaf = l[-1]
    for key in l[:-1]:
        child = d.get(key)
        if not isinstance(child, dict):
            # Nothing usable here yet: start a fresh level.
            child = d[key] = {}
        d = child
    d[leaf] = c


def main():
    d = {}
    l1 = ['a', 'b', 'c', 'd']
    c1 = 'letters'
    l2 = [42, "42", (42,)]
    c2 = 42
    add_keys(d, l1, c1)
    print(d)
    add_keys(d, l2, c2)
    print(d)
    print(get_value(d, l1))
    print(get_value(d, l2))


if __name__ == '__main__':
    main()
It prints:
{'a': {'b': {'c': {'d': 'letters'}}}}
{'a': {'b': {'c': {'d': 'letters'}}}, 42: {'42': {(42,): 42}}}
letters
42
So it surely works. Recursion for the win.
>>> x = ['a','b','c']
>>> y={}
>>> y[x[-1]]=""
>>> x.pop(-1)
'c'
>>> for i in x[::-1]:
... y={i:y}
...
>>> y
{'a': {'b': {'c': ''}}}
>>> y['a']['b']['c']
''
This will work.
#!/usr/bin/python2
from __future__ import print_function
x = ['a','b','c']
def ltod(l):
    """Build a chain of nested dicts keyed by the items of *l*, in order.

    The innermost value is an empty dict.  *l* is only read, so the
    caller's list is left intact.
    """
    rv = node = {}
    for key in l:
        child = {}
        node[key] = child
        node = child
    return rv
d = ltod(x)
print(d)
print(d["a"]["b"]["c"])
d["a"]["b"]["c"] = "text"
print(d["a"]["b"]["c"])
Outputs:
{'a': {'b': {'c': {}}}}
{}
text
Find below sample that is not very beautiful but quite simple:
path_to_data = "user.name.first_name"
keys = path_to_data.split('.')
t = []
for key in reversed(keys):  # innermost key first, then wrap outwards
    if not t:
        t.append({key: {}})
    else:
        t[-1] = {key: t[-1]}
# t[0] will contain your dictionary
A general solution would be to use collections.defaultdict to create a nested dictionary. Then override __setitem__ for whatever behavior you'd like. This example will do the string parsing as well.
from collections import defaultdict
class nesteddict(defaultdict):
    """defaultdict of nesteddicts that accepts dotted string keys on assignment.

    ``nd['a.b.c'] = v`` is stored as ``nd['a']['b']['c'] = v``; missing
    intermediate levels are created by the default factory.
    """

    def __init__(self):
        defaultdict.__init__(self, nesteddict)

    def __setitem__(self, key, value):
        # Only strings can carry a dotted path; any other hashable key is
        # stored as-is instead of failing on the missing .split().
        if not isinstance(key, str):
            defaultdict.__setitem__(self, key, value)
            return
        parts = key.split('.')
        node = self
        for part in parts[:-1]:
            node = node[part]
        defaultdict.__setitem__(node, parts[-1], value)
nd = nesteddict()
nd['a.b.c'] = 'lolcat'
assert nd['a']['b']['c'] == 'lolcat'

Combine two dictionaries of dictionaries (Python)

Is there an easy way to combine two dictionaries of dictionaries in Python? Here's what I need:
dict1 = {'A' : {'B' : 'C'}}
dict2 = {'A' : {'D' : 'E'}}
result = dict_union(dict1, dict2)
# => result = {'A' : {'B' : 'C', 'D' : 'E'}}
I created a brute-force function that does it, but I was looking for a more compact solution:
def dict_union(train, wagon):
    """Recursively merge *wagon* into *train* and return *train*.

    Nested dicts are merged key by key; any other value from *wagon*
    replaces the one in *train*.  *train* is modified in place, *wagon*
    is only read.
    """
    for key, val in wagon.items():
        if not isinstance(val, dict):
            train[key] = val
            continue
        subdict = train.get(key)
        if not isinstance(subdict, dict):
            # Absent, or holding a non-dict that a dict cannot be merged into.
            subdict = train[key] = {}
        dict_union(subdict, val)
    return train
Here is a class, RUDict (for Recursive-Update dict) that implements the behaviour you're looking for:
class RUDict(dict):
    """dict whose update() merges nested dicts instead of replacing them."""

    def update(self, E=None, **F):
        """Recursively merge *E* (a mapping or an iterable of pairs) and *F*."""
        if E is not None:
            if callable(getattr(E, 'keys', None)):
                for k in E:
                    if k in self:  # existing ...must recurse into both sides
                        self.r_update(k, E)
                    else:  # doesn't currently exist, just update
                        self[k] = E[k]
            else:
                for (k, v) in E:
                    self.r_update(k, {k: v})
        for k in F:
            self.r_update(k, {k: F[k]})

    def r_update(self, key, other_dict):
        """Merge ``other_dict[key]`` into ``self[key]``.

        When both sides hold dicts they are merged recursively into a new
        RUDict; otherwise the incoming value replaces (or creates) the entry.
        """
        # .get() rather than [] so that keys not present yet are simply
        # assigned instead of raising KeyError.
        current = self.get(key)
        incoming = other_dict[key]
        if isinstance(current, dict) and isinstance(incoming, dict):
            merged = RUDict(current)
            merged.update(incoming)
            self[key] = merged
        else:
            self[key] = incoming


def test():
    dict1 = {'A': {'B': 'C'}}
    dict2 = {'A': {'D': 'E'}}
    dx = RUDict(dict1)
    dx.update(dict2)
    print(dx)


if __name__ == '__main__':
    test()
>>> import RUDict
>>> RUDict.test()
{'A': {'B': 'C', 'D': 'E'}}
>>>
This solution is pretty compact. It's ugly, but you're asking for some rather complicated behavior:
# Recursive union: a dict value in d2 is merged with d1's entry for the same
# key, any other value from d2 wins, and keys only in d1 are carried over.
# The key sets are united with "|" because Python 3 dict views cannot be
# concatenated with "+".
dict_union = lambda d1,d2: dict((x,(dict_union(d1.get(x,{}),d2[x]) if
  isinstance(d2.get(x),dict) else d2.get(x,d1.get(x)))) for x in
  set(d1) | set(d2))
My solution is designed to combine any number of dictionaries as you had and could probably be cut down to look neater by limiting it to combining only two dictionaries but the logic behind it should be fairly easy to use in your program.
def dictCompressor(*args):
    """Combine any number of dicts of dicts, one level deep.

    For every top-level key, the sub-dicts of all arguments are merged with
    dict.update(), so later arguments win on clashing inner keys.
    """
    output = {}
    for mydict in args:
        for x, y in mydict.items():
            # Create the target sub-dict on first sight of the key.
            output.setdefault(x, {}).update(y)
    return output
You could subclass dict and wrap the original dict.update() method with a version which would call update() on the subdicts rather than directly overwriting subdicts. That may end up taking at least as much effort as your existing solution, though.
Has to be recursive, since dictionaries may nest. Here's my first take on it, you probably want to define your behavior when dictionaries nest at different depths.
def join(A, B):
    """Recursively merge two nested dicts into a new dict.

    When both arguments are dicts, their keys are united and the values
    joined pairwise.  Otherwise *A* is returned unless it is None, in which
    case *B* is returned, so falsy values such as 0 or '' are preserved.
    """
    if not isinstance(A, dict) or not isinstance(B, dict):
        return B if A is None else A
    return {key: join(A.get(key), B.get(key)) for key in set(A) | set(B)}


def main():
    A = {'A': {'B': 'C'}, 'D': {'X': 'Y'}}
    B = {'A': {'D': 'E'}}
    print(join(A, B))
As for me, there is not enough information, but anyway, please find my sample code below:
dict1 = {'A': {'B': 'C'}}
dict2 = {'A': {'D': 'E'}, 'B': {'C': 'D'}}
output = {}
# Fold each source into output; dict2 comes last, so it wins on clashes.
for source in (dict1, dict2):
    for key, sub in source.items():
        output.setdefault(key, {}).update(sub)

Categories

Resources