What would be the cleanest way to convert this
{"a.b.c[0].key1": 1, "a.b.c[1].key2": 2, "a.b.c[3].key3": 3}
Into this
{"a": {"b": {"c": [{"key1": 1}, {"key2": 2}, None, {"key3": 3}]}}}
the dictionary keys may be anything.
the length of the list may vary.
the depth of the dictionary may vary.
if there are missing values in the list the value must be None.
if values are repeated the last one declared is the one that counts.
I came up with the following working example.
Was wondering if we could find a better solution for our community.
def unflatten(data):
    """Rebuild a nested dict/list structure from a flat ``{path: value}`` dict.

    Paths use dots for dictionary keys and ``[n]`` for list indexes, for
    example ``"a.b.c[3].key3"``. List slots that no path fills are left as
    ``None``. When two paths address the same slot, the one iterated last
    wins.

    Args:
        data: Flat mapping of path strings to values.

    Returns:
        The nested structure (a dict, or a list when the paths start with an
        index), an empty dict for empty input, or ``None`` when *data* is
        not a dict.
    """
    import re  # local import so the function does not rely on file-level imports

    if not isinstance(data, dict):
        return None

    # Group 1 captures a dictionary key, group 2 captures a list index.
    token_re = re.compile(r'\.?([^.\[\]]+)|\[(\d+)\]')

    # The real root lives under the "" key so that it can be a dict or a list.
    holder = {}
    for path, value in data.items():
        container, slot = holder, ""
        for name, index in token_re.findall(path):
            # Look at what currently sits in ``container[slot]``.
            if isinstance(container, list):
                slot = int(slot)
                # Pad with None so that the slot exists.
                container.extend([None] * (slot + 1 - len(container)))
                child = container[slot]
            else:
                child = container.get(slot)
            # A missing slot, a None padding entry or a scalar cannot be
            # descended into: replace it with the container the path needs.
            if not isinstance(child, (dict, list)):
                child = [] if index else {}
                container[slot] = child
            container, slot = child, index or name
        if isinstance(container, list):
            slot = int(slot)
            container.extend([None] * (slot + 1 - len(container)))
        container[slot] = value
    return holder.get("") or holder
You can use recursion:
d = {"a.b.c[0].key1": 1, "a.b.c[1].key2": 2, "a.b.c[3].key3": 3}
from itertools import groupby
import re
def group_data(data):
    """Build a nested dict/list structure from rows of path parts.

    Each row is ``[part, part, ..., value]``. A part written as ``name[n]``
    places the rest of the row at index ``n`` of a list stored under
    ``name``. Indexes that no row fills become ``None``.

    Args:
        data: Iterable of rows (lists) as described above.

    Returns:
        A dict mapping every leading part to its nested value.
    """
    # Anchored so that a digit inside the name ("c1[0]") is not taken as the index.
    index_re = re.compile(r'^([^\[\]]+)\[(\d+)\]$')

    plain = {}    # key -> remaining rows
    indexed = {}  # key -> {index -> remaining rows}
    for row in data:
        head, rest = row[0], row[1:]
        found = index_re.match(head)
        if found:
            by_index = indexed.setdefault(found.group(1), {})
            by_index.setdefault(int(found.group(2)), []).append(rest)
        else:
            plain.setdefault(head, []).append(rest)

    def build(rows):
        # A row reduced to a single element holds the value itself; when a
        # value was declared more than once the last declaration wins.
        leaves = [row for row in rows if len(row) == 1]
        if leaves:
            return leaves[-1][0]
        return group_data(rows)

    final_result = {key: build(rows) for key, rows in plain.items()}
    for key, by_index in indexed.items():
        # Start at 0 (not at the smallest index) so leading gaps are None too.
        final_result[key] = [
            build(by_index[i]) if i in by_index else None
            for i in range(max(by_index) + 1)
        ]
    return final_result
print(group_data([[*a.split('.'), b] for a, b in d.items()]))
Output:
{'a': {'b': {'c': [{'key1': 1}, {'key2': 2}, None, {'key3': 3}]}}}
A recursive function would probably be much easier to work with and more elegant.
This is partly pseudocode, but it may help you get thinking.
I haven't tested it, but I'm pretty sure it should work so long as you don't have any lists that are directly elements of other lists. So you can have dicts of dicts, dicts of lists, and lists of dicts, but not lists of lists.
def unflatten(data):
resultDict = {}
for e in data:
insertElement(e.split("."), data[e], resultDict)
return resultDict
def insertElement(path, value, subDict):
if (path[0] is of the form "foo[n]"):
key, index = parseListNotation(path[0])
if (key not in subDict):
subDict[key] = []
if (index >= subDict[key].len()):
subDict[key].expandUntilThisSize(index)
if (subDict[key][index] == None):
subDict[key][index] = {}
subDict[key][index] = insertElement(path.pop(0), value, subDict[key][index])
else:
key = path[0]
if (path.length == 1):
subDict[key] = value
else:
if (key not in subDict):
subDict[key] = {}
subDict[key] = insertElement(path.pop(0), value, subDict[key])
return subDict;
The idea is to build the dictionary from the inside, out. E.g.:
For the first element, first create the dictionary `
{key1: 1},
Then assign that to an element of a new dictionary
{c : [None]}, c[0] = {key1: 1}
Then assign that dictionary to the next element b in a new dict, like
- {b: {c : [{key1: 1}]}
Assign that result to a in a new dict
- {a: {b: {c : [{key1: 1}]}}
And lastly return that full dictionary, to use to add the next value.
If you're not familiar with recursive functions, I'd recommend practicing with some simpler ones, and then writing one that does what you want but for input that's only dictionaries.
General path of a dictionary-only recursive function:
Given a path that's a list of attributes of nested dictionaries ( [a, b, c, key1] in your example, if c weren't a list):
Start (path, value):
If there's only one item in your path, build a dictionary setting
that key to your value, and you're done.
If there's more than one, build a dictionary using the first
element as a key, and set the value as the output of Start(rest of the path, value)
Here is another variation on how to achieve the desired results. Not as pretty as I would like though, so I expect there is a much more elegant way. Probably more regex than is really necessary if you spent a bit more time on this, and also seems like the break approach to handling the final key is probably just an indicator that the loop logic could be improved to eliminate that sort of manual intervention. That said, hopefully this is helpful in the process of refining your approach here.
import re
def unflatten(data):
    """Turn a flat ``{"a.b[0].c": value}`` dict into nested dicts and lists.

    Path segments are separated by dots. A segment written ``name[n]``
    addresses index ``n`` of a list stored under ``name``; list slots that
    no path fills are left as ``None``. Lists may contain dicts or plain
    values, but not other lists.

    Args:
        data: Flat mapping of dotted path strings to values.

    Returns:
        The nested dictionary. It is also printed, as before.
    """
    results = {}
    list_rgx = re.compile(r'([^\[\]]+)\[(\d+)\]')
    for text, value in data.items():
        cur = results
        keys = text.split('.')
        for i, key in enumerate(keys):
            stop = (i == len(keys) - 1)
            found = re.match(list_rgx, key)
            if found is None:
                # Plain dictionary segment.
                if stop:
                    cur[key] = value
                else:
                    nxt = cur.get(key)
                    if not isinstance(nxt, dict):
                        nxt = cur[key] = {}
                    cur = nxt
                continue
            # List segment "name[n]".
            name, idx = found.group(1), int(found.group(2))
            items = cur.get(name)
            if not isinstance(items, list):
                items = cur[name] = []
            # Pad with None so that index idx exists.
            items.extend([None] * (idx + 1 - len(items)))
            if stop:
                items[idx] = value
            else:
                # Reuse a dict already stored at this index so that sibling
                # keys declared by other paths are kept.
                if not isinstance(items[idx], dict):
                    items[idx] = {}
                cur = items[idx]
    print(results)
    return results
Output:
d = {"a.b.c[0].key1": 1, "a.b.c[1].key2": 2, "a.b.c[3].key3": 3}
unflatten(d)
# {'a': {'b': {'c': [{'key1': 1}, {'key2': 2}, None, {'key3': 3}]}}}
Related
I am trying to access a specific key in a nested dictionary, then match its value to a string in a list. If the string in the list contains the string in the dictionary value, I want to override the dictionary value with the list value. Below is an example.
my_list = ['string1~', 'string2~', 'string3~', 'string4~', 'string5~', 'string6~']
my_iterable = {'A':'xyz',
'B':'string6',
'C':[{'B':'string4', 'D':'123'}],
'E':[{'F':'321', 'B':'string1'}],
'G':'jkl'
}
The key I'm looking for is B, the objective is to override string6 with string6~, string4 with string4~, and so on for all B keys found in the my_iterable.
I have written a function to compute the Levenshtein distance between two strings, but I am struggling to write an efficient way to override the values of the keys.
def find_and_replace(key, dictionary, original_list):
    """Yield every value stored under *key* anywhere inside *dictionary*.

    Nested dicts are searched, as are dicts found directly inside list
    values. A value stored under *key* is yielded as-is and is not searched
    further.

    Args:
        key: Dictionary key to look for.
        dictionary: Possibly nested dict to search.
        original_list: Candidate replacement strings. It is only passed
            along to the recursive calls; the matching step is still a
            placeholder.

    Yields:
        Each value found under *key*, in iteration order.
    """
    for k, v in dictionary.items():
        if k == key:
            #function to check if original_list item contains v
            yield v
        elif isinstance(v, dict):
            # The recursion used to pass the undefined name ``name_list``.
            yield from find_and_replace(key, v, original_list)
        elif isinstance(v, list):
            for d in v:
                if isinstance(d, dict):
                    yield from find_and_replace(key, d, original_list)
if I call
updated_dict = find_and_replace('B', my_iterable, my_list)
I want updated_dict to return the below:
{'A':'xyz',
'B':'string6~',
'C':[{'B':'string4~', 'D':'123'}],
'E':[{'F':'321', 'B':'string1~'}],
'G':'jkl'
}
Is this the right approach to the most efficient solution, and how can I modify it to return a dictionary with the updated values for B?
You can use below code. I have assumed the structure of input dict to be same throughout the execution.
# Input List
my_list = ['string1~', 'string2~', 'string3~', 'string4~', 'string5~', 'string6~']
# Input Dict
# Removed duplicate key "B" from the dict
my_iterable = {'A':'xyz',
               'B':'string6',
               'C':[{'B':'string4', 'D':'123'}],
               'E':[{'F':'321', 'B':'string1'}],
               'G':'jkl',
               }
# setting search key
search_key = "B"
# Main code
# Assigning to an existing key while iterating is safe: the dict size is unchanged.
for i, v in my_iterable.items():
    if i == search_key:
        # Top-level hit: swap in the first list entry that contains the value.
        if isinstance(v, str):
            search_in_list = [item for item in my_list if v in item]
            if search_in_list:
                my_iterable[i] = search_in_list[0]
    elif isinstance(v, list):
        # Check every dict in the list, not only the first one; explicit
        # type checks replace the bare ``except`` that used to hide errors.
        for entry in v:
            if not isinstance(entry, dict):
                continue
            k = entry.get(search_key)
            if not isinstance(k, str):
                continue
            search_in_list = [item for item in my_list if k in item]
            if search_in_list:
                entry[search_key] = search_in_list[0]
# print output
print (my_iterable)
# Result -> {'A': 'xyz', 'B': 'string6~', 'C': [{'B': 'string4~', 'D': '123'}], 'E': [{'F': '321', 'B': 'string1~'}], 'G': 'jkl'}
The code above could be tidied up further with a list comprehension or by
wrapping it in a function.
I hope this helps and counts!
In some cases, if your nesting is fairly complex, you can treat the dictionary as a JSON string and do all sorts of replacements. It's probably not what people would call very Pythonic, but it gives you a little more flexibility.
import re, json
my_list = ['string1~', 'string2~', 'string3~', 'string4~', 'string5~', 'string6~']
my_iterable = {'A':'xyz',
               'B':'string6',
               'C':[{'B':'string4', 'D':'123'}],
               'E':[{'F':'321', 'B':'string1'}],
               'G':'jkl'}
json_str = json.dumps(my_iterable, ensure_ascii=False)
for val in my_list:
    # Compare and insert the JSON-encoded forms so that quotes or backslashes
    # in a value cannot corrupt the document.
    old_json = json.dumps(val[:-1], ensure_ascii=False)[1:-1]
    new_json = json.dumps(val, ensure_ascii=False)[1:-1]
    # re.escape: the list entry is literal text, not a pattern.
    pattern = re.compile(r'("B":\W?")' + re.escape(old_json) + r'(")')
    # A function replacement avoids "\1" + val being read as a different
    # group reference when val starts with a digit.
    json_str = pattern.sub(
        lambda m, new_json=new_json: m.group(1) + new_json + m.group(2),
        json_str,
    )
my_iterable = json.loads(json_str)
print(my_iterable)
I have an existing list of Key, Value pairs in my current dictionary called total_list. I want to check my list to see if the length of each Key == 1 in total_list, I want to add that key and its value pair to a new dictionary. This is the code that I've come up with.
total_list = {104370544: [31203.7, 01234], 106813775: [187500.0], 106842625: [60349.8]}
diff_so = defaultdict(list)
for key, val in total_list:
if len(total_list[key]) == 1:
diff_so[key].append[val]
total_list.pop[key]
But I keep getting an error with
"cannot unpack non-iterable int object".
I was wondering if there's any way for me to fix this code so that it runs properly?
Assuming that the OP means a string of one character by length = 1 of the key.
You can do this:
total_list = [{'abc':"1", 'bg':"7", 'a':"7"}]
new_dict = {}
# Keep only the entries whose key is exactly one character long.
for entry in total_list:
    for k, v in entry.items():
        if len(k) == 1:
            new_dict[str(k)] = v
print(new_dict)
Output:
{'a': '7'}
After edit:
total_list = {104370544: [31203.7, 1234], 106813775: [187500.0], 106842625: [60349.8]}
# Keep only the entries whose value list holds exactly one element.
new_dict = {k: v for k, v in total_list.items() if len(v) == 1}
Output:
{106813775: [187500.0], 106842625: [60349.8]}
You just need a dictionary comprehension
diff_so = {k: v for k, v in total_list.items() if len(v) == 1}
I had written a recursive function to find a value with given dict and key.
But I think there should be a more readable version. Here is the code block.
def findvalue(_dict, target):
    """Return the first value stored under *target* in a nested dict.

    The search is depth-first in iteration order. When a nested dict does
    not contain the key, the search carries on with the remaining entries
    instead of giving up.

    Args:
        _dict: Possibly nested dictionary to search.
        target: Key to look for.

    Returns:
        The value found, or ``None`` (after printing "no such key") when
        the key does not occur anywhere.
    """
    # A private sentinel tells "not found" apart from a stored None.
    missing = object()

    def search(node):
        for key, values in node.items():
            if key == target:
                return values
            if isinstance(values, dict):
                hit = search(values)
                if hit is not missing:
                    return hit
        return missing

    found = search(_dict)
    if found is missing:
        print("no such key")
        return None
    return found
Is there any one line version of this or using yield(not sure about this)?
Edit: Based on Recursive functions and lists appending/extending and ideas from comments I modified the function to find all matched values by the given key
def find_all_value(_dict, target, values=None):
    """Collect every value stored under *target* in a nested dict.

    Matches are gathered depth-first: a matching value comes before any
    matches found inside it.

    Args:
        _dict: Possibly nested dictionary to search.
        target: Key to look for.
        values: Optional list to append the matches to. A new list is
            created when omitted.

    Returns:
        The list of matching values (empty when there is no match).
    """
    found = [] if values is None else values
    for key, value in _dict.items():
        if key == target:
            found.append(value)
        # Keep scanning the remaining entries instead of returning early.
        if isinstance(value, dict):
            find_all_value(value, target, found)
    return found
To find the value of the first matching key (in breadth-first order) in a nested Python dictionary.
You can do:
def find_value(_dict, key):
    """Return the value of the first *key* found by breadth-first search.

    Shallower matches win over deeper ones; within one level the dict
    iteration order decides.

    Args:
        _dict: Possibly nested dictionary to search.
        key: Key to look for (``None`` is a valid key).

    Returns:
        The matching value, or ``None`` when the key is absent or *_dict*
        is not a dict.
    """
    from collections import deque  # O(1) pops from the left end

    if not isinstance(_dict, dict):
        return None

    queue = deque([_dict])
    # Remember visited dicts so self-referencing structures terminate.
    seen = {id(_dict)}
    while queue:
        for k, v in queue.popleft().items():
            if k == key:
                return v
            if isinstance(v, dict) and id(v) not in seen:
                seen.add(id(v))
                queue.append(v)
    return None
Example:
d = {'c': {'d': 3, 'e': 4}, None: 0, 'b': 2, 'a': 1}
print('None:', find_value(d, None))
print('c:', find_value(d, 'c'))
print('e:', find_value(d, 'e'))
print('a:', find_value(d, 'a'))
Output:
None: 0
c: {'e': 4, 'd': 3}
e: 4
a: 1
##Edit:##
Recursive search for (nested) dict(s) via list comprehension:
def findvalue(d, key):
    """Return the values stored under *key* in *d* and its nested dicts.

    Results from a nested dict are appended as one sub-list, so the nesting
    of the result mirrors the nesting depth of each match. Nested dicts
    without any match add nothing. A match in *d* itself comes last.

    Args:
        d: Possibly nested dictionary to search.
        key: Key to look for.

    Returns:
        A (possibly nested) list of matching values; empty when none.
    """
    l = []
    for value in d.values():
        # isinstance also covers dict subclasses such as OrderedDict.
        if isinstance(value, dict):
            nested = findvalue(value, key)
            if nested:
                l.append(nested)
    if key in d:
        l.append(d[key])
    return l
^ Returns (nested) lists of values for each matching (nested) dict key.
Simplified version which prints, instead of returning, each match:
def findvalue(d, key):
    """Print every value stored under *key* in *d* and its nested dicts.

    Nested dicts are visited first, so deeper matches are printed before
    a match in *d* itself.

    Args:
        d: Possibly nested dictionary to search.
        key: Key to look for.
    """
    # A plain loop: the recursion is wanted for its side effect only.
    for value in d.values():
        if isinstance(value, dict):
            findvalue(value, key)
    if key in d:
        print(d[key])
^ Just displays the values, for each matching key, as they
are encountered.
Test with:
d = {1:2, 3:4, 5:{6:7,8:9, 99:{100:101}}, 10:11}
I need to write a function which is doing following work
Find a string as value in a dictionary of dictionaries and return its key
(1st key if found in main dictionary, 2nd key if found in sub dictionary).
Source Code
Here is the function I tried to implement, but it does not work correctly: I can't figure out how to convert the list into a dictionary, and the following error occurs
for v, k in l:
ValueError: need more than 1 value to unpack
def GetKeyFromDictByValue(self, dictionary, value_to_find):
""""""
key_list = [k for (k, v) in dictionary.items() if v == value_to_find]
if key_list.__len__() is not 0:
return key_list[0]
else:
l = [s for s in dictionary.values() if ":" in str(s)]
d = defaultdict(list)
for v, k in l:
d[k].append(v)
print d
dict = {'a': {'a1': 'a2'}, "aa": "aa1", 'aaa': {'aaa1': 'aaa2'}}
print GetKeyFromDictByValue(dict, "a2")
I must do this on Python 2.5
You created a list of only the dictionary values, but then try to loop over it as if it already contains both keys and values of those dictionaries. Perhaps you wanted to loop over each matched dictionary?
l = [v for v in dictionary.values() if ":" in str(v)]
d = defaultdict(list)
for subdict in l:
for k, v in subdict.items():
I'd instead flatten the structure:
def flatten(dictionary):
    """Yield ``(key, value)`` pairs for every non-dict value in a nested dict.

    Nested dicts are expanded in place; the keys that lead to them are not
    part of the result.

    Args:
        dictionary: Possibly nested dictionary.

    Yields:
        ``(key, value)`` tuples for each leaf value.
    """
    # items() exists on both Python 2 and Python 3; iteritems() does not.
    for key, value in dictionary.items():
        if isinstance(value, dict):
            # recurse
            for res in flatten(value):
                yield res
        else:
            yield key, value
then just search:
def GetKeyFromDictByValue(self, dictionary, value_to_find):
    """Return the key whose value equals *value_to_find*, searching nested dicts.

    The first matching pair produced by ``flatten(dictionary)`` decides the
    result; ``None`` is returned when nothing matches.
    """
    for pair in flatten(dictionary):
        if pair[1] == value_to_find:
            return pair[0]
    return None
Demo:
>>> sample = {'a': {'a1': 'a2'}, "aa": "aa1", 'aaa': {'aaa1': 'aaa2'}}
>>> GetKeyFromDictByValue(None, sample, "a2")
'a1'
For example I have
x = ['a','b','c']
I need to convert it to:
y['a']['b']['c'] = ''
Is that possible?
For the background, I have a config file which contains dotted notation that points to a place in some json data. I'd like to use the dotted notation string to access that specific data in the json file. For example, in the config:
path_to_data = "user.name.first_name"
I'd like my script to recognize that as:
json_data["user"]["name"]["first_name"]
so I can get the value of the first_name field. I converted the original string into a list, and now I don't know how to convert it to a nested dict.
EDIT: There is an existing data structure that I need to apply the dict with. Let's say:
m = {'a': {'b': {'c': 'lolcat'}}}
so that
m['a']['b']['c']
gives me 'lolcat'. If I get the right dictionary structure (as some of the replies did), I would still need to apply this to the existing dictionary 'm'.
So, again, I get this from a config file:
c = 'a.b.c'
That I converted to a list, thinking this will make things easier:
x = ['a','b','c']
Now I have a json-like data structure:
m = {'a': {'b': {'c': 'lolcat'}}}
So the nested dict generated from 'x' should be able to traverse 'm' so that
m['a']['b']['c']
gets me the cat.
# reduce is a builtin on Python 2 only; functools provides it on both.
from functools import reduce

li = ['a','b','c']
# Fold from the innermost value ('') outwards: '' -> {'c': ''} -> {'b': ...} -> ...
d = reduce(lambda x, y: {y:x}, reversed(li+['']))
print(d)
print(d['a']['b']['c'])
I guess you also want to include a value in the end. This works for that too:
def get_value(d, l):
    """Return the value reached by following the keys in *l* through *d*.

    Args:
        d: Nested dictionary to read from.
        l: Sequence of keys, outermost first. An empty sequence returns
            *d* itself.

    Returns:
        The value stored at the end of the key path.

    Raises:
        KeyError: If a key along the path is missing.
    """
    node = d
    # Iterating avoids one recursive call and one list copy per key.
    for key in l:
        node = node[key]
    return node
def add_keys(d, l, c=None):
    """Store *c* in *d* under the nested key path *l*, creating dicts as needed.

    Existing nested dicts along the path are reused, so values added by
    earlier calls are kept. A non-dict value found in the middle of the
    path is replaced by a new dict.

    Args:
        d: Dictionary to modify in place.
        l: Non-empty sequence of keys, outermost first.
        c: Value to store at the end of the path.

    Raises:
        IndexError: If *l* is empty.
    """
    node = d
    for key in l[:-1]:
        child = node.get(key)
        if not isinstance(child, dict):
            child = node[key] = {}
        node = child
    node[l[-1]] = c
def main():
    """Demonstrate add_keys and get_value on two key paths."""
    d = {}
    l1 = ['a', 'b', 'c', 'd']
    c1 = 'letters'
    l2 = [42, "42", (42,)]
    c2 = 42
    add_keys(d, l1, c1)
    # print(x) with a single argument behaves the same on Python 2 and 3.
    print(d)
    add_keys(d, l2, c2)
    print(d)
    # Read both values back; this produces the last two lines of the output.
    print(get_value(d, l1))
    print(get_value(d, l2))
if __name__ == '__main__':
main()
It prints:
{'a': {'b': {'c': {'d': 'letters'}}}}
{'a': {'b': {'c': {'d': 'letters'}}}, 42: {'42': {(42,): 42}}}
letters
42
So it surely works. Recursion for the win.
>>> x = ['a','b','c']
>>> y={}
>>> y[x[-1]]=""
>>> x.pop(-1)
'c'
>>> for i in x[::-1]:
... y={i:y}
...
>>> y
{'a': {'b': {'c': ''}}}
>>> y['a']['b']['c']
''
This will work.
#!/usr/bin/python2
from __future__ import print_function
x = ['a','b','c']
def ltod(l):
    """Return nested dicts keyed by the items of *l*, outermost first.

    The innermost key maps to an empty dict. The input is only read, so the
    caller's list is left untouched.

    Args:
        l: Iterable of hashable keys.

    Returns:
        The outermost dictionary (empty when *l* is empty).
    """
    rv = d = {}
    for i in l:
        d[i] = {}
        d = d[i]
    return rv
d = ltod(x)
print(d)
print(d["a"]["b"]["c"])
d["a"]["b"]["c"] = "text"
print(d["a"]["b"]["c"])
Outputs:
{'a': {'b': {'c': {}}}}
{}
text
Find below sample that is not very beautiful but quite simple:
path_to_data = "user.name.first_name"
keys = path_to_data.split('.')
t = []
for key in keys[::-1]: # just to iterate in reversed order
    if not t:
        # Innermost key first; the loop variable is ``key``, not ``k``.
        t.append({key:{}})
    else:
        # Wrap what has been built so far in the next outer key.
        t[-1] = ({key: t[-1]})
#t[0] will contain your dictionary
A general solution would be to use collections.defaultdict to create a nested dictionary. Then override __setitem__ for whatever behavior you'd like. This example will do the string parsing as well.
from collections import defaultdict
class nesteddict(defaultdict):
    """A defaultdict of nesteddicts that accepts dotted keys on assignment.

    ``nd['a.b.c'] = value`` stores *value* at ``nd['a']['b']['c']`` and
    creates the intermediate levels on demand. Keys that are not strings
    are stored unchanged.
    """

    def __init__(self):
        defaultdict.__init__(self, nesteddict)

    def __setitem__(self, key, value):
        """Store *value* under *key*, splitting string keys on dots."""
        if not isinstance(key, str):
            # Nothing to split: behave like a plain dictionary.
            defaultdict.__setitem__(self, key, value)
            return
        keys = key.split('.')
        node = self
        for part in keys[:-1]:
            node = node[part]
        if isinstance(node, nesteddict):
            # Bypass this method so the final part is not split again.
            defaultdict.__setitem__(node, keys[-1], value)
        else:
            # An ordinary mapping was stored along the path; write into it.
            node[keys[-1]] = value
nd = nesteddict()
nd['a.b.c'] = 'lolcat'
assert nd['a']['b']['c'] == 'lolcat'