I have two lists
a = ['b','c','a','d','v']
g = [{'c':'1'},{'c':'2'},{'c':'3'},{'d':'1'},{'d':'2'}]
I want to match the key in the dictionary in the list g to the elements in list a, if they are matched, the elements in list g will be inserted into list a.
The desired outcome is:
['b','c',{'c':'1'},{'c':'2'},{'c':'3'},'a','d',{'d':'1'},{'d':'2'},'v']
Try:
a = ["b", "c", "a", "d", "v"]
g = [{"c": "1"}, {"c": "2"}, {"c": "3"}, {"d": "1"}, {"d": "2"}]
tmp = {}
for d in g:
for k in d:
tmp.setdefault(k, []).append(d)
out = []
for v in a:
out.append(v)
out.extend(tmp.get(v, []))
print(out)
Prints:
['b', 'c', {'c': '1'}, {'c': '2'}, {'c': '3'}, 'a', 'd', {'d': '1'}, {'d': '2'}, 'v']
Try using sorted:
print(sorted(a + g, key=lambda x: list(x)[0]))
Output:
['a', 'b', 'c', {'c': '1'}, {'c': '2'}, {'c': '3'}, 'd', {'d': '1'}, {'d': '2'}, 'e']
a = ['a','b','c','d','e']
g = [{'c':'1'},{'c':'2'},{'c':'3'},{'d':'1'},{'d':'2'},{'z':'1'}]
for dic in g:
for key in dic:
if key in a:
a.append(dic)
print(a)
#output ['a', 'b', 'c', 'd', 'e', {'c': '1'}, {'c': '2'}, {'c': '3'}, {'d': '1'}, {'d': '2'}]
After this, you can do sorting using lambda if required.
Related
Suppose I have the following data frame:
df = pd.DataFrame({'a': [1,1,1,2], 'b': ['a', 'a', 'b', 'c'], 'd': [1, 2, 3, 4]})
And I want to end with the following dict:
{1: [{'b':'a', 'd': 1}, {'b': 'a', 'd': 2}, {'b': 'b', 'd': 3}], 2: [{'b': 'c', 'd': 4}]}
Basically, I want to group by a and for each data frame I want to apply to_dict('records').
What I tried was the following:
# dict ok but not a list
df.groupby('a').agg(list).to_dict('index')
{1: {'b': ['a', 'a', 'b'], 'd': [1, 2, 3]}, 2: {'b': ['c'], 'd': [4]}}
# the index disappears
df.groupby('a').agg(list).to_dict('records')
[{'b': ['a', 'a', 'b'], 'd': [1, 2, 3]}, {'b': ['c'], 'd': [4]}]
df.set_index('a').to_dict('index')
ValueError: DataFrame index must be unique for orient='index'
I think I can do it using a for-loop but I'm almost sure there is a pythonic way to do it.
You could do:
df.assign(dicts=df.drop(columns="a").to_dict("records")).groupby("a")["dicts"].agg(
list
).to_dict()
Here is a way using groupby() and apply()
df.groupby('a').apply(lambda x: x[['b','d']].to_dict('records')).to_dict()
Output:
{1: [{'b': 'a', 'd': 1}, {'b': 'a', 'd': 2}, {'b': 'b', 'd': 3}],
2: [{'b': 'c', 'd': 4}]}
Following your logic, I think one way to avoid a for-loop, is to use GroupBy.apply with zip inside a listcomp to iterate over both columns in // :
out = df.groupby("a").apply(lambda x: [{"b": y, "d": z}
for y, z in zip(x["b"], x["d"])]).to_dict()
If you need to zip more than two columns (dynamically), use this variant :
out = df.groupby("a").apply(lambda x: [dict(zip(x.columns[1:], row))
for row in x[x.columns[1:]].to_numpy()]).to_dict()
Output :
print(out)
#{1: [{'b': 'a', 'd': 1}, {'b': 'a', 'd': 2}, {'b': 'b', 'd': 3}], 2: [{'b': 'c', 'd': 4}]}
I have dictionary something like:
d1 = {'0': {'a'}, '1': {'b'}, '2': {'c', 'd'}, '3': {'E','F','G'}}
and I want result like this
d2 = {'a': '0', 'b': '1', 'c': '2', 'd': '2', 'E': '3', 'F': '3', 'G': '3'}
so I tried
d2 = dict ((v, k) for k, v in d1.items())
but value is surrounded by set{}, so it didn't work well...
is there any way that I can fix it?
You could use a dictionary comprehension:
{v:k for k,vals in d1.items() for v in vals}
# {'a': '0', 'b': '1', 'c': '2', 'd': '2', 'E': '3', 'F': '3', 'G': '3'}
Note that you need an extra level of iteration over the values in each key here to get a flat dictionary.
Another dict comprehension:
>>> {v: k for k in d1 for v in d1[k]}
{'a': '0', 'b': '1', 'c': '2', 'd': '2', 'E': '3', 'F': '3', 'G': '3'}
Benchmark comparison with yatu's:
from timeit import repeat
setup = "d1 = {'0': {'a'}, '1': {'b'}, '2': {'c', 'd'}, '3': {'E','F','G'}}"
yatu = "{v:k for k,vals in d1.items() for v in vals}"
heap = "{v:k for k in d1 for v in d1[k]}"
for _ in range(3):
print('yatu', min(repeat(yatu, setup)))
print('heap', min(repeat(heap, setup)))
print()
Results:
yatu 1.4274586000000227
heap 1.4059823000000051
yatu 1.4562267999999676
heap 1.3701727999999775
yatu 1.4313863999999512
heap 1.3878657000000203
Another benchmark, with a million keys/values:
setup = "d1 = {k: {k+1, k+2} for k in range(0, 10**6, 3)}"
for _ in range(3):
print('yatu', min(repeat(yatu, setup, number=10)))
print('heap', min(repeat(heap, setup, number=10)))
print()
yatu 1.071519999999964
heap 1.1391495000000305
yatu 1.0880677000000105
heap 1.1534022000000732
yatu 1.0944767999999385
heap 1.1526202000000012
Here's another possible solution to the given problem:
def flatten_dictionary(dct):
d = {}
for k, st_values in dct.items():
for v in st_values:
d[v] = k
return d
if __name__ == '__main__':
d1 = {'0': {'a'}, '1': {'b'}, '2': {'c', 'd'}, '3': {'E', 'F', 'G'}}
d2 = flatten_dictionary(d1)
print(d2)
def recursion(input_type):
print('input_type ',input_type)
if isinstance(input_type, dict):
num = 0
for k,v in input_type.items():
if isinstance(v, dict):
print('from recursion')
recursion(v)
elif isinstance(v, list):
for j in v:
if isinstance(j, dict):
print('from recursion level 2')
recursion(j)
else:
temp_dict = {k:v}
print('type: ',type(temp_dict), k, v)
print('num',num)
num = num+1
for i in list_:
recursion(i)
How to get the interim results from the recursion.
consider the input as shown below:
input: [{'a':a, 'b':b, 'c':[{'d':d, 'e':e}]}]
Updated input: [ {'a':a, 'b':b, 'c': { 'd':d, 'e': [ {'f':f, 'g':g}, {'f':f1, 'g':g1} ] } } ]
desired output: [{'a':a, 'b':b, 'd':d, 'f':f, 'g':g, 'f_new':f1, 'g_new':g1}]
If the key is duplicate then it should update such as 'f' to 'f_new' or something like that
Thank you in advance!!
You can iterate over the dict items and if an item is a list, recursively flatten the dicts within it:
def f(o):
return {a: b for k, v in o.items() for a, b in ((i for d in (v if isinstance(v, list)
else (v,)) for i in f(d).items()) if isinstance(v, (list, dict)) else ((k, v),))}
so that given:
lst = [{'a': 'a', 'b': 'b', 'c': [{'d': 'd', 'e': 'e'}, {'f': [{'g': 'g'}]}]}]
[f(d) for d in lst] would return:
[{'a': 'a', 'b': 'b', 'd': 'd', 'e': 'e', 'g': 'g'}]
and that given:
lst = [{'a': 'a', 'b': 'b', 'c': {'d': 'd', 'e': {'f': 'f', 'g': 'g'}}}]
[f(d) for d in lst] would return:
[{'a': 'a', 'b': 'b', 'd': 'd', 'f': 'f', 'g': 'g'}]
To avoid collisions in merged keys, append _new to a duplicating key until it is found not pre-existing, in which case you cannot use comprehension:
def f(o):
output = {}
for k, v in o.items():
for a, b in ((i for d in (v if isinstance(v, list) else (v,)) for i in f(d).items())
if isinstance(v, (list, dict)) else ((k, v),)):
while a in output:
a += '_new'
output[a] = b
return output
so that given:
lst = [{'a': 'a', 'b': 'b', 'c': {'d': 'd', 'e': [{'f': 'f', 'g': 'g'}, {'f': 'f1', 'g': 'g1'}]}}]
[f(d) for d in lst] would return:
[{'a': 'a', 'b': 'b', 'd': 'd', 'f': 'f', 'g': 'g', 'f_new': 'f1', 'g_new': 'g1'}]
Demo: https://repl.it/#blhsing/NonstopSeveralActionscript
I am given a list of dictionaries like this:
[
{'A': ['B', 'C', 'D']},
{'B': ['E', 'F']},
{'C': ['E']},
{'F': ['G', 'H']}
]
A key in the dictionary is a parent of a corresponding dictionary value, i.e., {parent: [child1, child2, child3]}
How can I construct a tree-like dictionary in the following format like the following:
{'A':{'B':{'E': None,
'F': {'G': None,
'H': None}
},
'C': {'E': None}}
If a node doesn't have a child, we will fill its value with None. I don't know how to write a recursive process to transform the list into a dictionary, any idea?
Thanks!
You can do this with a recursive function:
def find_set(d, k, v):
for key, value in d.items():
if isinstance(value, dict):
find_set(d[key], k, v)
return
if key == k:
d[key] = {}
for i in v:
d[key][i] = None
return d
d[k] = {}
for i in v:
d[k][i] = None
return d
Code:
l = [{'A': ['B', 'C', 'D']}, {'B': ['E', 'F']}, {'C': ['E']}, {'F': ['G', 'H']}]
d = {}
for node in l:
for key, value in node.items():
find_set(d, key, value)
d will be :
{'A': {'B': {'E': None, 'F': {'G': None, 'H': None}, 'C': {'E': None}}, 'C': None, 'D': None}}
You can use recursion:
d = [{'A': ['B', 'C', 'D']}, {'B': ['E', 'F']}, {'C': ['E']}, {'F': ['G', 'H']}]
d1 = dict(list(i.items())[0] for i in d)
def flatten_structure(d, root = 'A'):
if root:
return {root:flatten_structure(d1[root], None)}
return {i:flatten_structure(d1[i], None) if i in d1 else None for i in d}
print(flatten_structure(d1['A']))
Output:
{'A': {'B': {'E': None, 'F': {'G': None, 'H': None}}, 'C': {'E': None}, 'D': None}}
I have a python dict like below:
{ '1': {'a': '0.6', 'b': '0.8', 'c': '2','d': '0.5'},
'2': {'a': '0.7', 'b': '0.9', 'c': '0.1','d': '0.2'},
'3': {'a': '0.5', 'b': '0.8', 'c': '3'},
}
How could I get the following result?
('2','a','0.7') ('2',b','0.9') ('3','c', '3') ('1','d', '0.5')
Well, here is the code for it (just 5 lines):
total = []
for i in ['a', 'b', 'c', 'd']:
kv = max(a.iterkeys(), key=(lambda key: float(a[key][i]) if i in a[key].keys() else -9.0))
hv = a[kv][i]
total.append((kv, i, hv))
print total
Output:
[('2', 'a', '0.7'), ('2', 'b', '0.9'), ('3', 'c', '3'), ('1', 'd', '0.5')]
-9.0 is just a random low number.
x={ '1': {'a': '0.6', 'b': '0.8', 'c': '2','d': '0.5'},
'2': {'a': '0.7', 'b': '0.9', 'c': '0.1','d': '0.2'},
'3': {'a': '0.5', 'b': '0.8', 'c': '3'},
}
d={}
for i,j in x.iteritems():
for k,m in j.iteritems():
d.setdefault(k,[0,0])
if j[k]>d[k][0]:
d[k]=(j[k],i)
print [(j[1],i,j[0]) for i,j in d.items()]
You can use additional dict to do your job.
Output:[('2', 'a', '0.7'), ('3', 'c', '3'), ('2', 'b', '0.9'), ('1', 'd', '0.5')]
I agree the question is a bit vague.. I recommend you dont use strings as values.. use int or float if you can in the dictionaries, also does not specify if python 2.x or 3.x
but I think you are after something like this..
def filter_dict(values):
result = collections.Counter()
for value in values.keys():
for k, v in values[value].items():
v = float(v)
result[k] = v if v > result[k] else result[k]
return result
this is how it behaves:
class FilterDictTest(unittest.TestCase):
def test_filter_dict(self):
# Arrange
actual = {
'1': {'a': '0.6', 'b': '0.8', 'c': '2', 'd': '0.5'},
'2': {'a': '0.7', 'b': '0.9', 'c': '0.1', 'd': '0.2'},
'3': {'a': '0.5', 'b': '0.8', 'c': '3'}
}
expected = {
'a': 0.7,
'b': 0.9,
'c': 3,
'd': 0.5
}
# Act & Assert
self.assertEquals(filter_dict(actual), expected)
A little late here.
#!/usr/bin/env python3.5
# entry
entry = {'1': {'a': '0.6', 'b': '0.8', 'c': '2','d': '0.5'}, '2': {'a': '0.7', 'b': '0.9', 'c': '0.1','d': '0.2'}, '3': {'a': '0.5', 'b': '0.8', 'c': '3'}}
# identify keys
all_categories = []
for number, dct in entry.items():
for key, val in dct.items():
all_categories = all_categories + list(dct.keys())
all_categories = set(all_categories)
# Get max values
max_values = {category:None for category in all_categories}
for category in all_categories:
for number, dct in entry.items():
if category in dct.keys():
if max_values[category] is None:
max_values[category] = (number, dct[category])
elif float(max_values[category][1]) < float(dct[category]):
max_values[category] = (number, dct[category])
output = [(number, category, value) for (category, (number, value)) in max_values.items()]
print (output)
Output:
[('2', 'a', '0.7'), ('1', 'd', '0.5'), ('2', 'b', '0.9'), ('3', 'c', '3')]
Not exactly in the order you expected them, but the values are correct. It's not the most elegant solution, though.
I iterate a second time in dict to compare values.
values = []
for key in d:
for skey in d[key]:
max = 0
_key_ = ''
for _ in d:
if d[_].has_key(skey):
if d[_][skey]>max:
max = d[_][skey]
_key_ = _
if (_key_, skey, max) not in values:
values.append((_key_, skey, max))
print values