Convert pandas df to nested dictionary - python

I have a requirement to convert a df that is in following format:
d = {
'A': ['a1', 'a1', 'a1', 'a1', 'a1', 'a1', 'a1', 'a2', 'a2', 'a2', 'a2', 'a2', 'a2', 'a2', 'a2'],
'B': ['b1', 'b1', 'b1', 'b1', 'b2', 'b2', 'b2', 'b3', 'b3', 'b3', 'b3', 'b3', 'b3', 'b4', 'b4', ],
'C': ['c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9', 'c10', 'c11', 'c12', 'c13', 'c14', 'c15', ],
'D': ['d1', 'd2', 'd3', 'd4', 'd5', 'd6', 'd7', 'd8', 'd9', 'd10', 'd11', 'd12', 'd13', 'd14', 'd15', ],
'E': ['e1', 'e2', 'e3', 'e4', 'e5', 'e6', 'e7', 'e8', 'e9', 'e10', 'e11', 'e12', 'e13', 'e14', 'e15', ],
}
df = pd.DataFrame(d)
df
A B C D E
a1 b1 c1 d1 e1
a1 b1 c2 d2 e2
a1 b1 c3 d3 e3
a1 b1 c4 d4 e4
a1 b2 c5 d5 e5
a1 b2 c6 d6 e6
a1 b2 c7 d7 e7
a2 b3 c8 d8 e8
a2 b3 c9 d9 e9
a2 b3 c10 d10 e10
a2 b3 c11 d11 e11
a2 b3 c12 d12 e12
a2 b3 c13 d13 e13
a2 b4 c14 d14 e14
a2 b4 c15 d15 e15
to a dictionary in following format:
outDict = {
'a1': {
'b1': {
'c': ['c1', 'c2', 'c3', 'c4'],
'd': ['d1', 'd2', 'd3', 'd4'],
'e': ['e1', 'e2', 'e3', 'e4'],
},
'b2': {
'c': ['c5', 'c6', 'c7'],
'd': ['d5', 'd6', 'd7'],
'e': ['e5', 'e6', 'e7'],
},
},
'a2': {
'b3': {
'c': ['c8', 'c9', 'c10', 'c11', 'c12', 'c13'],
'd': ['d8', 'd9', 'd10', 'd11', 'd12', 'd13'],
'e': ['e8', 'e9', 'e10', 'e11', 'e12', 'e13'],
},
'b4': {
'c': ['c14', 'c15'],
'd': ['d14', 'd15'],
'e': ['e14', 'e15'],
}
}
}
i.e. convert values in column A to first level keys; values in column B to second level keys and values in column C,D,E to lists.

First create nested lists by converting A and B to the index, grouping by the index values and converting all columns to lists in a lambda function; finally, convert the resulting Series with a MultiIndex to a nested dictionary:
# Move A and B into the index, then collapse every (A, B) group into a
# {column: [values]} dict. The result is a Series with a MultiIndex; it is
# bound to its own name so the original DataFrame `df` is not overwritten
# and can still be used (e.g. for a later rename of its columns).
nested = (df.set_index(['A', 'B'])
            .groupby(['A', 'B'])
            .apply(lambda x: x.to_dict(orient='list')))
# For each first-level (A) value, xs() selects the B-indexed part of the
# Series and to_dict() turns it into {B: {column: [values]}}.
d = {level: nested.xs(level).to_dict() for level in nested.index.levels[0]}
print(d)
{
'a1': {
'b1': {
'C': ['c1', 'c2', 'c3', 'c4'],
'D': ['d1', 'd2', 'd3', 'd4'],
'E': ['e1', 'e2', 'e3', 'e4']
},
'b2': {
'C': ['c5', 'c6', 'c7'],
'D': ['d5', 'd6', 'd7'],
'E': ['e5', 'e6', 'e7']
}
},
'a2': {
'b3': {
'C': ['c8', 'c9', 'c10', 'c11', 'c12', 'c13'],
'D': ['d8', 'd9', 'd10', 'd11', 'd12', 'd13'],
'E': ['e8', 'e9', 'e10', 'e11', 'e12', 'e13']
},
'b4': {
'C': ['c14', 'c15'],
'D': ['d14', 'd15'],
'E': ['e14', 'e15']
}
}
}
If you need the nested keys in lowercase, just rename the columns first:
df = df.rename(columns={'C':'c', 'D':'d', 'E':'e'})
df = (df.set_index(['A', 'B'])
.groupby(['A', 'B'])
.apply(lambda x: x.to_dict(orient='list')))
d = {level: df.xs(level).to_dict() for level in df.index.levels[0]}
print (d)
{
'a1': {
'b1': {
'c': ['c1', 'c2', 'c3', 'c4'],
'd': ['d1', 'd2', 'd3', 'd4'],
'e': ['e1', 'e2', 'e3', 'e4']
},
'b2': {
'c': ['c5', 'c6', 'c7'],
'd': ['d5', 'd6', 'd7'],
'e': ['e5', 'e6', 'e7']
}
},
'a2': {
'b3': {
'c': ['c8', 'c9', 'c10', 'c11', 'c12', 'c13'],
'd': ['d8', 'd9', 'd10', 'd11', 'd12', 'd13'],
'e': ['e8', 'e9', 'e10', 'e11', 'e12', 'e13']
},
'b4': {
'c': ['c14', 'c15'],
'd': ['d14', 'd15'],
'e': ['e14', 'e15']
}
}
}

If the purpose is just to access by keys in A and B, I actually think the output from a group by with tuple keys is sufficient.
d = df.groupby(['A','B']).agg({'C':list,'D':list,'E':list}).to_dict(orient = 'index')
d
>>
{('a1', 'b1'): {'C': ['c1', 'c2', 'c3', 'c4'],
'D': ['d1', 'd2', 'd3', 'd4'],
'E': ['e1', 'e2', 'e3', 'e4']},
('a1', 'b2'): {'C': ['c5', 'c6', 'c7'],
'D': ['d5', 'd6', 'd7'],
'E': ['e5', 'e6', 'e7']},
('a2', 'b3'): {'C': ['c8', 'c9', 'c10', 'c11', 'c12', 'c13'],
'D': ['d8', 'd9', 'd10', 'd11', 'd12', 'd13'],
'E': ['e8', 'e9', 'e10', 'e11', 'e12', 'e13']},
('a2', 'b4'): {'C': ['c14', 'c15'], 'D': ['d14', 'd15'], 'E': ['e14', 'e15']}}
I prefer this way because it is less nested; however, you can easily rebuild the dict to fit your requirements:
# Rebuild the tuple-keyed dict `d` into a nested dict: the first tuple
# element becomes the outer key, the second the inner key, and the column
# names are lower-cased.
new_dic = {}
for k, v in d.items():
    # Make sure the outer entry exists before adding the inner one.
    new_dic.setdefault(k[0], {})
    new_dic[k[0]].update({k[1]: {_k.lower(): _v for _k, _v in v.items()}})
new_dic
>>
{'a1': {'b1': {'c': ['c1', 'c2', 'c3', 'c4'],
'd': ['d1', 'd2', 'd3', 'd4'],
'e': ['e1', 'e2', 'e3', 'e4']},
'b2': {'c': ['c5', 'c6', 'c7'],
'd': ['d5', 'd6', 'd7'],
'e': ['e5', 'e6', 'e7']}},
'a2': {'b3': {'c': ['c8', 'c9', 'c10', 'c11', 'c12', 'c13'],
'd': ['d8', 'd9', 'd10', 'd11', 'd12', 'd13'],
'e': ['e8', 'e9', 'e10', 'e11', 'e12', 'e13']},
'b4': {'c': ['c14', 'c15'], 'd': ['d14', 'd15'], 'e': ['e14', 'e15']}}}
In most cases it is recommended to keep your data structure as flat as possible.

Use the following code:
outDict = { key: { key2: grp2.to_dict(orient='list')
for key2, grp2 in grp.groupby(level=1) }
for key, grp in df.set_index(['A', 'B']).groupby(level=0) }
The result, after some "pretty printing" reformatting is:
{
'a1': {
'b1': {
'C': ['c1', 'c2', 'c3', 'c4'],
'D': ['d1', 'd2', 'd3', 'd4'],
'E': ['e1', 'e2', 'e3', 'e4']
},
'b2': {
'C': ['c5', 'c6', 'c7'],
'D': ['d5', 'd6', 'd7'],
'E': ['e5', 'e6', 'e7']}},
'a2': {
'b3': {
'C': ['c8', 'c9', 'c10', 'c11', 'c12', 'c13'],
'D': ['d8', 'd9', 'd10', 'd11', 'd12', 'd13'],
'E': ['e8', 'e9', 'e10', 'e11', 'e12', 'e13']
},
'b4': {
'C': ['c14', 'c15'],
'D': ['d14', 'd15'],
'E': ['e14', 'e15']
}
}
}

Related

How do i sort nested dictionary based on value of first key

I have a dictionary in following format:
{
'Item1': {'Cl': ['E1', 'E2', 'E3'], 'Re': ['E1', 'E2', 'E3']},
'Item2': {'Cl': ['E1', 'E2', 'E3'], 'Re': ['E1', 'E2', 'E3']},
'Item3': {'Cl': ['E1', 'E2', 'E3'], 'Re': ['E1', 'E2', 'E3']},
'Item4': {'Cl': ['E2', 'E1', 'E3'], 'Re': ['E1', 'E2', 'E3']}
}
And i want to restructure in the following format:
{
'Item1': {'Re': ['E1', 'E2', 'E3'], 'Cl': ['E1', 'E2', 'E3']},
'Item2': {'Re': ['E1', 'E2', 'E3'], 'Cl': ['E1', 'E2', 'E3']},
'Item3': {'Re': ['E1', 'E2', 'E3'], 'Cl': ['E1', 'E2', 'E3']},
'Item4': {'Re': ['E2', 'E1', 'E3'], 'Cl': ['E1', 'E2', 'E3']}
}
I have tried sorted() but it doesn't seem to work or maybe i'm not implementing it in the correct way.
You can use a dictionary comprehension. I am assuming here that you want to sort the keys by reverse lexicographic order:
{k:{k2:v[k2] for k2 in sorted(v, reverse=True)} for k,v in d.items()}
output:
{'Item1': {'Re': ['E1', 'E2', 'E3'], 'Cl': ['E1', 'E2', 'E3']},
'Item2': {'Re': ['E1', 'E2', 'E3'], 'Cl': ['E1', 'E2', 'E3']},
'Item3': {'Re': ['E1', 'E2', 'E3'], 'Cl': ['E1', 'E2', 'E3']},
'Item4': {'Re': ['E1', 'E2', 'E3'], 'Cl': ['E2', 'E1', 'E3']}}
If you want to push 'Re' to the beginning, and keep the rest in the same order (assuming more than 2 keys), you could do:
{k:{k2:v[k2] for k2 in sorted(v, key='Re'.__ne__)} for k,v in d.items()}

Will output change if right_index is used instead of left_index with both left_on and right_on defined in pandas dataframe

I have two dataframes
import pandas as pd
df1 = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'],
'B': ['B0', 'B1', 'B2', 'B3'],
'C': ['C0', 'C1', 'C2', 'C3'],
'D': ['D0', 'D1', 'D2', 'D3']},index=[0, 1, 2, 3])
df2 = pd.DataFrame({'A': ['A0', 'A5', 'A6', 'A7'],
'B': ['B1', 'B5', 'B6', 'B7'],
'C': ['A1', 'C5', 'C6', 'C7'],
'D': ['B1', 'D5', 'D6', 'D7']},index=[4, 5, 6, 7])
Output I
pd.merge(df1, df2, how='outer', left_index=True, left_on='A', right_on='A')
Output II
pd.merge(df1, df2, how='outer', right_index=True, left_on='A', right_on='A')
Why do these above two outputs differ? Basically I want clarification regarding the functionality of right_index and left_index?

Concatenation of a variant number of keys of a dictionary Python (recursion?)

Hello Stack Overflow members,
I'm trying to concatenate the keys (strings) on the one hand, and the values (lists) on the other hand, of a dictionary.
For your better understanding, here is what I have at the beginning:
dict = {'bk1':
{'k11': ['a1', 'b1', 'c1'],
'k12': ['a2', 'b2', 'c2']},
'bk2':
{'k21': ['d1', 'e1'],
'k22': ['d2', 'e2'],
'k23': ['d3', 'e3']},
'bk3':
{'k31': ['f1', 'g1', 'h1'],
'k32': ['f2', 'g2', 'h2']}
}
And here is what I would like at the end:
newdict = {'k11_k21_k31': ['a1', 'b1', 'c1', 'd1', 'e1', 'f1', 'g1', 'h1'],
'k11_k21_k32': ['a1', 'b1', 'c1', 'd1', 'e1', 'f2', 'g2', 'h2'],
'k11_k22_k31': ['a1', 'b1', 'c1', 'd2', 'e2', 'f1', 'g1', 'h1'],
'k11_k22_k32': ['a1', 'b1', 'c1', 'd2', 'e2', 'f2', 'g2', 'h2'],
'k11_k23_k31': ['a1', 'b1', 'c1', 'd3', 'e3', 'f1', 'g1', 'h1'],
'k11_k23_k32': ['a1', 'b1', 'c1', 'd3', 'e3', 'f2', 'g2', 'h2'],
'k12_k21_k31': ['a2', 'b2', 'c2', 'd1', 'e1', 'f1', 'g1', 'h1'],
'k12_k21_k32': ['a2', 'b2', 'c2', 'd1', 'e1', 'f2', 'g2', 'h2'],
'k12_k22_k31': ['a2', 'b2', 'c2', 'd2', 'e2', 'f1', 'g1', 'h1'],
'k12_k22_k32': ['a2', 'b2', 'c2', 'd2', 'e2', 'f2', 'g2', 'h2'],
'k12_k23_k31': ['a2', 'b2', 'c2', 'd3', 'e3', 'f1', 'g1', 'h1'],
'k12_k23_k32': ['a2', 'b2', 'c2', 'd3', 'e3', 'f2', 'g2', 'h2']}
I wish to do that with:
a variable number of "big keys" (bki), and for each bki, a variable number of keys (kij).
"Full combination" between "big keys". For example, I don't expect results like:
{'k11_k23': ['a1', 'b1', 'c1', 'd3', 'e3']}
where the "bk3" is missed.
I tried with nested "for" loops but the number of loops depends on the number of "big keys"...
Then, I felt that the problem could be solved with recursion (maybe?), but in spite of my research and my will to implement it, I failed.
Any help with "recursive or not" solution would be strongly appreciated.
Thank you,
Mat
Whoaa, what a reactivity!
Thanks a lot for all your quick answers, it works perfect!
As suggested by @jksnw in the comments, you can use itertools.product to do this:
import itertools

dct = {
    'bk1': {
        'k11': ['a1', 'b1', 'c1'],
        'k12': ['a2', 'b2', 'c2'],
    },
    'bk2': {
        'k21': ['d1', 'e1'],
        'k22': ['d2', 'e2'],
        'k23': ['d3', 'e3'],
    },
    'bk3': {
        'k31': ['f1', 'g1', 'h1'],
        'k32': ['f2', 'g2', 'h2'],
    },
}

# The outer ("big") keys; reused below to pair each chosen inner key with
# the block it came from.
big_keys = dct.keys()
# One collection of inner ("small") keys per big key. This generator is
# consumed exactly once, by itertools.product below.
small_keys = (dct[big_key].keys() for big_key in big_keys)

res = {}
# Each tuple from product() holds exactly one small key from every block,
# so no combination can skip a big key.
for keys_from_each in itertools.product(*small_keys):
    key = "_".join(keys_from_each)
    value = []
    for big_key, small_key in zip(big_keys, keys_from_each):
        value.extend(dct[big_key][small_key])
    res[key] = value
So that:
>>> res
{'k11_k21_k31': ['a1', 'b1', 'c1', 'd1', 'e1', 'f1', 'g1', 'h1'],
'k11_k21_k32': ['a1', 'b1', 'c1', 'd1', 'e1', 'f2', 'g2', 'h2'],
'k11_k22_k31': ['a1', 'b1', 'c1', 'd2', 'e2', 'f1', 'g1', 'h1'],
'k11_k22_k32': ['a1', 'b1', 'c1', 'd2', 'e2', 'f2', 'g2', 'h2'],
'k11_k23_k31': ['a1', 'b1', 'c1', 'd3', 'e3', 'f1', 'g1', 'h1'],
'k11_k23_k32': ['a1', 'b1', 'c1', 'd3', 'e3', 'f2', 'g2', 'h2'],
'k12_k21_k31': ['a2', 'b2', 'c2', 'd1', 'e1', 'f1', 'g1', 'h1'],
'k12_k21_k32': ['a2', 'b2', 'c2', 'd1', 'e1', 'f2', 'g2', 'h2'],
'k12_k22_k31': ['a2', 'b2', 'c2', 'd2', 'e2', 'f1', 'g1', 'h1'],
'k12_k22_k32': ['a2', 'b2', 'c2', 'd2', 'e2', 'f2', 'g2', 'h2'],
'k12_k23_k31': ['a2', 'b2', 'c2', 'd3', 'e3', 'f1', 'g1', 'h1'],
'k12_k23_k32': ['a2', 'b2', 'c2', 'd3', 'e3', 'f2', 'g2', 'h2']}
Here, itertools.product is used to get a list of the "small keys" that we take from each block:
>>> big_keys = dct.keys()
>>> small_keys = (dct[big_key].keys() for big_key in big_keys)
>>> list(itertools.product(*small_keys))
[('k12', 'k22', 'k31'),
('k12', 'k22', 'k32'),
('k12', 'k23', 'k31'),
('k12', 'k23', 'k32'),
('k12', 'k21', 'k31'),
('k12', 'k21', 'k32'),
('k11', 'k22', 'k31'),
('k11', 'k22', 'k32'),
('k11', 'k23', 'k31'),
('k11', 'k23', 'k32'),
('k11', 'k21', 'k31'),
('k11', 'k21', 'k32')]
You can use itertools.product, and reduce(lambda x,y:x+y,i) to flatten your nested lists (on Python 3, first run `from functools import reduce`, since reduce is no longer a builtin). Also, do not use dict or other Python built-in type names or keywords as your variable names (I used d):
>>> from itertools import product
>>> v=[i.values() for i in d.values()]
>>> v=[reduce(lambda x,y:x+y,i) for i in product(*v)]
>>> k=[i.keys() for i in d.values()]
>>> k=['_'.join(i) for i in product(*k)]
>>> {k:v for k,v in zip(k,v)}
{'k31_k12_k22': ['f1', 'g1', 'h1', 'a2', 'b2', 'c2', 'd2', 'e2'],
'k32_k12_k21': ['f2', 'g2', 'h2', 'a2', 'b2', 'c2', 'd1', 'e1'],
'k31_k11_k22': ['f1', 'g1', 'h1', 'a1', 'b1', 'c1', 'd2', 'e2'],
'k31_k12_k23': ['f1', 'g1', 'h1', 'a2', 'b2', 'c2', 'd3', 'e3'],
'k32_k12_k22': ['f2', 'g2', 'h2', 'a2', 'b2', 'c2', 'd2', 'e2'],
'k31_k12_k21': ['f1', 'g1', 'h1', 'a2', 'b2', 'c2', 'd1', 'e1'],
'k32_k11_k23': ['f2', 'g2', 'h2', 'a1', 'b1', 'c1', 'd3', 'e3'],
'k32_k12_k23': ['f2', 'g2', 'h2', 'a2', 'b2', 'c2', 'd3', 'e3'],
'k31_k11_k21': ['f1', 'g1', 'h1', 'a1', 'b1', 'c1', 'd1', 'e1'],
'k31_k11_k23': ['f1', 'g1', 'h1', 'a1', 'b1', 'c1', 'd3', 'e3'],
'k32_k11_k21': ['f2', 'g2', 'h2', 'a1', 'b1', 'c1', 'd1', 'e1'],
'k32_k11_k22': ['f2', 'g2', 'h2', 'a1', 'b1', 'c1', 'd2', 'e2']}

How can i make the list form list of tuples

I have this
d = \
[('a', {'b': 'c1', 'd': 'f1'}),
('a', {'bb': 'c2', 'dd': 'f2'}),
('a', {'bbb': 'c3', 'ddd': 'f3'})]
I want the output like this
['c1', 'f1', 'f2', 'c2', 'c3', 'f3']
I have tried this
In [51]: [a.values() for k,a in d]
Out[51]: [['c1', 'f1'], ['f2', 'c2'], ['c3', 'f3']]
I want to do that simplest and shortest possible way
>>> d = \
[('a', {'b': 'c1', 'd': 'f1'}),
('a', {'bb': 'c2', 'dd': 'f2'}),
('a', {'bbb': 'c3', 'ddd': 'f3'})]
>>> [y for x in d for y in x[1].values()]
['c1', 'f1', 'f2', 'c2', 'c3', 'f3']
You can use itertools.chain:
>>> d=[('a', {'b': 'c1', 'd': 'f1'}),
('a', {'bb': 'c2', 'dd': 'f2'}),
('a', {'bbb': 'c3', 'ddd': 'f3'})]
>>> from itertools import chain
>>> list(chain.from_iterable( x[1].values() for x in d ))
['c1', 'f1', 'f2', 'c2', 'c3', 'f3']
Just an alternative answer using reduce:
import operator
from functools import reduce  # reduce is not a builtin on Python 3

# `d` is the list of (label, dict) tuples from the question. Each
# a.values() is wrapped in list() so the pieces can be concatenated with +;
# Python 3 dict views do not support operator.add.
reduce(operator.add, (list(a.values()) for k, a in d))
Maybe not the best idea, but it works. Essentially equivalent to Blender's
sum([a.values() for k, a in d], [])

List of Lists to List of Dictionaries

How can I convert a list of lists into a list of dictionaries?
More specifically: How do I go from this:
[['a1', 'b1', 'c1', 'd1', 'e1', 'f1', 'g1', 'h1', 'i1'], ['a2', 'b2', 'c2', 'd2', 'e2', 'f2', 'g2', 'h2', 'i2'], ['a3', 'b3', 'c3', 'd3', 'e3', 'f3', 'g3', 'h3', 'i3'], ['a4', 'b4', 'c4', 'd4', 'e4', 'f4', 'g4', 'h4', 'i4'], ['a5', 'b5', 'c5', 'd5', 'e5', 'f5', 'g5', 'h5', 'i5'], ['a6', 'b6', 'c6', 'd6', 'e6', 'f6', 'g6', 'h6', 'i6'], ['a7', 'b7', 'c7', 'd7', 'e7', 'f7', 'g7', 'h7', 'i7'], ['a8', 'b8', 'c8', 'd8', 'e8', 'f8', 'g8', 'h8', 'i8'], ['a9', 'b9', 'c9', 'd9', 'e9', 'f9', 'g9', 'h9', 'i9']]
to this:
[{'a1': None, 'b1': None, 'c1': None, 'd1': None, 'e1': None, 'f1': None, 'g1': None, 'h1': None, 'i1': None}, #etc
In [20]: l = [['a1', 'b1', 'c1', 'd1', 'e1', 'f1', 'g1', 'h1', 'i1'], ['a2', 'b2', 'c2', 'd2', 'e2', 'f2', 'g2', 'h2', 'i2'], ['a3', 'b3', 'c3', 'd3', 'e3', 'f3', 'g3', 'h3', 'i3'], ['a4', 'b4', 'c4', 'd4', 'e4', 'f4', 'g4', 'h4', 'i4'], ['a5', 'b5', 'c5', 'd5', 'e5', 'f5', 'g5', 'h5', 'i5'], ['a6', 'b6', 'c6', 'd6', 'e6', 'f6', 'g6', 'h6', 'i6'], ['a7', 'b7', 'c7', 'd7', 'e7', 'f7', 'g7', 'h7', 'i7'], ['a8', 'b8', 'c8', 'd8', 'e8', 'f8', 'g8', 'h8', 'i8'], ['a9', 'b9', 'c9', 'd9', 'e9', 'f9', 'g9', 'h9', 'i9']]
In [21]: map(dict.fromkeys, l)
Out[21]:
[{'a1': None,
'b1': None,
'c1': None,
'd1': None,
'e1': None,
'f1': None,
'g1': None,
'h1': None,
'i1': None},
{'a2': None,
'b2': None,
'c2': None,
'd2': None,
...
This will work with any iterable of iterables, not just a list of lists (provided, of course, that the second-level elements are hashable).
In Python 2, the above code returns a list.
In Python 3, it returns an iterable. If you require a list, you could use list(map(dict.fromkeys, l)).
Try this:
l = [['a1', 'b1', 'c1', 'd1', 'e1', 'f1', 'g1', 'h1', 'i1'], ['a2', 'b2', 'c2', 'd2', 'e2', 'f2', 'g2', 'h2', 'i2'], ['a3', 'b3', 'c3', 'd3', 'e3', 'f3', 'g3', 'h3', 'i3'], ['a4', 'b4', 'c4', 'd4', 'e4', 'f4', 'g4', 'h4', 'i4'], ['a5', 'b5', 'c5', 'd5', 'e5', 'f5', 'g5', 'h5', 'i5'], ['a6', 'b6', 'c6', 'd6', 'e6', 'f6', 'g6', 'h6', 'i6'], ['a7', 'b7', 'c7', 'd7', 'e7', 'f7', 'g7', 'h7', 'i7'], ['a8', 'b8', 'c8', 'd8', 'e8', 'f8', 'g8', 'h8', 'i8'], ['a9', 'b9', 'c9', 'd9', 'e9', 'f9', 'g9', 'h9', 'i9']]
res = []
for line in l:
res.append(dict((k, None) for k in line))
OR:
res = [dict((k, None) for k in line) for line in l]

Categories

Resources