Pandas DataFrame to JSON with multiple nested categories - python

I'm looking for a solution to convert a Pandas DataFrame with 3 subcategories to an JSON output without any lists.
This is the data-structure I got:
import pandas as pd
df = pd.DataFrame({
'cat1': ['A', 'A', 'A', 'B', 'B', 'C', 'C', 'C'],
'cat2': ['BB', 'BB', 'BC', 'BB', 'BB', 'BB', 'BC', 'BC'],
'cat3': ['CC', 'CC', 'CD', 'CD', 'CD', 'CC', 'CD', 'CE'],
'prod': ['P1', 'P2', 'P3', 'P1', 'P4', 'P1', 'P3','P6'],
'amount': [132, 51, 12, 421, 55, 11, 123, 312]
})
And this is the desired JSON output:
{
"A": {
"BB": {
"CC": {
"P1": 132,
"P2": 51
}
},
"BC": {
"CD": {
"P3": 12
}
}
},
"B": {
"BB": {
"CD": {
"P1": 421,
"P4": 55
}
}
},
"C": {
"BB": {
"CC": {
"P1": 11
}
},
"BC": {
"CD": {
"P3": 123
},
"CE": {
"P6": 312
}
}
}
I had several attemts with the to_json method and tried to adapt this answer without any luck.
def f(x):
return dict({k:v for k,v in zip(x.prod, x.amount)})
(
df.groupby(['cat1','cat2', 'cat3'])
.apply(f)
.groupby(level=0)
.apply(lambda x: x.tolist())
.to_dict()
)
TypeError: zip argument #1 must support iteration

Credit to DSM but I modified the .ix to .iloc
def recur_dictify(frame):
if len(frame.columns) == 1:
if frame.values.size == 1: return frame.values[0][0]
return frame.values.squeeze()
grouped = frame.groupby(frame.columns[0])
d = {k: recur_dictify(g.iloc[:,1:]) for k,g in grouped}
return d
recur_dictify(df)
Out[211]:
{'A': {'BB': {'CC': {'P1': 132, 'P2': 51}}, 'BC': {'CD': {'P3': 12}}},
'B': {'BB': {'CD': {'P1': 421, 'P4': 55}}},
'C': {'BB': {'CC': {'P1': 11}}, 'BC': {'CD': {'P3': 123}, 'CE': {'P6': 312}}}}

Related

group few lists of dictionary by key

lst1 = [
{"id": "A", "a": "one"},
{"id": "B", "b": "two"}
]
lst2 = [
{"id": "A", "a1": "Three"},
{"id": "B", "b1": "Four"},
{"id": "C", "c1": "Four"}
]
lst3 = [
{"id": "A", "c1": "Five"},
{"id": "B", "d1": "Six"}
]
a = lst1+lst2+lst3
res = [
{'id': 'A', 'a': 'one'},
{'id': 'B', 'b': 'two'},
{'id': 'A', 'a1': 'Three'},
{'id': 'B', 'b1': 'Four'},
{'id': 'C', 'c1': 'Four'},
{'id': 'A', 'c1': 'Five'},
{'id': 'B', 'd1': 'Six'}
]
I want to group by Id the res will look like this
res = [
{'id': 'A', 'a': 'one','a1': 'Three','c1': 'Five'},
{'id': 'B', 'b': 'two', 'b1': 'Four', 'd1': 'Six'},
{'id': 'C', 'c1': 'Four'},
]
What I have tried:
result = []
for l1, l2,l3 in zip(lst1, lst2,lst3):
result.append({**l1 , **l2 , **l3})
print(result)
Here's one approach:
arr = [
{'id': 'A', 'a': 'one'},
{'id': 'B', 'b': 'two'},
{'id': 'A', 'a1': 'Three'},
{'id': 'B', 'b1': 'Four'},
{'id': 'C', 'c1': 'Four'},
{'id': 'A', 'c1': 'Five'},
{'id': 'B', 'd1': 'Six'}
]
res_dict = {d['id']:{'id':d['id']} for d in arr}
for d in arr:
res_dict[d['id']].update(d)
res = list(res_dict.values())
The resulting list res:
[{'id': 'A', 'a': 'one', 'a1': 'Three', 'c1': 'Five'},
{'id': 'B', 'b': 'two', 'b1': 'Four', 'd1': 'Six'},
{'id': 'C', 'c1': 'Four'}]
def coalesce(updates: list[dict[str,str]], join_key):
res = {}
for upd in updates:
for rec in upd:
key = rec[join_key]
if not key in res:
res[key] = {join_key: key}
res[key].update(rec)
return res.values()
recs = [lst1, lst2, lst3]
for value in coalesce(recs, "id"):
print(value)

converting an unordered nested json into a specific format

I have a json like this :
[
{
"a": 1,
"b": 2,
"c": 3,
"d": 4,
"more": { "sd": 2, "fg": 5, "ef": 6 },
"s": 6,
"as": 8,
"dfd": [{ "sda": "something", "SX": ["CB"], "jhgjh": "", "das": "" }]
},
{
"a3": 1,
"b": 2,
"c": 3,
"ds": 4,
"more": { "sd": 32, "fg": 5, "3ef": 6 },
"s": 6,
"as": 38,
"dfd": [
{ "sda": "somethinDDDg", "SX": ["CDB"], "jhgjh": "1", "das": "wes" }
]
}
]
I am trying to loop over all levels and convert the json into a specific format. I json can have any number of key value pairs and levels. Key value pairs can be different for each json entry.
I tried using for loop on key value pair and was able to get it converted into the required format on first level of json. I am unable to expand it to any level (where level is not fixed).
Expected output :
'data' : [{
{
'name' : 'a',
'value':1
},
{
'name' : 'b',
'value': 2
},
{
'name' : 'c',
'value': 3
},
{
'name' : 'd',
'value': 4
},
{
'name' : 'more',
'value': {'name' :'sd',
'value': 2},
{'name' : 'fg',
'value' : 5},
{'name':'ef',
'value':6},
{
'name' : 's',
'value': 6
},
{
'name' : 'as',
'value':
},
{
'name' : 'dfd',
'value': [ {"sda": "something",
"SX":["CB"],
"jhgjh": "",
"das": ''
}]
},
{{
'name' : 'a3',
'value':1
},
{
'name' : 'b',
'value': 2
},
{
'name' : 'c',
'value': 3
},
{
'name' : 'ds',
'value': 4
},
{
'name' : 'more',
'value': {'name' :'sd',
'value': 32},
{'name' : 'fg',
'value' : 5},
{'name':'3ef',
'value':6},
{
'name' : 's',
'value': 6
},
{
'name' : 'as',
'value': 38
},
{
'name' : 'dfd',
'value': [ {"sda": "somethinDDDg",
"SX":["CDB"],
"jhgjh": "1",
"das": 'wes'
}]
}}]
Okay, I think I understand now what you want. Using a recursive helper function:
def names_and_values(d):
if type(d) != dict:
return d
return [{"name": key, "value": names_and_values(value)} for key, value in d.items()]
And the json module from the standard library:
import json
data = json.loads(your_json_string)
converted_data = [names_and_values(d) for d in data]
Output:
[[{'name': 'a', 'value': 1},
{'name': 'b', 'value': 2},
{'name': 'c', 'value': 3},
{'name': 'd', 'value': 4},
{'name': 'more',
'value': [{'name': 'sd', 'value': 2},
{'name': 'fg', 'value': 5},
{'name': 'ef', 'value': 6}]},
{'name': 's', 'value': 6},
{'name': 'as', 'value': 8},
{'name': 'dfd',
'value': [{'sda': 'something',
'SX': ['CB'],
'jhgjh': '',
'das': ''}]}],
[{'name': 'a3', 'value': 1},
{'name': 'b', 'value': 2},
{'name': 'c', 'value': 3},
{'name': 'ds', 'value': 4},
{'name': 'more',
'value': [{'name': 'sd', 'value': 32},
{'name': 'fg', 'value': 5},
{'name': '3ef', 'value': 6}]},
{'name': 's', 'value': 6},
{'name': 'as', 'value': 38},
{'name': 'dfd',
'value': [{'sda': 'somethinDDDg',
'SX': ['CDB'],
'jhgjh': '1',
'das': 'wes'}]}]]

Build nested Python dictionary from flatten dictionary

I have the following flatten dictionary containing one entry for each item and each item contains a parent and children attribute.
{
'a': {
parent: None,
children: ['b', 'c', 'd']
},
'b': {
parent: 'a',
children: ['e', 'f', 'g']
},
'c': {
parent: 'a',
children: []
},
'd': {
parent: 'a',
children: []
},
'e': {
parent: 'b',
children: []
},
'f': {
parent: 'b',
children: ['h']
},
'g': {
parent: 'b',
children: []
},
'h': {
parent: 'f',
children: []
},
}
How can I turn it into a nested dictionary which looks like this?
{
'a': {
'b': {
'e': {},
'f': {
'h':
}
'g': {}
},
'c': {},
'd': {},
}
}
You can use recursion:
d = {'a': {'parent': None, 'children': ['b', 'c', 'd']}, 'b': {'parent': 'a', 'children': ['e', 'f', 'g']}, 'c': {'parent': 'a', 'children': []}, 'd': {'parent': 'a', 'children': []}, 'e': {'parent': 'b', 'children': []}, 'f': {'parent': 'b', 'children': ['h']}, 'g': {'parent': 'b', 'children': []}, 'h': {'parent': 'f', 'children': []}}
def group(start=None):
return {a:group(a) for a, b in d.items() if b['parent'] == start}
import json
print(json.dumps(group(), indent=4))
Output:
{
"a": {
"b": {
"e": {},
"f": {
"h": {}
},
"g": {}
},
"c": {},
"d": {}
}
}

Get list of common dictionaries from List of dictionaries on one key value match

I am trying to merge the list of dictionaries inside a list based on a value of key "host". The sample input for the same looks like:
first = [{'host': '1', 'a': 'a', 'b': 'b'}, {'host': '2', 'a': 'c', 'd': 'd'}, {'host': '3', 'a': 'd', 'd': 'd'}]
second = [{'host': '1', 'a': 'w', 'b': 'e'}, {'host': '2', 'a': 'q', 'd': 's'}, {'host': '3', 'a': 'q', 'd': 'c'}]
third= [{'host': '1', 'a': 'r', 'b': 't'}, {'host': '2', 'a': 'f', 'd': 'b'}, {'host': '3', 'a': 'k', 'd': 'p'}]
I am trying to get output like this
final_list = {
"1": [
{ "host": "1", "a": "a", "b": "b" },
{ "host": "1", "a": "w", "b": "e" },
{ "host": "1", "a": "r", "b": "t" }
],
"2": [
{ "host": "2", "a": "c", "d": "d" },
{ "host": "2", "a": "q", "d": "s" },
{ "host": "2", "a": "f", "d": "b" }
],
"3": [
{ "host": "3", "a": "d", "d": "d" },
{ "host": "3", "a": "q", "d": "c" },
{ "host": "3", "a": "k", "d": "p" }
]
}
You want to zip them pairwise and then use enumerate to annotate the pairs starting at 1.
final_list = {str(num): [a, b, c] for num, (a, b, c) in
enumerate(zip(first, second, third), start = 1)}
>>> final_list
{'1': [{'a': 'a', 'b': 'b', 'host': '1'},
{'a': 'w', 'b': 'e', 'host': '1'},
{'a': 'r', 'b': 't', 'host': '1'}],
'2': [{'a': 'c', 'd': 'd', 'host': '2'},
{'a': 'q', 'd': 's', 'host': '2'},
{'a': 'f', 'd': 'b', 'host': '2'}],
'3': [{'a': 'd', 'd': 'd', 'host': '3'},
{'a': 'q', 'd': 'c', 'host': '3'},
{'a': 'k', 'd': 'p', 'host': '3'}]}
I gathered your List for simpler use and you can do that:
first = [{'host': '1', 'a': 'a', 'b': 'b'}, {'host': '2', 'a': 'c', 'd': 'd'}, {'host': '3', 'a': 'd', 'd': 'd'}]
second = [{'host': '1', 'a': 'w', 'b': 'e'}, {'host': '2', 'a': 'q', 'd': 's'}, {'host': '3', 'a': 'q', 'd': 'c'}]
third= [{'host': '1', 'a': 'r', 'b': 't'}, {'host': '2', 'a': 'f', 'd': 'b'}, {'host': '3', 'a': 'k', 'd': 'p'}]
final_dict = {}
allList = [first, second, third]
for aList in allList:
for aDict in aList:
if aDict["host"] not in final_dict.keys():
final_dict[aDict["host"]] = [aDict]
else:
final_dict[aDict["host"]].append([aDict])
print(final_dict)

Nested Dicts in Python keys and values

I got a dict of dicts which looks like this:
d {
1: {
a: 'aaa',
b: 'bbb',
c: 'ccc'
}
2: {
d: 'dddd',
a: 'abc',
c: 'cca'
}
3: {
e: 'eee',
a: 'ababa',
b: 'bebebe'
}
}
I want to convert by dict like this
d {
a: 1,2,3
b: 1,3
c: 1,2
d: 2
e: 3
}
How can I achieve this?I tried reversing it but it throws unhashable dict.
a = {
1: {
"a": "aaa",
"b": "bbb",
"c": "ccc"
},
2: {
"d": "ddd",
"a": "abc",
"c": "cca"
},
3: {
"e": "eee",
"a": "ababa",
"b": "bebebe"
}
}
from collections import defaultdict
b = defaultdict(list)
for i, v in a.items():
for j in v:
b[j].append(i)
The result b is:
defaultdict(<class 'list'>, {'a': [1, 2, 3], 'b': [1, 3], 'c': [1, 2], 'd': [2], 'e': [3]})
You just need to figure out the logic for it. Iterate through the main dictionary, and use the keys of the sub dictionaries to build your new dict.
d = {
1: {
'a': 'aaa',
'b': 'bbb',
'c': 'ccc'
},
2: {
'd': 'dddd',
'a': 'abc',
'c': 'cca'
},
3: {
'e': 'eee',
'a': 'ababa',
'b': 'bebebe'
}
}
newdict = {}
for k,v in d.items():
for keys in v:
newdict.setdefault(keys,[]).append(k)
print(newdict)

Categories

Resources