Pandas Dataframe to JSON Hierarchy - python

I have exhaustively reviewed and attempted the implementations from all the other questions on SO related to this challenge and have yet to reach a solution.
Question: how do I convert employee and supervisor pairs into a hierarchical JSON structure to be used for a D3 visualization? There are an unknown number of levels, so it has to be dynamic.
I have a dataframe with five columns (yes, I realize this isn't the actual hierarchy of The Office):
Employee_FN Employee_LN Supervisor_FN Supervisor_LN Level
0 Michael Scott None None 0
1 Jim Halpert Michael Scott 1
2 Dwight Schrute Michael Scott 1
3 Stanley Hudson Jim Halpert 2
4 Pam Beasley Jim Halpert 2
5 Ryan Howard Pam Beasley 3
6 Kelly Kapoor Ryan Howard 4
7 Meredith Palmer Ryan Howard 4
Desired Output Snapshot:
{
"Employee_FN": "Michael",
"Employee_LN": "Scott",
"Level": "0",
"Reports": [{
"Employee_FN": "Jim",
"Employee_LN": "Halpert",
"Level": "1",
"Reports": [{
"Employee_FN": "Stanley",
"Employee_LN": "Hudson",
"Level": "2",
}, {
"Employee_FN": "Pam",
"Employee_LN": "Beasley",
"Level": "2",
}]
}]
}
Current State:
j = (df.groupby(['Level','Employee_FN','Employee_LN'], as_index=False)
.apply(lambda x: x[['Level','Employee_FN','Employee_LN']].to_dict('r'))
.reset_index()
.rename(columns={0:'Reports'})
.to_json(orient='records'))
print(json.dumps(json.loads(j), indent=2, sort_keys=True))
Current Output:
[
{
"Employee_FN": "Michael",
"Employee_LN": "Scott",
"Level": 0,
"Reports": [
{
"Employee_FN": "Michael",
"Employee_LN": "Scott",
"Level": 0
}
]
},
{
"Employee_FN": "Dwight",
"Employee_LN": "Schrute",
"Level": 1,
"Reports": [
{
"Employee_FN": "Dwight",
"Employee_LN": "Schrute",
"Level": 1
}
]
},
{
"Employee_FN": "Jim",
"Employee_LN": "Halpert",
"Level": 1,
"Reports": [
{
"Employee_FN": "Jim",
"Employee_LN": "Halpert",
"Level": 1
}
]
},
{
"Employee_FN": "Pam",
"Employee_LN": "Beasley",
"Level": 2,
"Reports": [
{
"Employee_FN": "Pam",
"Employee_LN": "Beasley",
"Level": 2
}
]
},
{
"Employee_FN": "Stanley",
"Employee_LN": "Hudson",
"Level": 2,
"Reports": [
{
"Employee_FN": "Stanley",
"Employee_LN": "Hudson",
"Level": 2
}
]
},
{
"Employee_FN": "Ryan",
"Employee_LN": "Howard",
"Level": 3,
"Reports": [
{
"Employee_FN": "Ryan",
"Employee_LN": "Howard",
"Level": 3
}
]
},
{
"Employee_FN": "Kelly",
"Employee_LN": "Kapoor",
"Level": 4,
"Reports": [
{
"Employee_FN": "Kelly",
"Employee_LN": "Kapoor",
"Level": 4
}
]
},
{
"Employee_FN": "Meredith",
"Employee_LN": "Palmer",
"Level": 4,
"Reports": [
{
"Employee_FN": "Meredith",
"Employee_LN": "Palmer",
"Level": 4
}
]
}
]
Problems:
Each person only has themselves as children
The whole JSON structure appears to be wrapped in a list ([]) - I believe it has to be a single object enclosed by {} to be readable
I have also tried switching around the groupby and lambda elements in various configurations to reach the desired output. Any and all insight would be greatly appreciated! Thank you!
Update:
I changed my code block to this:
j = (df.groupby(['Level','Supervisor_FN','Supervisor_LN'], as_index=False)
.apply(lambda x: x[['Level','Employee_FN','Employee_LN']].to_dict('r'))
.reset_index()
.rename(columns={0:'Reports'})
.rename(columns={'Supervisor_FN':'Employee_FN'})
.rename(columns={'Supervisor_LN':'Employee_LN'})
.to_json(orient='records'))
print(json.dumps(json.loads(j), indent=2, sort_keys=True))
The new output is this:
[
{
"Employee_FN": "Michael",
"Employee_LN": "Scott",
"Level": 1,
"Reports": [
{
"Employee_FN": "Jim",
"Employee_LN": "Halpert",
"Level": 1
},
{
"Employee_FN": "Dwight",
"Employee_LN": "Schrute",
"Level": 1
}
]
},
{
"Employee_FN": "Jim",
"Employee_LN": "Halpert",
"Level": 2,
"Reports": [
{
"Employee_FN": "Stanley",
"Employee_LN": "Hudson",
"Level": 2
},
{
"Employee_FN": "Pam",
"Employee_LN": "Beasley",
"Level": 2
}
]
},
{
"Employee_FN": "Pam",
"Employee_LN": "Beasley",
"Level": 3,
"Reports": [
{
"Employee_FN": "Ryan",
"Employee_LN": "Howard",
"Level": 3
}
]
},
{
"Employee_FN": "Ryan",
"Employee_LN": "Howard",
"Level": 4,
"Reports": [
{
"Employee_FN": "Kelly",
"Employee_LN": "Kapoor",
"Level": 4
},
{
"Employee_FN": "Meredith",
"Employee_LN": "Palmer",
"Level": 4
}
]
}
]
Problems:
The Level matches the underlying employee for both the underlying employee and the supervisor
The nesting only goes one level deep

This type of problem isn't particularly well-suited for Pandas; the data structure you're going after is recursive, not tabular.
Here is one possible solution.
from operator import itemgetter
employee_key = itemgetter('Employee_FN', 'Employee_LN')
supervisor_key = itemgetter('Supervisor_FN', 'Supervisor_LN')
def subset(dict_, keys):
    """Return a new dict holding only the entries of ``dict_`` named in ``keys``.

    Raises ``KeyError`` if any requested key is missing from ``dict_``.
    """
    return {k: dict_[k] for k in keys}
# store employee references, keyed by (first name, last name)
cache = {}

# iterate over employees sorted by level, so supervisors are cached before reports
for row in df.sort_values('Level').to_dict('records'):
    # look up employee/supervisor references
    employee = cache.setdefault(
        employee_key(row),
        subset(row, keys=('Employee_FN', 'Employee_LN', 'Level')),
    )
    # a row whose supervisor is not cached (e.g. the top-level employee) gets a
    # throwaway dict, so the link below is simply discarded
    supervisor = cache.get(supervisor_key(row), {})
    # link reports to employee
    supervisor.setdefault('Reports', []).append(employee)

# grab only top-level employees
# (.values() works on both Python 2 and 3; .iteritems() exists only on Python 2)
[rec for rec in cache.values() if rec['Level'] == 0]
[{'Employee_FN': 'Michael',
'Employee_LN': 'Scott',
'Level': 0,
'Reports': [{'Employee_FN': 'Jim',
'Employee_LN': 'Halpert',
'Level': 1,
'Reports': [{'Employee_FN': 'Stanley',
'Employee_LN': 'Hudson',
'Level': 2},
{'Employee_FN': 'Pam',
'Employee_LN': 'Beasley',
'Level': 2,
'Reports': [{'Employee_FN': 'Ryan',
'Employee_LN': 'Howard',
'Level': 3,
'Reports': [{'Employee_FN': 'Kelly',
'Employee_LN': 'Kapoor',
'Level': 4},
{'Employee_FN': 'Meredith',
'Employee_LN': 'Palmer',
'Level': 4}]}]}]},
{'Employee_FN': 'Dwight', 'Employee_LN': 'Schrute', 'Level': 1}]}]

Related

Returning data that is not in ElasticSearch as 0 in doc_count

I am filtering in Elasticsearch. I want doc_count to be returned as 0 for dates that have no data, but those dates are not returned at all; only dates with data come back. Do you know how I can do this? Here is the Python output:
0 NaN
1 NaN
2 NaN
3 NaN
4 NaN
...
33479 {'date': '2022-04-13T08:08:00.000Z', 'value': 7}
33480 {'date': '2022-04-13T08:08:00.000Z', 'value': 7}
33481 {'date': '2022-04-13T08:08:00.000Z', 'value': 7}
33482 {'date': '2022-04-13T08:08:00.000Z', 'value': 7}
33483 {'date': '2022-04-13T08:08:00.000Z', 'value': 7}
And here is my ElasticSearch filter:
"from": 0,
"size": 0,
"query": {
"bool": {
"must":
[
{
"range": {
"#timestamp": {
"gte": "now-1M",
"lt": "now"
}
}
}
]
}
},
"aggs": {
"continent": {
"terms": {
"field": "source.geo.continent_name.keyword"
},
"aggs": {
"_source": {
"date_histogram": {
"field": "#timestamp", "interval": "8m"
}}}}}}
You need to set min_doc_count to 0 on each aggregation for which you want buckets with a zero doc_count to be returned.
{
"from": 0,
"size": 0,
"query": {
"bool": {
"must": [
{
"range": {
"#timestamp": {
"gte": "now-1M",
"lt": "now"
}
}
}
]
}
},
"aggs": {
"continent": {
"terms": {
"field": "source.geo.continent_name.keyword",
"min_doc_count": 0
},
"aggs": {
"_source": {
"date_histogram": {
"field": "#timestamp",
"interval": "8m",
"min_doc_count": 0
}
}
}
}
}
}

Get fields from a JSON file with Python

I have this json file loaded in Python with json.loads('myfile.json'):
[
{
"cart": {
"items": {
"3154ba405e5c5a22bbdf9bf1": {
"item": {
"_id": "3154ba405e5c5a22bbdf9bf1",
"title": "Drink alla cannella",
"price": 5.65,
"__v": 0
},
"qty": 1,
"price": 5.65
}
},
"totalQty": 1,
"totalPrice": 5.65
}
},
{
"cart": {
"items": {
"6214ba405e4c5a31bbdf9ad7": {
"item": {
"_id": "6214ba405e4c5a31bbdf9ad7",
"title": "Drink alla menta",
"price": 5.65,
"__v": 0
},
"qty": 2,
"price": 11.3
}
},
"totalQty": 2,
"totalPrice": 11.3
}
}
]
How can I access both the totalQty and totalPrice fields at the same time and sum them?
How can I access both title fields to print them?
Let's assume that you have the JSON data available as a string then:
import json

# The JSON document as a string: a list of objects, each with a single "cart".
jdata = '''
[
{
"cart": {
"items": {
"3154ba405e5c5a22bbdf9bf1": {
"item": {
"_id": "3154ba405e5c5a22bbdf9bf1",
"title": "Drink alla cannella",
"price": 5.65,
"__v": 0
},
"qty": 1,
"price": 5.65
}
},
"totalQty": 1,
"totalPrice": 5.65
}
},
{
"cart": {
"items": {
"6214ba405e4c5a31bbdf9ad7": {
"item": {
"_id": "6214ba405e4c5a31bbdf9ad7",
"title": "Drink alla menta",
"price": 5.65,
"__v": 0
},
"qty": 2,
"price": 11.3
}
},
"totalQty": 2,
"totalPrice": 11.3
}
}
]
'''

# Running totals accumulated across every cart in the document.
totalQty = 0
totalPrice = 0
for d in json.loads(jdata):
    c = d['cart']
    totalQty += c['totalQty']
    totalPrice += c['totalPrice']
    # "items" maps an item id to its line entry; the title sits under "item".
    for sd in c['items'].values():
        print(sd['item']['title'])

# Format the price to two decimals so float rounding noise is not shown.
print(f'{totalQty:d}', f'{totalPrice:.2f}')
Output:
Drink alla cannella
Drink alla menta
3 16.95
Note:
I suspect that what you really want to do is multiply those two values

Is there an efficient way to compare each key, value pair of a dictionary in a many to one comparison

Idea is to compare N number of dictionaries with a single standard dictionary where each key, value pair comparison has a different conditional rule.
Eg.,
Standard dictionary -
{'ram': 16,
'storage': [512, 1, 2],
'manufacturers': ['Dell', 'Apple', 'Asus', 'Alienware'],
'year': 2018,
'drives': ['A', 'B', 'C', 'D', 'E']
}
List of dictionaries -
{'ram': 8,
'storage': 1,
'manufacturers': 'Apple',
'year': 2018,
'drives': ['C', 'D', 'E']
},
{'ram': 16,
'storage': 4,
'manufacturers': 'Asus',
'year': 2021,
'drives': ['F', 'G','H']
},
{'ram': 4,
'storage': 2,
'manufacturers': 'ACER',
'year': 2016,
'drives': ['F', 'G', 'H']
}
Conditions-
'ram' > 8
if 'ram' >=8 then 'storage' >= 2 else 1
'manufacturers' in ['Dell', 'Apple', 'Asus', 'Alienware']
'year' >= 2018
if 'year' > 2018 then 'drives' in ['A', 'B', 'C', 'D', 'E'] else ['F', 'G', 'H']
So the expected output is to display all the non-matching ones with non-matching values and none/null for the matching values.
Expected Output -
{'ram': 8,
'storage': 1,
'manufacturers': None,
'year': None,
'drives': ['C', 'D', 'E']
},
{'ram': None,
'storage': None,
'manufacturers': None,
'year': None,
'drives': ['F','G','H']
},
{'ram': 4,
'storage': 2,
'manufacturers': 'ACER',
'year': 2016,
'drives': None
}
While working with MongoDB I encountered this problem where each document in a data collection should be compared with a standard collection. Any MongoDB direct query would also be very helpful.
To apply these conditions using a MongoDB aggregation, use the query below:
db.collection.aggregate([
{
"$project": {
"ram": {
"$cond": {
"if": {
"$gt": [
"$ram",
8
]
},
"then": null,
"else": "$ram",
}
},
"storage": {
"$cond": {
"if": {
"$and": [
{
"$gte": [
"$ram",
8
]
},
{
"$gte": [
"$storage",
2
]
},
],
},
"then": null,
"else": "$storage",
}
},
"manufacturers": {
"$cond": {
"if": {
"$in": [
"$manufacturers",
[
"Dell",
"Apple",
"Asus",
"Alienware"
],
]
},
"then": null,
"else": "$manufacturers",
}
},
"year": {
"$cond": {
"if": {
"$gte": [
"$year",
2018
]
},
"then": null,
"else": "$year",
}
},
"drives": {
"$cond": {
"if": {
"$gt": [
"$year",
2018
]
},
"then": {
"$setIntersection": [
"$drives",
[
"A",
"B",
"C",
"D",
"E"
]
]
},
"else": "$drives",
}
},
}
}
])
Mongo Playground Sample Execution
You can combine this with a for loop in Python:
for std_doc in std_col.find({}, {
"ram": 1,
"storage": 1,
"manufacturers": 1,
"year": 1,
"drives": 1,
}):
print(list(list_col.aggregate([
{
"$project": {
"ram": {
"$cond": {
"if": {
"$gt": [
"$ram",
8
]
},
"then": None,
"else": "$ram",
}
},
"storage": {
"$cond": {
"if": {
"$and": [
{
"$gte": [
"$ram",
8
]
},
{
"$gte": [
"$storage",
2
]
},
],
},
"then": None,
"else": "$storage",
}
},
"manufacturers": {
"$cond": {
"if": {
"$in": [
"$manufacturers",
[
"Dell",
"Apple",
"Asus",
"Alienware"
],
]
},
"then": None,
"else": "$manufacturers",
}
},
"year": {
"$cond": {
"if": {
"$gte": [
"$year",
2018
]
},
"then": None,
"else": "$year",
}
},
"drives": {
"$cond": {
"if": {
"$gt": [
"$year",
2018
]
},
"then": {
"$setIntersection": [
"$drives",
[
"A",
"B",
"C",
"D",
"E"
]
]
},
"else": "$drives",
}
},
}
}
])))
The most optimized solution is to perform a lookup, but this varies based on your requirement:
db.std_col.aggregate([
{
"$lookup": {
"from": "dict_col",
"let": {
"cmpRam": "$ram",
"cmpStorage": "$storage",
"cmpManufacturers": "$manufacturers",
"cmpYear": "$year",
"cmpDrives": "$drives",
},
"pipeline": [
{
"$project": {
"ram": {
"$cond": {
"if": {
"$gt": [
"$ram",
"$$cmpRam",
]
},
"then": null,
"else": "$ram",
}
},
"storage": {
"$cond": {
"if": {
"$and": [
{
"$gte": [
"$ram",
"$$cmpRam"
]
},
{
"$gte": [
"$storage",
"$$cmpStorage"
]
},
],
},
"then": null,
"else": "$storage",
}
},
"manufacturers": {
"$cond": {
"if": {
"$in": [
"$manufacturers",
"$$cmpManufacturers",
]
},
"then": null,
"else": "$manufacturers",
}
},
"year": {
"$cond": {
"if": {
"$gte": [
"$year",
"$$cmpYear",
]
},
"then": null,
"else": "$year",
}
},
"drives": {
"$cond": {
"if": {
"$gt": [
"$year",
"$$cmpYear"
]
},
"then": {
"$setIntersection": [
"$drives",
"$$cmpDrives"
]
},
"else": "$drives",
}
},
}
},
],
"as": "inventory_docs"
}
}
])
Mongo Playground Sample Execution

How to map the dictionary values to another dictionary

I have the dictionary shown below:
{
"aggregations": {
"A": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{ "key": "ADL", "doc_count": 1 },
{ "key": "SDD", "doc_count": 1 },
{ "key": "JJD", "doc_count": 1 }
]
},
"B": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{ "key": "ABC", "doc_count": 1 },
{ "key": "CDE", "doc_count": 1 },
{ "key": "FGH", "doc_count": 1 }
]
},
"C": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{ "key": "XYX", "doc_count": 1 },
{ "key": "NXS", "doc_count": 1 }
]
}
}
}
aggregations.keys will be aggregationfilters.fieldName
aggregations.buckets.key will be aggregationfilters.values.title
aggregationfilters.values.paragraph is null everytime
aggregations.buckets.doc_count will be aggregationfilters.values.count
Basically I need to extract aggregations.keys and aggregations.bucket values and put into different dictionary.
Need to write a general code structure to do that.
I cannot do this by renaming keys with .pop() on the dictionary.
My expected output:
{
"aggregationfilters": [
{
"name": "ABC",
"fieldName": "A",
"values": [
{ "title": "ADL", "paragraph": null, "count": 1 },
{ "title": "SDD", "paragraph": null, "count": 1 },
{ "title": "JJD", "paragraph": null, "count": 1 }
]
}, {
"name": "CDE",
"fieldName": "B",
"values": [
{ "title": "ABC", "paragraph": null, "count": 1 },
{ "title": "CDE", "paragraph": null, "count": 1 },
{ "title": "FGH", "paragraph": null, "count": 1 }
]
}, {
"name": "FGH",
"fieldName": "C",
"values": [
{ "title": "XYX", "paragraph": null, "count": 1 },
{ "title": "NXS", "paragraph": null, "count": 1 }
]
}
]
}
Well, this works, but even with my best effort this still doesn't look that clean.
import json
source = {
"aggregations": {
"A": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{"key": "ADL", "doc_count": 1},
{"key": "SDD", "doc_count": 1},
{"key": "JJD", "doc_count": 1},
],
},
"B": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{"key": "ABC", "doc_count": 1},
{"key": "CDE", "doc_count": 1},
{"key": "FGH", "doc_count": 1},
],
},
"C": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{"key": "XYX", "doc_count": 1}, {"key": "NXS", "doc_count": 1}],
},
}
}
convert_map = {
"buckets": "values",
"doc_count": "count",
"key": "title",
}
remove_map = {"sum_other_doc_count", "doc_count_error_upper_bound"}
add_map = {"name": "Changed VAL_", "fieldName": "VAL_"}
def converting_generator(
    source_: dict, convert_map_: dict, remove_map_: set, add_map_: dict
):
    """Yield one converted dict per top-level entry of ``source_``.

    For each ``key, inner`` pair of ``source_``:

    * top-level keys of ``inner`` listed in ``remove_map_`` are dropped;
    * every ``add_map_`` entry is added, with the placeholder ``"VAL_"`` in
      its value replaced by ``key``;
    * every dict key found in ``convert_map_`` is renamed to its mapped
      name, at any nesting depth (dicts inside lists included).

    ``source_`` is never modified: each yielded dict, and every dict or list
    nested inside it, is a fresh object. Renaming is an exact match on dict
    keys only, so values and keys that merely contain a mapped name as a
    substring are left untouched.
    """
    variable_identifier = "VAL_"

    def _rename_keys(node):
        # Rebuild dicts and lists recursively; scalars are returned as they are.
        if isinstance(node, dict):
            return {
                convert_map_.get(name, name): _rename_keys(value)
                for name, value in node.items()
            }
        if isinstance(node, list):
            return [_rename_keys(item) for item in node]
        return node

    for key, inner_dic in source_.items():
        # Filter into a new dict instead of pop(), which would mutate the input.
        trimmed = {
            name: value
            for name, value in inner_dic.items()
            if name not in remove_map_
        }
        for add_key, add_val in add_map_.items():
            trimmed[add_key] = add_val.replace(variable_identifier, key)
        yield _rename_keys(trimmed)
# Build one converted entry per aggregation found under "aggregations".
converted = {
    "aggregation_filters": list(
        converting_generator(source["aggregations"], convert_map, remove_map, add_map)
    )
}

# Each value entry also carries a "paragraph" field, which is always null.
for inner_dict in converted["aggregation_filters"]:
    for even_inner_dict in inner_dict["values"]:
        even_inner_dict["paragraph"] = None

print(json.dumps(converted, indent=2))
Output:
{
"aggregation_filters": [
{
"values": [
{
"title": "ADL",
"count": 1,
"paragraph": null
},
{
"title": "SDD",
"count": 1,
"paragraph": null
},
{
"title": "JJD",
"count": 1,
"paragraph": null
}
],
"name": "Changed A",
"fieldName": "A"
},
{
"values": [
{
"title": "ABC",
"count": 1,
"paragraph": null
},
{
"title": "CDE",
"count": 1,
"paragraph": null
},
{
"title": "FGH",
"count": 1,
"paragraph": null
}
],
"name": "Changed B",
"fieldName": "B"
},
{
"values": [
{
"title": "XYX",
"count": 1,
"paragraph": null
},
{
"title": "NXS",
"count": 1,
"paragraph": null
}
],
"name": "Changed C",
"fieldName": "C"
}
]
}
Always show your code - ideally a working attempt - to show that you've put at least that much effort into your problem.
I don't mind, as this feels like puzzle solving, but others may.

Parsing and creating nested dictionaries

I would like to create a dictionary containing a nested structure of dictionaries, like below:
{
"Jaque": {
"ES": {
"Madrid": [
{
"experience": 9
}
]
},
"FR": {
"Lyon": [
{
"experience": 11.4
}
],
"Paris": [
{
"experience": 20
}
]
}
},
"James": {
"UK": {
"London": [
{
"experience": 10.9
}
]
}
},
"Henry": {
"UK": {
"London": [
{
"experience": 15
}
]
}
},
"Joe": {
"US": {
"Boston": [
{
"experience": 100
}
]
}
}
}
My input is a list of dictionaries of this format:
c = [{
"country": "US",
"city": "Boston",
"name": "Joe",
"experience": 100
},
{
"country": "FR",
"city": "Paris",
"name": "Jaque",
"experience": 20
},
{
"country": "FR",
"city": "Lyon",
"name": "Jaque",
"experience": 11.4
},
{
"country": "ES",
"city": "Madrid",
"name": "Jaque",
"experience": 9
},
{
"country": "UK",
"city": "London",
"name": "Henry",
"experience": 15
},
{
"country": "UK",
"city": "London",
"name": "James",
"experience": 10.9
}
]
My first approach was to create the nested dict, step by step:
dd = dict.fromkeys([i.get("name") for i in c],defaultdict(dict))
#will create
# dd = {'Joe': defaultdict(<class 'dict'>, {}), 'Jaque': defaultdict(<class 'dict'>, {}), 'James': defaultdict(<class 'dict'>, {}), 'Henry': defaultdict(<class 'dict'>, {})}
for i in dd:
for j in c:
#verify if name from d is in dict j
if i in j.values():
dd[i]=dict(zip([a.get("country") for a in c if i in a.values() ],[b.get("city") for b in c if i in b.values() ]))
# dd will become
#{'Joe': {'US': 'Boston'}, 'Jaque': {'FR': 'Lyon', 'ES': 'Madrid'}, 'Henry': {'UK': 'London'}, 'James': {'UK': 'London'}}
Now I can't figure a way to create/update the nested structure of dict dd. Is there a more dynamic way to create dict? Thx
You could use itertools.groupby to organize the list similarly to your expected output and then loop to convert to a dict.
from itertools import groupby
from operator import itemgetter

data = [
    {"country": "US", "city": "Boston", "name": "Joe", "experience": 100},
    {"country": "FR", "city": "Paris", "name": "Jaque", "experience": 20},
    {"country": "FR", "city": "Lyon", "name": "Jaque", "experience": 11.4},
    {"country": "ES", "city": "Madrid", "name": "Jaque", "experience": 9},
    {"country": "UK", "city": "London", "name": "Henry", "experience": 15},
    {"country": "UK", "city": "London", "name": "James", "experience": 10.9},
]

by_name = itemgetter('name')

result = {}
# groupby only merges adjacent rows, so sort by the same key first.
for key, values in groupby(sorted(data, key=by_name), key=by_name):
    countries = result[key] = {}
    for v in values:
        # setdefault keeps earlier cities of the same country; a dict
        # comprehension keyed on country would overwrite them.
        cities = countries.setdefault(v['country'], {})
        cities.setdefault(v['city'], []).append({'experience': v['experience']})

print(result)
# {'Henry': {'UK': {'London': [{'experience': 15}]}}, 'James': {'UK': {'London': [{'experience': 10.9}]}}, 'Jaque': {'FR': {'Paris': [{'experience': 20}], 'Lyon': [{'experience': 11.4}]}, 'ES': {'Madrid': [{'experience': 9}]}}, 'Joe': {'US': {'Boston': [{'experience': 100}]}}}
You can use recursion with itertools.groupby:
from itertools import groupby
def group(d, keys=None):
    """Recursively nest the row dicts in ``d`` by each field named in ``keys``.

    Rows are grouped by the first field in ``keys``; that field is removed
    from every row and the remaining keys are applied to each group in turn.
    Recursion stops for a group once every row in it has exactly one field
    left, and that group's value is the list of those single-field rows.

    When ``keys`` is empty or ``None`` there is nothing to group by, so the
    rows are returned unchanged as a list.

    Raises ``KeyError`` if a row lacks the field being grouped on.
    """
    if not keys:
        return list(d)

    key, *keys = keys

    def _field(row):
        return row[key]

    nested = {}
    # groupby only merges adjacent rows, so sort on the same field first.
    for value, rows in groupby(sorted(d, key=_field), key=_field):
        remaining = [
            {name: val for name, val in row.items() if name != key}
            for row in rows
        ]
        if all(len(row) == 1 for row in remaining):
            nested[value] = remaining
        else:
            nested[value] = group(remaining, keys)
    return nested
import json

# Nest by name, then country, then city; "experience" stays as the leaf field.
result = group(data, keys=['name', 'country', 'city', 'experience'])
print(json.dumps(result, indent=4))
Output:
{
"Henry": {
"UK": {
"London": [
{
"experience": 15
}
]
}
},
"James": {
"UK": {
"London": [
{
"experience": 10.9
}
]
}
},
"Jaque": {
"ES": {
"Madrid": [
{
"experience": 9
}
]
},
"FR": {
"Lyon": [
{
"experience": 11.4
}
],
"Paris": [
{
"experience": 20
}
]
}
},
"Joe": {
"US": {
"Boston": [
{
"experience": 100
}
]
}
}
}

Categories

Resources