Parsing and creating nested dictionaries

Parsing and creating nested dictionaries - python

I would like to create a dictionary containing a nested structure of dictionaries, like bellow :
{
"Jaque": {
"ES": {
"Madrid": [
{
"experience": 9
}
]
},
"FR": {
"Lyon": [
{
"experience": 11.4
}
],
"Paris": [
{
"experience": 20
}
]
}
},
"James": {
"UK": {
"London": [
{
"experience": 10.9
}
]
}
},
"Henry": {
"UK": {
"London": [
{
"experience": 15
}
]
}
},
"Joe": {
"US": {
"Boston": [
{
"experience": 100
}
]
}
}
}
}
My input is a list of dictionaries of this format:
c = [{
"country": "US",
"city": "Boston",
"name": "Joe",
"experience": 100
},
{
"country": "FR",
"city": "Paris",
"name": "Jaque",
"experience": 20
},
{
"country": "FR",
"city": "Lyon",
"name": "Jaque",
"experience": 11.4
},
{
"country": "ES",
"city": "Madrid",
"name": "Jaque",
"experience": 9
},
{
"country": "UK",
"city": "London",
"name": "Henry",
"experience": 15
},
{
"country": "UK",
"city": "London",
"name": "James",
"experience": 10.9
}
]
My first approach was to create the nested dict, step by step:
dd = dict.fromkeys([i.get("name") for i in c],defaultdict(dict))
#will create
# dd = {'Joe': defaultdict(<class 'dict'>, {}), 'Jaque': defaultdict(<class 'dict'>, {}), 'James': defaultdict(<class 'dict'>, {}), 'Henry': defaultdict(<class 'dict'>, {})}
for i in dd:
for j in c:
#verify if name from d is in dict j
if i in j.values():
dd[i]=dict(zip([a.get("country") for a in c if i in a.values() ],[b.get("city") for b in c if i in b.values() ]))
# dd will become
#{'Joe': {'US': 'Boston'}, 'Jaque': {'FR': 'Lyon', 'ES': 'Madrid'}, 'Henry': {'UK': 'London'}, 'James': {'UK': 'London'}}
Now I can't figure a way to create/update the nested structure of dict dd. Is there a more dynamic way to create dict? Thx

You could use itertools.groupby to organize the list similarly to your expected output and then loop to convert to a dict.
from itertools import groupby
from operator import itemgetter
data = [{"country": "US", "city": "Boston", "name": "Joe", "experience": 100 }, {"country": "FR", "city": "Paris", "name": "Jaque", "experience": 20 }, {"country": "FR", "city": "Lyon", "name": "Jaque", "experience": 11.4 }, {"country": "ES", "city": "Madrid", "name": "Jaque", "experience": 9 }, {"country": "UK", "city": "London", "name": "Henry", "experience": 15 }, {"country": "UK", "city": "London", "name": "James", "experience": 10.9 } ]
result = {}
for key, values in groupby(sorted(data, key=itemgetter('name')), key=itemgetter('name')):
result[key] = {
v['country']: {v['city']: [{'experience': v['experience']}]} for v in values
}
print(result)
# {'Henry': {'UK': {'London': [{'experience': 15}]}}, 'James': {'UK': {'London': [{'experience': 10.9}]}}, 'Jaque': {'FR': {'Lyon': [{'experience': 11.4}]}, 'ES': {'Madrid': [{'experience': 9}]}}, 'Joe': {'US': {'Boston': [{'experience': 100}]}}}

You can use recursion with itertools.groupby:
from itertools import groupby
def group(d, keys = None):
key, *keys = keys
new_d = {a:list(b) for a, b in groupby(sorted(d, key=lambda x:x[key]), key=lambda x:x[key])}
t = {a:[{c:d for c, d in k.items() if c != key} for k in b] for a, b in new_d.items()}
return {a:group(b, keys) if not all(len(i) == 1 for i in b) else b for a, b in t.items()}
result = group(data, keys = ['name', 'country', 'city', 'experience'])
import json
print(json.dumps(result, indent=4)))
Output:
{
"Henry": {
"UK": {
"London": [
{
"experience": 15
}
]
}
},
"James": {
"UK": {
"London": [
{
"experience": 10.9
}
]
}
},
"Jaque": {
"ES": {
"Madrid": [
{
"experience": 9
}
]
},
"FR": {
"Lyon": [
{
"experience": 11.4
}
],
"Paris": [
{
"experience": 20
}
]
}
},
"Joe": {
"US": {
"Boston": [
{
"experience": 100
}
]
}
}
}

Related

Get fields from a JSON file with Python

I have this json file loaded in Python with json.loads('myfile.json'):
[
{
"cart": {
"items": {
"3154ba405e5c5a22bbdf9bf1": {
"item": {
"_id": "3154ba405e5c5a22bbdf9bf1",
"title": "Drink alla cannella",
"price": 5.65,
"__v": 0
},
"qty": 1,
"price": 5.65
}
},
"totalQty": 1,
"totalPrice": 5.65
}
},
{
"cart": {
"items": {
"6214ba405e4c5a31bbdf9ad7": {
"item": {
"_id": "6214ba405e4c5a31bbdf9ad7",
"title": "Drink alla menta",
"price": 5.65,
"__v": 0
},
"qty": 2,
"price": 11.3
}
},
"totalQty": 2,
"totalPrice": 11.3
}
}
]
How I can access to both totalQty and totalPrice fields at same time and sum them?
How I can access to both Title fields to print it?

Let's assume that you have the JSON data available as a string then:
jdata = '''
[
{
"cart": {
"items": {
"3154ba405e5c5a22bbdf9bf1": {
"item": {
"_id": "3154ba405e5c5a22bbdf9bf1",
"title": "Drink alla cannella",
"price": 5.65,
"__v": 0
},
"qty": 1,
"price": 5.65
}
},
"totalQty": 1,
"totalPrice": 5.65
}
},
{
"cart": {
"items": {
"6214ba405e4c5a31bbdf9ad7": {
"item": {
"_id": "6214ba405e4c5a31bbdf9ad7",
"title": "Drink alla menta",
"price": 5.65,
"__v": 0
},
"qty": 2,
"price": 11.3
}
},
"totalQty": 2,
"totalPrice": 11.3
}
}
]
'''
totalQty = 0
totalPrice = 0
for d in json.loads(jdata):
c = d['cart']
totalQty += c['totalQty']
totalPrice += c['totalPrice']
for sd in c['items'].values():
print(sd['item']['title'])
print(f'{totalQty:d}', f'{totalPrice:.2f}')
Output:
3 16.95
Note:
I suspect that what you really want to do is multiply those two values

How to map the dictionary values to another dictionary

I have dictionary which is below
{
"aggregations": {
"A": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{ "key": "ADL", "doc_count": 1 },
{ "key": "SDD", "doc_count": 1 },
{ "key": "JJD", "doc_count": 1 }
]
},
"B": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{ "key": "ABC", "doc_count": 1 },
{ "key": "CDE", "doc_count": 1 },
{ "key": "FGH", "doc_count": 1 }
]
},
"C": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{ "key": "XYX", "doc_count": 1 },
{ "key": "NXS", "doc_count": 1 }
]
}
}
}
aggregations.keys will be aggregationfilters.fieldName
aggregations.buckets.key will be aggregationfilters.values.title
aggregationfilters.values.paragraph is null everytime
aggregations.buckets.doc_count will be aggregationfilters.values.count
Basically I need to extract aggregations.keys and aggregations.bucket values and put into different dictionary.
Need to write a general code structure to do that.
I cannot do with .pop(rename) the dictioanry
My expected out
{
"aggregationfilters": [
{
"name": "ABC",
"fieldName": "A",
"values": [
{ "title": "ADL", "paragraph": null, "count": 1 },
{ "title": "SDD", "paragraph": null, "count": 1 },
{ "title": "JJD", "paragraph": null, "count": 1 }
]
}, {
"name": "CDE",
"fieldName": "B",
"values": [
{ "title": "ABC", "paragraph": null, "count": 1 },
{ "title": "CDE", "paragraph": null, "count": 1 },
{ "title": "FGH", "paragraph": null, "count": 1 }
]
}, {
"name": "FGH",
"fieldName": "C",
"values": [
{ "title": "XYX", "paragraph": null, "count": 1 },
{ "title": "NXS", "paragraph": null, "count": 1 }
]
}
]
}

Well, this works, but even with my best effort this still doesn't look that clean.
import json
source = {
"aggregations": {
"A": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{"key": "ADL", "doc_count": 1},
{"key": "SDD", "doc_count": 1},
{"key": "JJD", "doc_count": 1},
],
},
"B": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{"key": "ABC", "doc_count": 1},
{"key": "CDE", "doc_count": 1},
{"key": "FGH", "doc_count": 1},
],
},
"C": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{"key": "XYX", "doc_count": 1}, {"key": "NXS", "doc_count": 1}],
},
}
}
convert_map = {
"buckets": "values",
"doc_count": "count",
"key": "title",
}
remove_map = {"sum_other_doc_count", "doc_count_error_upper_bound"}
add_map = {"name": "Changed VAL_", "fieldName": "VAL_"}
def converting_generator(
source_: dict, convert_map_: dict, remove_map_: set, add_map_: dict
):
working_dict = {k: v for k, v in source_.items()}
variable_identifier = "VAL_"
for key, inner_dic in working_dict.items():
inner_dic: dict
for rm_key in remove_map_:
try:
inner_dic.pop(rm_key)
except KeyError:
pass
for add_key, add_val in add_map_.items():
inner_dic[add_key] = add_val.replace(variable_identifier, key)
dumped = json.dumps(inner_dic, indent=2)
for original, target in convert_map_.items():
dumped = dumped.replace(original, target)
yield json.loads(dumped)
converted = {
"aggregation_filters": list(
converting_generator(source["aggregations"], convert_map, remove_map, add_map)
)
}
for inner_dict in converted["aggregation_filters"]:
for even_inner_dict in inner_dict["values"]:
even_inner_dict["paragraph"] = None
print(json.dumps(converted, indent=2))
Output:
{
"aggregation_filters": [
{
"values": [
{
"title": "ADL",
"count": 1,
"paragraph": null
},
{
"title": "SDD",
"count": 1,
"paragraph": null
},
{
"title": "JJD",
"count": 1,
"paragraph": null
}
],
"name": "Changed A",
"fieldName": "A"
},
{
"values": [
{
"title": "ABC",
"count": 1,
"paragraph": null
},
{
"title": "CDE",
"count": 1,
"paragraph": null
},
{
"title": "FGH",
"count": 1,
"paragraph": null
}
],
"name": "Changed B",
"fieldName": "B"
},
{
"values": [
{
"title": "XYX",
"count": 1,
"paragraph": null
},
{
"title": "NXS",
"count": 1,
"paragraph": null
}
],
"name": "Changed C",
"fieldName": "C"
}
]
}
Always show your code, would be nice if that's a working one - to show that you've put at least that worth of the effort on your problem.
I don't bother it as this feels like puzzle solving, but others may not.

Pandas dataframe and conversion to Json

Basically I´m reading a pandas dataframe and converting it to Json. I´m a beginner in coding, but I know that is preferable to use apply function instead iterrows (and I already tried to use apply function, but some difficulties in understand the syntax and find out my solution arose)!!
===============================
Data that I´m reading from excel
id label id_customer label_customer part_number number_customer product label_product key country value_product
6 Sao Paulo CUST-99992 Brazil 982 10 sho1564 shoes SH-99 Chile 1.5
6 Sao Paulo CUST-99992 Brazil 982 10 sn47282 sneakers SN-71 Germany 43.8
6 Sao Paulo CUST-43535 Argentina 435 15 sk84393 skirt SK-11 Netherlands 87.1
92 Hong Hong CUST-88888 China 785 58 ca40349 cap CA-82 Russia 3.95
===============================
CODE:
import pandas as pd
import json
df = pd.read_excel(path)
result = []
for labels, df1 in df.groupby(['id', 'label'],sort=False):
id_, label = labels
record = {'id': int(id_), 'label': label, 'Customer': []}
for inner_labels, df2 in df1.groupby(['id_customer', 'label_customer'],sort=False):
id_,label = inner_labels
record['Customer'].append({
'id': id_,
'label': label,
'Number': [{'part': str(p), 'number_customer': str(s)} for p, s in zip(df2['part_number'], df2['number_customer'])]
})
result.append(record)
===============================
Json I'm getting:
[
{
"id": 6,
"label": "Sao Paulo",
"Customer": [
{
"id": "CUST-99992",
"label": "Brazil",
"Number": [
{
"part": "982",
"number_customer": "10"
},
{
"part": "982",
"number_customer": "10"
}
]
},
{
"id": "CUST-43535",
"label": "Argentina",
"Number": [
{
"part": "435",
"number_customer": "15"
}
]
}
]
},
{
"id": 92,
"label": "Hong Kong",
"Customer": [
{
"id": "CUST-88888",
"label": "China",
"Number": [
{
"part": "785",
"number_customer": "58"
}
]
}
]
}
]
===============================
Json expected:
[
{
"id": 6,
"label": "Sao Paulo",
"Customer": [
{
"id": "CUST-99992",
"label": "Brazil",
"Number": [
{
"part": "982",
"number_customer": "10",
"Procucts": [
{
"product": "sho1564",
"label_product": "shoes",
"Order": [
{
"key": "SH-99",
"country": "Chile",
"value_product": "1.5"
}
]
},
{
"product": "sn47282",
"label_product": "sneakers",
"Order": [
{
"key": "SN-71",
"country": "Germany",
"value_product": "43.8"
}
]
}
]
}
]
},
{
"id": "CUST-43535",
"label": "Argentina",
"Number": [
{
"part": "435",
"number_customer": "15",
"Procucts": [
{
"product": "sk84393",
"label_product": "skirt",
"Order": [
{
"key": "SK-11",
"country": "Netherlands",
"value_product": "87.1"
}
]
}
]
}
]
}
]
},
{
"id": 92,
"label": "Hong Kong",
"Customer": [
{
"id": "CUST-88888",
"label": "China",
"Number": [
{
"part": "785",
"number_customer": "58",
"Procucts": [
{
"product": "ca40349",
"label_product": "cap",
"Order": [
{
"key": "CA-82",
"country": "Russia",
"value_product": "3.95"
}
]
}
]
}
]
}
]
}
]
===============================
Look that id and label is group of information even as id_customer and label customer is another group, part_number and number_customer is another, product and label_product another, key, country and value_product another.
My expected Json depends of my information inside my dataframe.
Can somebody help me in any way pls?

import pandas as pd
import json
df = pd.read_excel(path)
result = []
for labels, df1 in df.groupby(['id', 'label'], sort=False):
id_, label = labels
record = {'id': int(id_), 'label': label, 'Customer': []}
for inner_labels, df2 in df1.groupby(['id_customer', 'label_customer'], sort=False):
id_, label = inner_labels
customer = {'id': id_, 'label': label, 'Number': []}
for inner_labels, df3 in df2.groupby(['part_number', 'number_customer'], sort=False):
p, s = inner_labels
number = {'part': str(p), 'number_customer': str(s), 'Products': []}
for inner_labels, df4 in df3.groupby(['product', 'label_product'], sort=False):
p, lp = inner_labels
product = {'product': p, 'label_product': lp, 'Order': []}
for k, c, v in zip(df4['key'], df4['country'], df4['value_product']):
product['Order'].append({'key': k, 'country': c, 'value_product': v})
number['Products'].append(product)
customer['Number'].append(number)
record['Customer'].append(customer)
result.append(record)

Hope this is of use!
from io import StringIO
import pandas as pd
import json
csv = """id,label,id_customer,label_customer,part_number,number_customer,product,label_product,key,country,value_product
6,Sao Paulo,CUST-99992,Brazil,982,10,sho1564,shoes,SH-99,Chile,1.5
6,Sao Paulo,CUST-99992,Brazil,982,10,sn47282,sneakers,SN-71,Germany,43.8
6,Sao Paulo,CUST-43535,Argentina,435,15,sk84393,skirt,SK-11,Netherlands,87.1
92,Hong Hong,CUST-88888,China,785,58,ca40349,cap,CA-82,Russia,3.95"""
csv = StringIO(csv)
df = pd.read_csv(csv)
def split(df, groupby, json_func):
for x, group in df.groupby(groupby):
yield json_func(group, *x)
a = list(split(df, ['id', 'label'], lambda grp, id_, label: {"id": id_, "label": label, "Customer": list(
split(grp, ['id_customer', 'label_customer'], lambda grp_1, id_cust, label_cust: {"id": id_cust, "label": label_cust, "Number": list(
split(grp_1, ['part_number', 'number_customer'], lambda grp_2, part, num_cust: {"part": part, "number_customer": num_cust, "Products": list(
split(grp_2, ['product', 'label_product'], lambda grp_3, product, label_product: {"product": product, "label_product": label_product, "Order": list(
split(grp_3, ['key', 'country', 'value_product'], lambda _, key, country, value_product: {"key": key, "country": country, "value_product": value_product}))}
))})
)}))}))
display(a)

Pandas Dataframe to JSON Hierarchy

I have exhaustively reviewed/attempted implementations all the other questions on SO corresponding to this challenge and have yet to reach a solution.
Question: how do I convert employee and supervisor pairs into a hierarchical JSON structure to be used for a D3 visualization? There are an unknown number of levels, so it has to be dynamic.
I have a dataframe with five columns (yes, I realize this isn't the actual hierarchy of The Office):
Employee_FN Employee_LN Supervisor_FN Supervisor_LN Level
0 Michael Scott None None 0
1 Jim Halpert Michael Scott 1
2 Dwight Schrute Michael Scott 1
3 Stanley Hudson Jim Halpert 2
4 Pam Beasley Jim Halpert 2
5 Ryan Howard Pam Beasley 3
6 Kelly Kapoor Ryan Howard 4
7 Meredith Palmer Ryan Howard 4
Desired Output Snapshot:
{
"Employee_FN": "Michael",
"Employee_LN": "Scott",
"Level": "0",
"Reports": [{
"Employee_FN": "Jim",
"Employee_LN": "Halpert",
"Level": "1",
"Reports": [{
"Employee_FN": "Stanley",
"Employee_LN": "Hudson",
"Level": "2",
}, {
"Employee_FN": "Pam",
"Employee_LN": "Beasley",
"Level": "2",
}]
}]
}
Current State:
j = (df.groupby(['Level','Employee_FN','Employee_LN'], as_index=False)
.apply(lambda x: x[['Level','Employee_FN','Employee_LN']].to_dict('r'))
.reset_index()
.rename(columns={0:'Reports'})
.to_json(orient='records'))
print(json.dumps(json.loads(j), indent=2, sort_keys=True))
Current Output:
[
{
"Employee_FN": "Michael",
"Employee_LN": "Scott",
"Level": 0,
"Reports": [
{
"Employee_FN": "Michael",
"Employee_LN": "Scott",
"Level": 0
}
]
},
{
"Employee_FN": "Dwight",
"Employee_LN": "Schrute",
"Level": 1,
"Reports": [
{
"Employee_FN": "Dwight",
"Employee_LN": "Schrute",
"Level": 1
}
]
},
{
"Employee_FN": "Jim",
"Employee_LN": "Halpert",
"Level": 1,
"Reports": [
{
"Employee_FN": "Jim",
"Employee_LN": "Halpert",
"Level": 1
}
]
},
{
"Employee_FN": "Pam",
"Employee_LN": "Beasley",
"Level": 2,
"Reports": [
{
"Employee_FN": "Pam",
"Employee_LN": "Beasley",
"Level": 2
}
]
},
{
"Employee_FN": "Stanley",
"Employee_LN": "Hudson",
"Level": 2,
"Reports": [
{
"Employee_FN": "Stanley",
"Employee_LN": "Hudson",
"Level": 2
}
]
},
{
"Employee_FN": "Ryan",
"Employee_LN": "Howard",
"Level": 3,
"Reports": [
{
"Employee_FN": "Ryan",
"Employee_LN": "Howard",
"Level": 3
}
]
},
{
"Employee_FN": "Kelly",
"Employee_LN": "Kapoor",
"Level": 4,
"Reports": [
{
"Employee_FN": "Kelly",
"Employee_LN": "Kapoor",
"Level": 4
}
]
},
{
"Employee_FN": "Meredith",
"Employee_LN": "Palmer",
"Level": 4,
"Reports": [
{
"Employee_FN": "Meredith",
"Employee_LN": "Palmer",
"Level": 4
}
]
}
]
Problems:
Each person only has themselves as children
The whole JSON structure appears to be in a dict - I believe it has to be enclosed by {} to be readable
I have tried switched around the groupby and lambda elements in various configurations to reach the desired output as well. Any and all insight would be greatly appreciated! Thank you!
Update:
I changed my code block to this:
j = (df.groupby(['Level','Supervisor_FN','Supervisor_LN'], as_index=False)
.apply(lambda x: x[['Level','Employee_FN','Employee_LN']].to_dict('r'))
.reset_index()
.rename(columns={0:'Reports'})
.rename(columns={'Supervisor_FN':'Employee_FN'})
.rename(columns={'Supervisor_LN':'Employee_LN'})
.to_json(orient='records'))
print(json.dumps(json.loads(j), indent=2, sort_keys=True))
The new output is this:
[
{
"Employee_FN": "Michael",
"Employee_LN": "Scott",
"Level": 1,
"Reports": [
{
"Employee_FN": "Jim",
"Employee_LN": "Halpert",
"Level": 1
},
{
"Employee_FN": "Dwight",
"Employee_LN": "Schrute",
"Level": 1
}
]
},
{
"Employee_FN": "Jim",
"Employee_LN": "Halpert",
"Level": 2,
"Reports": [
{
"Employee_FN": "Stanley",
"Employee_LN": "Hudson",
"Level": 2
},
{
"Employee_FN": "Pam",
"Employee_LN": "Beasley",
"Level": 2
}
]
},
{
"Employee_FN": "Pam",
"Employee_LN": "Beasley",
"Level": 3,
"Reports": [
{
"Employee_FN": "Ryan",
"Employee_LN": "Howard",
"Level": 3
}
]
},
{
"Employee_FN": "Ryan",
"Employee_LN": "Howard",
"Level": 4,
"Reports": [
{
"Employee_FN": "Kelly",
"Employee_LN": "Kapoor",
"Level": 4
},
{
"Employee_FN": "Meredith",
"Employee_LN": "Palmer",
"Level": 4
}
]
}
]
Problems:
The Level matches the underlying employee for both the underlying employee and the supervisor
The nesting only goes one level deep

This type of problem isn't particularly well-suited for Pandas; the data structure you're going after is recursive, not tabular.
Here is one possible solution.
from operator import itemgetter
employee_key = itemgetter('Employee_FN', 'Employee_LN')
supervisor_key = itemgetter('Supervisor_FN', 'Supervisor_LN')
def subset(dict_, keys):
return {k: dict_[k] for k in keys}
# store employee references
cache = {}
# iterate over employees sorted by level, so supervisors are cached before reports
for row in df.sort_values('Level').to_dict('records'):
# look up employee/supervisor references
employee = cache.setdefault(employee_key(row), subset(row, keys=('Employee_FN', 'Employee_LN', 'Level')))
supervisor = cache.get(supervisor_key(row), {})
# link reports to employee
supervisor.setdefault('Reports', []).append(employee)
# grab only top-level employees
[rec for key, rec in cache.iteritems() if rec['Level'] == 0]
[{'Employee_FN': 'Michael',
'Employee_LN': 'Scott',
'Level': 0,
'Reports': [{'Employee_FN': 'Jim',
'Employee_LN': 'Halpert',
'Level': 1,
'Reports': [{'Employee_FN': 'Stanley',
'Employee_LN': 'Hudson',
'Level': 2},
{'Employee_FN': 'Pam',
'Employee_LN': 'Beasley',
'Level': 2,
'Reports': [{'Employee_FN': 'Ryan',
'Employee_LN': 'Howard',
'Level': 3,
'Reports': [{'Employee_FN': 'Kelly',
'Employee_LN': 'Kapoor',
'Level': 4},
{'Employee_FN': 'Meredith',
'Employee_LN': 'Palmer',
'Level': 4}]}]}]},
{'Employee_FN': 'Dwight', 'Employee_LN': 'Schrute', 'Level': 1}]}]

Merge two lists of dicts in python using pandas

I have two lists of dicts: one which has monthly data, and another which has quarterly data as follows:
monthly = [
{
"name": "Boston",
"month": "2015-May",
"total_monthly": 2
},
{
"name": "Boston",
"month": "2015-June",
"total_monthly": 8
},
{
"name": "Chicago",
"month": "2015-May",
"total_monthly": 10
},
{
"name": "Chicago",
"month": "2015-June",
"total_monthly": 13
}
]
quarterly =[
{
"name": "Boston",
"quarter": "2015-Q1",
"total_quarterly": 23
},
{
"name": "Boston",
"quarter": "2015-Q2",
"total_quarterly": 24
},
{
"name": "Chicago",
"quarter": "2015-Q1",
"total_quarterly": 40
},
{
"name": "Chicago",
"quarter": "2015-Q2",
"total_quarterly": 32
}
]
Conventionally, I can iterate through the lists and merge them based on common names. However, how can I achieve the merged data as follows using Pandas?
merged = [
{
"name": "Boston",
"trend_monthly" : [
{
"month": "2015-May",
"total_monthly": 2
},
{
"month": "2015-June",
"total_monthly": 8
},
],
"trend_quarterly" : [
{
"quarter": "2015-Q1",
"total_quarterly": 23
},
{
"quarter": "2015-Q2",
"total_quarterly": 24
},
]
},
{
"name": "Chicago",
"trend_monthly" : [
{
"month": "2015-May",
"total_monthly": 10
},
{
"month": "2015-June",
"total_monthly": 13
},
],
"trend_quarterly" : [
{
"quarter": "2015-Q1",
"total_quarterly": 40
},
{
"quarter": "2015-Q2",
"total_quarterly": 32
},
]
}]

You have to do something like this:
import pandas as pd
df_monthly = pd.DataFrame(monthly)
df_quarterly = pd.DataFrame(quarterly)
df = pd.concat([df_monthly, df_quarterly])
# This part does not group correctly, please edit for your needs
result = []
dict_monthly = dict(list(df[df.month.notnull()][['name',
'month',
'total_monthly']
].groupby(by='name')))
dict_quarterly = dict(list(df[df.quarter.notnull()][['name',
'quarter',
'total_quarterly']
].groupby(by='name')))
result.append(dict_monthly)
result.append(dict_quarterly)

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Parsing and creating nested dictionaries - python

Related

Get fields from a JSON file with Python

How to map the dictionary values to another dictionary

Pandas dataframe and conversion to Json

Pandas Dataframe to JSON Hierarchy

Merge two lists of dicts in python using pandas

Categories

Resources