Parsing and creating nested dictionaries - python

I would like to create a dictionary containing a nested structure of dictionaries, like bellow :
{
"Jaque": {
"ES": {
"Madrid": [
{
"experience": 9
}
]
},
"FR": {
"Lyon": [
{
"experience": 11.4
}
],
"Paris": [
{
"experience": 20
}
]
}
},
"James": {
"UK": {
"London": [
{
"experience": 10.9
}
]
}
},
"Henry": {
"UK": {
"London": [
{
"experience": 15
}
]
}
},
"Joe": {
"US": {
"Boston": [
{
"experience": 100
}
]
}
}
}
}
My input is a list of dictionaries of this format:
c = [{
"country": "US",
"city": "Boston",
"name": "Joe",
"experience": 100
},
{
"country": "FR",
"city": "Paris",
"name": "Jaque",
"experience": 20
},
{
"country": "FR",
"city": "Lyon",
"name": "Jaque",
"experience": 11.4
},
{
"country": "ES",
"city": "Madrid",
"name": "Jaque",
"experience": 9
},
{
"country": "UK",
"city": "London",
"name": "Henry",
"experience": 15
},
{
"country": "UK",
"city": "London",
"name": "James",
"experience": 10.9
}
]
My first approach was to create the nested dict, step by step:
dd = dict.fromkeys([i.get("name") for i in c],defaultdict(dict))
#will create
# dd = {'Joe': defaultdict(<class 'dict'>, {}), 'Jaque': defaultdict(<class 'dict'>, {}), 'James': defaultdict(<class 'dict'>, {}), 'Henry': defaultdict(<class 'dict'>, {})}
for i in dd:
for j in c:
#verify if name from d is in dict j
if i in j.values():
dd[i]=dict(zip([a.get("country") for a in c if i in a.values() ],[b.get("city") for b in c if i in b.values() ]))
# dd will become
#{'Joe': {'US': 'Boston'}, 'Jaque': {'FR': 'Lyon', 'ES': 'Madrid'}, 'Henry': {'UK': 'London'}, 'James': {'UK': 'London'}}
Now I can't figure a way to create/update the nested structure of dict dd. Is there a more dynamic way to create dict? Thx

You could use itertools.groupby to organize the list similarly to your expected output and then loop to convert to a dict.
from itertools import groupby
from operator import itemgetter
data = [{"country": "US", "city": "Boston", "name": "Joe", "experience": 100 }, {"country": "FR", "city": "Paris", "name": "Jaque", "experience": 20 }, {"country": "FR", "city": "Lyon", "name": "Jaque", "experience": 11.4 }, {"country": "ES", "city": "Madrid", "name": "Jaque", "experience": 9 }, {"country": "UK", "city": "London", "name": "Henry", "experience": 15 }, {"country": "UK", "city": "London", "name": "James", "experience": 10.9 } ]
result = {}
for key, values in groupby(sorted(data, key=itemgetter('name')), key=itemgetter('name')):
result[key] = {
v['country']: {v['city']: [{'experience': v['experience']}]} for v in values
}
print(result)
# {'Henry': {'UK': {'London': [{'experience': 15}]}}, 'James': {'UK': {'London': [{'experience': 10.9}]}}, 'Jaque': {'FR': {'Lyon': [{'experience': 11.4}]}, 'ES': {'Madrid': [{'experience': 9}]}}, 'Joe': {'US': {'Boston': [{'experience': 100}]}}}

You can use recursion with itertools.groupby:
from itertools import groupby
def group(d, keys = None):
key, *keys = keys
new_d = {a:list(b) for a, b in groupby(sorted(d, key=lambda x:x[key]), key=lambda x:x[key])}
t = {a:[{c:d for c, d in k.items() if c != key} for k in b] for a, b in new_d.items()}
return {a:group(b, keys) if not all(len(i) == 1 for i in b) else b for a, b in t.items()}
result = group(data, keys = ['name', 'country', 'city', 'experience'])
import json
print(json.dumps(result, indent=4)))
Output:
{
"Henry": {
"UK": {
"London": [
{
"experience": 15
}
]
}
},
"James": {
"UK": {
"London": [
{
"experience": 10.9
}
]
}
},
"Jaque": {
"ES": {
"Madrid": [
{
"experience": 9
}
]
},
"FR": {
"Lyon": [
{
"experience": 11.4
}
],
"Paris": [
{
"experience": 20
}
]
}
},
"Joe": {
"US": {
"Boston": [
{
"experience": 100
}
]
}
}
}

Related

Get fields from a JSON file with Python

I have this json file loaded in Python with json.loads('myfile.json'):
[
{
"cart": {
"items": {
"3154ba405e5c5a22bbdf9bf1": {
"item": {
"_id": "3154ba405e5c5a22bbdf9bf1",
"title": "Drink alla cannella",
"price": 5.65,
"__v": 0
},
"qty": 1,
"price": 5.65
}
},
"totalQty": 1,
"totalPrice": 5.65
}
},
{
"cart": {
"items": {
"6214ba405e4c5a31bbdf9ad7": {
"item": {
"_id": "6214ba405e4c5a31bbdf9ad7",
"title": "Drink alla menta",
"price": 5.65,
"__v": 0
},
"qty": 2,
"price": 11.3
}
},
"totalQty": 2,
"totalPrice": 11.3
}
}
]
How I can access to both totalQty and totalPrice fields at same time and sum them?
How I can access to both Title fields to print it?
Let's assume that you have the JSON data available as a string then:
jdata = '''
[
{
"cart": {
"items": {
"3154ba405e5c5a22bbdf9bf1": {
"item": {
"_id": "3154ba405e5c5a22bbdf9bf1",
"title": "Drink alla cannella",
"price": 5.65,
"__v": 0
},
"qty": 1,
"price": 5.65
}
},
"totalQty": 1,
"totalPrice": 5.65
}
},
{
"cart": {
"items": {
"6214ba405e4c5a31bbdf9ad7": {
"item": {
"_id": "6214ba405e4c5a31bbdf9ad7",
"title": "Drink alla menta",
"price": 5.65,
"__v": 0
},
"qty": 2,
"price": 11.3
}
},
"totalQty": 2,
"totalPrice": 11.3
}
}
]
'''
totalQty = 0
totalPrice = 0
for d in json.loads(jdata):
c = d['cart']
totalQty += c['totalQty']
totalPrice += c['totalPrice']
for sd in c['items'].values():
print(sd['item']['title'])
print(f'{totalQty:d}', f'{totalPrice:.2f}')
Output:
3 16.95
Note:
I suspect that what you really want to do is multiply those two values

How to map the dictionary values to another dictionary

I have dictionary which is below
{
"aggregations": {
"A": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{ "key": "ADL", "doc_count": 1 },
{ "key": "SDD", "doc_count": 1 },
{ "key": "JJD", "doc_count": 1 }
]
},
"B": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{ "key": "ABC", "doc_count": 1 },
{ "key": "CDE", "doc_count": 1 },
{ "key": "FGH", "doc_count": 1 }
]
},
"C": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{ "key": "XYX", "doc_count": 1 },
{ "key": "NXS", "doc_count": 1 }
]
}
}
}
aggregations.keys will be aggregationfilters.fieldName
aggregations.buckets.key will be aggregationfilters.values.title
aggregationfilters.values.paragraph is null everytime
aggregations.buckets.doc_count will be aggregationfilters.values.count
Basically I need to extract aggregations.keys and aggregations.bucket values and put into different dictionary.
Need to write a general code structure to do that.
I cannot do with .pop(rename) the dictioanry
My expected out
{
"aggregationfilters": [
{
"name": "ABC",
"fieldName": "A",
"values": [
{ "title": "ADL", "paragraph": null, "count": 1 },
{ "title": "SDD", "paragraph": null, "count": 1 },
{ "title": "JJD", "paragraph": null, "count": 1 }
]
}, {
"name": "CDE",
"fieldName": "B",
"values": [
{ "title": "ABC", "paragraph": null, "count": 1 },
{ "title": "CDE", "paragraph": null, "count": 1 },
{ "title": "FGH", "paragraph": null, "count": 1 }
]
}, {
"name": "FGH",
"fieldName": "C",
"values": [
{ "title": "XYX", "paragraph": null, "count": 1 },
{ "title": "NXS", "paragraph": null, "count": 1 }
]
}
]
}
Well, this works, but even with my best effort this still doesn't look that clean.
import json
source = {
"aggregations": {
"A": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{"key": "ADL", "doc_count": 1},
{"key": "SDD", "doc_count": 1},
{"key": "JJD", "doc_count": 1},
],
},
"B": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{"key": "ABC", "doc_count": 1},
{"key": "CDE", "doc_count": 1},
{"key": "FGH", "doc_count": 1},
],
},
"C": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{"key": "XYX", "doc_count": 1}, {"key": "NXS", "doc_count": 1}],
},
}
}
convert_map = {
"buckets": "values",
"doc_count": "count",
"key": "title",
}
remove_map = {"sum_other_doc_count", "doc_count_error_upper_bound"}
add_map = {"name": "Changed VAL_", "fieldName": "VAL_"}
def converting_generator(
source_: dict, convert_map_: dict, remove_map_: set, add_map_: dict
):
working_dict = {k: v for k, v in source_.items()}
variable_identifier = "VAL_"
for key, inner_dic in working_dict.items():
inner_dic: dict
for rm_key in remove_map_:
try:
inner_dic.pop(rm_key)
except KeyError:
pass
for add_key, add_val in add_map_.items():
inner_dic[add_key] = add_val.replace(variable_identifier, key)
dumped = json.dumps(inner_dic, indent=2)
for original, target in convert_map_.items():
dumped = dumped.replace(original, target)
yield json.loads(dumped)
converted = {
"aggregation_filters": list(
converting_generator(source["aggregations"], convert_map, remove_map, add_map)
)
}
for inner_dict in converted["aggregation_filters"]:
for even_inner_dict in inner_dict["values"]:
even_inner_dict["paragraph"] = None
print(json.dumps(converted, indent=2))
Output:
{
"aggregation_filters": [
{
"values": [
{
"title": "ADL",
"count": 1,
"paragraph": null
},
{
"title": "SDD",
"count": 1,
"paragraph": null
},
{
"title": "JJD",
"count": 1,
"paragraph": null
}
],
"name": "Changed A",
"fieldName": "A"
},
{
"values": [
{
"title": "ABC",
"count": 1,
"paragraph": null
},
{
"title": "CDE",
"count": 1,
"paragraph": null
},
{
"title": "FGH",
"count": 1,
"paragraph": null
}
],
"name": "Changed B",
"fieldName": "B"
},
{
"values": [
{
"title": "XYX",
"count": 1,
"paragraph": null
},
{
"title": "NXS",
"count": 1,
"paragraph": null
}
],
"name": "Changed C",
"fieldName": "C"
}
]
}
Always show your code, would be nice if that's a working one - to show that you've put at least that worth of the effort on your problem.
I don't bother it as this feels like puzzle solving, but others may not.

Pandas dataframe and conversion to Json

Basically I´m reading a pandas dataframe and converting it to Json. I´m a beginner in coding, but I know that is preferable to use apply function instead iterrows (and I already tried to use apply function, but some difficulties in understand the syntax and find out my solution arose)!!
===============================
Data that I´m reading from excel
id label id_customer label_customer part_number number_customer product label_product key country value_product
6 Sao Paulo CUST-99992 Brazil 982 10 sho1564 shoes SH-99 Chile 1.5
6 Sao Paulo CUST-99992 Brazil 982 10 sn47282 sneakers SN-71 Germany 43.8
6 Sao Paulo CUST-43535 Argentina 435 15 sk84393 skirt SK-11 Netherlands 87.1
92 Hong Hong CUST-88888 China 785 58 ca40349 cap CA-82 Russia 3.95
===============================
CODE:
import pandas as pd
import json
df = pd.read_excel(path)
result = []
for labels, df1 in df.groupby(['id', 'label'],sort=False):
id_, label = labels
record = {'id': int(id_), 'label': label, 'Customer': []}
for inner_labels, df2 in df1.groupby(['id_customer', 'label_customer'],sort=False):
id_,label = inner_labels
record['Customer'].append({
'id': id_,
'label': label,
'Number': [{'part': str(p), 'number_customer': str(s)} for p, s in zip(df2['part_number'], df2['number_customer'])]
})
result.append(record)
===============================
Json I'm getting:
[
{
"id": 6,
"label": "Sao Paulo",
"Customer": [
{
"id": "CUST-99992",
"label": "Brazil",
"Number": [
{
"part": "982",
"number_customer": "10"
},
{
"part": "982",
"number_customer": "10"
}
]
},
{
"id": "CUST-43535",
"label": "Argentina",
"Number": [
{
"part": "435",
"number_customer": "15"
}
]
}
]
},
{
"id": 92,
"label": "Hong Kong",
"Customer": [
{
"id": "CUST-88888",
"label": "China",
"Number": [
{
"part": "785",
"number_customer": "58"
}
]
}
]
}
]
===============================
Json expected:
[
{
"id": 6,
"label": "Sao Paulo",
"Customer": [
{
"id": "CUST-99992",
"label": "Brazil",
"Number": [
{
"part": "982",
"number_customer": "10",
"Procucts": [
{
"product": "sho1564",
"label_product": "shoes",
"Order": [
{
"key": "SH-99",
"country": "Chile",
"value_product": "1.5"
}
]
},
{
"product": "sn47282",
"label_product": "sneakers",
"Order": [
{
"key": "SN-71",
"country": "Germany",
"value_product": "43.8"
}
]
}
]
}
]
},
{
"id": "CUST-43535",
"label": "Argentina",
"Number": [
{
"part": "435",
"number_customer": "15",
"Procucts": [
{
"product": "sk84393",
"label_product": "skirt",
"Order": [
{
"key": "SK-11",
"country": "Netherlands",
"value_product": "87.1"
}
]
}
]
}
]
}
]
},
{
"id": 92,
"label": "Hong Kong",
"Customer": [
{
"id": "CUST-88888",
"label": "China",
"Number": [
{
"part": "785",
"number_customer": "58",
"Procucts": [
{
"product": "ca40349",
"label_product": "cap",
"Order": [
{
"key": "CA-82",
"country": "Russia",
"value_product": "3.95"
}
]
}
]
}
]
}
]
}
]
===============================
Look that id and label is group of information even as id_customer and label customer is another group, part_number and number_customer is another, product and label_product another, key, country and value_product another.
My expected Json depends of my information inside my dataframe.
Can somebody help me in any way pls?
import pandas as pd
import json
df = pd.read_excel(path)
result = []
for labels, df1 in df.groupby(['id', 'label'], sort=False):
id_, label = labels
record = {'id': int(id_), 'label': label, 'Customer': []}
for inner_labels, df2 in df1.groupby(['id_customer', 'label_customer'], sort=False):
id_, label = inner_labels
customer = {'id': id_, 'label': label, 'Number': []}
for inner_labels, df3 in df2.groupby(['part_number', 'number_customer'], sort=False):
p, s = inner_labels
number = {'part': str(p), 'number_customer': str(s), 'Products': []}
for inner_labels, df4 in df3.groupby(['product', 'label_product'], sort=False):
p, lp = inner_labels
product = {'product': p, 'label_product': lp, 'Order': []}
for k, c, v in zip(df4['key'], df4['country'], df4['value_product']):
product['Order'].append({'key': k, 'country': c, 'value_product': v})
number['Products'].append(product)
customer['Number'].append(number)
record['Customer'].append(customer)
result.append(record)
Hope this is of use!
from io import StringIO
import pandas as pd
import json
csv = """id,label,id_customer,label_customer,part_number,number_customer,product,label_product,key,country,value_product
6,Sao Paulo,CUST-99992,Brazil,982,10,sho1564,shoes,SH-99,Chile,1.5
6,Sao Paulo,CUST-99992,Brazil,982,10,sn47282,sneakers,SN-71,Germany,43.8
6,Sao Paulo,CUST-43535,Argentina,435,15,sk84393,skirt,SK-11,Netherlands,87.1
92,Hong Hong,CUST-88888,China,785,58,ca40349,cap,CA-82,Russia,3.95"""
csv = StringIO(csv)
df = pd.read_csv(csv)
def split(df, groupby, json_func):
for x, group in df.groupby(groupby):
yield json_func(group, *x)
a = list(split(df, ['id', 'label'], lambda grp, id_, label: {"id": id_, "label": label, "Customer": list(
split(grp, ['id_customer', 'label_customer'], lambda grp_1, id_cust, label_cust: {"id": id_cust, "label": label_cust, "Number": list(
split(grp_1, ['part_number', 'number_customer'], lambda grp_2, part, num_cust: {"part": part, "number_customer": num_cust, "Products": list(
split(grp_2, ['product', 'label_product'], lambda grp_3, product, label_product: {"product": product, "label_product": label_product, "Order": list(
split(grp_3, ['key', 'country', 'value_product'], lambda _, key, country, value_product: {"key": key, "country": country, "value_product": value_product}))}
))})
)}))}))
display(a)

Pandas Dataframe to JSON Hierarchy

I have exhaustively reviewed/attempted implementations all the other questions on SO corresponding to this challenge and have yet to reach a solution.
Question: how do I convert employee and supervisor pairs into a hierarchical JSON structure to be used for a D3 visualization? There are an unknown number of levels, so it has to be dynamic.
I have a dataframe with five columns (yes, I realize this isn't the actual hierarchy of The Office):
Employee_FN Employee_LN Supervisor_FN Supervisor_LN Level
0 Michael Scott None None 0
1 Jim Halpert Michael Scott 1
2 Dwight Schrute Michael Scott 1
3 Stanley Hudson Jim Halpert 2
4 Pam Beasley Jim Halpert 2
5 Ryan Howard Pam Beasley 3
6 Kelly Kapoor Ryan Howard 4
7 Meredith Palmer Ryan Howard 4
Desired Output Snapshot:
{
"Employee_FN": "Michael",
"Employee_LN": "Scott",
"Level": "0",
"Reports": [{
"Employee_FN": "Jim",
"Employee_LN": "Halpert",
"Level": "1",
"Reports": [{
"Employee_FN": "Stanley",
"Employee_LN": "Hudson",
"Level": "2",
}, {
"Employee_FN": "Pam",
"Employee_LN": "Beasley",
"Level": "2",
}]
}]
}
Current State:
j = (df.groupby(['Level','Employee_FN','Employee_LN'], as_index=False)
.apply(lambda x: x[['Level','Employee_FN','Employee_LN']].to_dict('r'))
.reset_index()
.rename(columns={0:'Reports'})
.to_json(orient='records'))
print(json.dumps(json.loads(j), indent=2, sort_keys=True))
Current Output:
[
{
"Employee_FN": "Michael",
"Employee_LN": "Scott",
"Level": 0,
"Reports": [
{
"Employee_FN": "Michael",
"Employee_LN": "Scott",
"Level": 0
}
]
},
{
"Employee_FN": "Dwight",
"Employee_LN": "Schrute",
"Level": 1,
"Reports": [
{
"Employee_FN": "Dwight",
"Employee_LN": "Schrute",
"Level": 1
}
]
},
{
"Employee_FN": "Jim",
"Employee_LN": "Halpert",
"Level": 1,
"Reports": [
{
"Employee_FN": "Jim",
"Employee_LN": "Halpert",
"Level": 1
}
]
},
{
"Employee_FN": "Pam",
"Employee_LN": "Beasley",
"Level": 2,
"Reports": [
{
"Employee_FN": "Pam",
"Employee_LN": "Beasley",
"Level": 2
}
]
},
{
"Employee_FN": "Stanley",
"Employee_LN": "Hudson",
"Level": 2,
"Reports": [
{
"Employee_FN": "Stanley",
"Employee_LN": "Hudson",
"Level": 2
}
]
},
{
"Employee_FN": "Ryan",
"Employee_LN": "Howard",
"Level": 3,
"Reports": [
{
"Employee_FN": "Ryan",
"Employee_LN": "Howard",
"Level": 3
}
]
},
{
"Employee_FN": "Kelly",
"Employee_LN": "Kapoor",
"Level": 4,
"Reports": [
{
"Employee_FN": "Kelly",
"Employee_LN": "Kapoor",
"Level": 4
}
]
},
{
"Employee_FN": "Meredith",
"Employee_LN": "Palmer",
"Level": 4,
"Reports": [
{
"Employee_FN": "Meredith",
"Employee_LN": "Palmer",
"Level": 4
}
]
}
]
Problems:
Each person only has themselves as children
The whole JSON structure appears to be in a dict - I believe it has to be enclosed by {} to be readable
I have tried switched around the groupby and lambda elements in various configurations to reach the desired output as well. Any and all insight would be greatly appreciated! Thank you!
Update:
I changed my code block to this:
j = (df.groupby(['Level','Supervisor_FN','Supervisor_LN'], as_index=False)
.apply(lambda x: x[['Level','Employee_FN','Employee_LN']].to_dict('r'))
.reset_index()
.rename(columns={0:'Reports'})
.rename(columns={'Supervisor_FN':'Employee_FN'})
.rename(columns={'Supervisor_LN':'Employee_LN'})
.to_json(orient='records'))
print(json.dumps(json.loads(j), indent=2, sort_keys=True))
The new output is this:
[
{
"Employee_FN": "Michael",
"Employee_LN": "Scott",
"Level": 1,
"Reports": [
{
"Employee_FN": "Jim",
"Employee_LN": "Halpert",
"Level": 1
},
{
"Employee_FN": "Dwight",
"Employee_LN": "Schrute",
"Level": 1
}
]
},
{
"Employee_FN": "Jim",
"Employee_LN": "Halpert",
"Level": 2,
"Reports": [
{
"Employee_FN": "Stanley",
"Employee_LN": "Hudson",
"Level": 2
},
{
"Employee_FN": "Pam",
"Employee_LN": "Beasley",
"Level": 2
}
]
},
{
"Employee_FN": "Pam",
"Employee_LN": "Beasley",
"Level": 3,
"Reports": [
{
"Employee_FN": "Ryan",
"Employee_LN": "Howard",
"Level": 3
}
]
},
{
"Employee_FN": "Ryan",
"Employee_LN": "Howard",
"Level": 4,
"Reports": [
{
"Employee_FN": "Kelly",
"Employee_LN": "Kapoor",
"Level": 4
},
{
"Employee_FN": "Meredith",
"Employee_LN": "Palmer",
"Level": 4
}
]
}
]
Problems:
The Level matches the underlying employee for both the underlying employee and the supervisor
The nesting only goes one level deep
This type of problem isn't particularly well-suited for Pandas; the data structure you're going after is recursive, not tabular.
Here is one possible solution.
from operator import itemgetter
employee_key = itemgetter('Employee_FN', 'Employee_LN')
supervisor_key = itemgetter('Supervisor_FN', 'Supervisor_LN')
def subset(dict_, keys):
return {k: dict_[k] for k in keys}
# store employee references
cache = {}
# iterate over employees sorted by level, so supervisors are cached before reports
for row in df.sort_values('Level').to_dict('records'):
# look up employee/supervisor references
employee = cache.setdefault(employee_key(row), subset(row, keys=('Employee_FN', 'Employee_LN', 'Level')))
supervisor = cache.get(supervisor_key(row), {})
# link reports to employee
supervisor.setdefault('Reports', []).append(employee)
# grab only top-level employees
[rec for key, rec in cache.iteritems() if rec['Level'] == 0]
[{'Employee_FN': 'Michael',
'Employee_LN': 'Scott',
'Level': 0,
'Reports': [{'Employee_FN': 'Jim',
'Employee_LN': 'Halpert',
'Level': 1,
'Reports': [{'Employee_FN': 'Stanley',
'Employee_LN': 'Hudson',
'Level': 2},
{'Employee_FN': 'Pam',
'Employee_LN': 'Beasley',
'Level': 2,
'Reports': [{'Employee_FN': 'Ryan',
'Employee_LN': 'Howard',
'Level': 3,
'Reports': [{'Employee_FN': 'Kelly',
'Employee_LN': 'Kapoor',
'Level': 4},
{'Employee_FN': 'Meredith',
'Employee_LN': 'Palmer',
'Level': 4}]}]}]},
{'Employee_FN': 'Dwight', 'Employee_LN': 'Schrute', 'Level': 1}]}]

Merge two lists of dicts in python using pandas

I have two lists of dicts: one which has monthly data, and another which has quarterly data as follows:
monthly = [
{
"name": "Boston",
"month": "2015-May",
"total_monthly": 2
},
{
"name": "Boston",
"month": "2015-June",
"total_monthly": 8
},
{
"name": "Chicago",
"month": "2015-May",
"total_monthly": 10
},
{
"name": "Chicago",
"month": "2015-June",
"total_monthly": 13
}
]
quarterly =[
{
"name": "Boston",
"quarter": "2015-Q1",
"total_quarterly": 23
},
{
"name": "Boston",
"quarter": "2015-Q2",
"total_quarterly": 24
},
{
"name": "Chicago",
"quarter": "2015-Q1",
"total_quarterly": 40
},
{
"name": "Chicago",
"quarter": "2015-Q2",
"total_quarterly": 32
}
]
Conventionally, I can iterate through the lists and merge them based on common names. However, how can I achieve the merged data as follows using Pandas?
merged = [
{
"name": "Boston",
"trend_monthly" : [
{
"month": "2015-May",
"total_monthly": 2
},
{
"month": "2015-June",
"total_monthly": 8
},
],
"trend_quarterly" : [
{
"quarter": "2015-Q1",
"total_quarterly": 23
},
{
"quarter": "2015-Q2",
"total_quarterly": 24
},
]
},
{
"name": "Chicago",
"trend_monthly" : [
{
"month": "2015-May",
"total_monthly": 10
},
{
"month": "2015-June",
"total_monthly": 13
},
],
"trend_quarterly" : [
{
"quarter": "2015-Q1",
"total_quarterly": 40
},
{
"quarter": "2015-Q2",
"total_quarterly": 32
},
]
}]
You have to do something like this:
import pandas as pd
df_monthly = pd.DataFrame(monthly)
df_quarterly = pd.DataFrame(quarterly)
df = pd.concat([df_monthly, df_quarterly])
# This part does not group correctly, please edit for your needs
result = []
dict_monthly = dict(list(df[df.month.notnull()][['name',
'month',
'total_monthly']
].groupby(by='name')))
dict_quarterly = dict(list(df[df.quarter.notnull()][['name',
'quarter',
'total_quarterly']
].groupby(by='name')))
result.append(dict_monthly)
result.append(dict_quarterly)

Categories

Resources