Merge two lists of dicts in python using pandas

Merge two lists of dicts in python using pandas - python

I have two lists of dicts: one which has monthly data, and another which has quarterly data as follows:
monthly = [
{
"name": "Boston",
"month": "2015-May",
"total_monthly": 2
},
{
"name": "Boston",
"month": "2015-June",
"total_monthly": 8
},
{
"name": "Chicago",
"month": "2015-May",
"total_monthly": 10
},
{
"name": "Chicago",
"month": "2015-June",
"total_monthly": 13
}
]
quarterly =[
{
"name": "Boston",
"quarter": "2015-Q1",
"total_quarterly": 23
},
{
"name": "Boston",
"quarter": "2015-Q2",
"total_quarterly": 24
},
{
"name": "Chicago",
"quarter": "2015-Q1",
"total_quarterly": 40
},
{
"name": "Chicago",
"quarter": "2015-Q2",
"total_quarterly": 32
}
]
Conventionally, I can iterate through the lists and merge them based on common names. However, how can I achieve the merged data as follows using Pandas?
merged = [
{
"name": "Boston",
"trend_monthly" : [
{
"month": "2015-May",
"total_monthly": 2
},
{
"month": "2015-June",
"total_monthly": 8
},
],
"trend_quarterly" : [
{
"quarter": "2015-Q1",
"total_quarterly": 23
},
{
"quarter": "2015-Q2",
"total_quarterly": 24
},
]
},
{
"name": "Chicago",
"trend_monthly" : [
{
"month": "2015-May",
"total_monthly": 10
},
{
"month": "2015-June",
"total_monthly": 13
},
],
"trend_quarterly" : [
{
"quarter": "2015-Q1",
"total_quarterly": 40
},
{
"quarter": "2015-Q2",
"total_quarterly": 32
},
]
}]

You have to do something like this:
import pandas as pd

# Each input list becomes its own frame; the two share only the "name" column.
df_monthly = pd.DataFrame(monthly)
df_quarterly = pd.DataFrame(quarterly)


def _records_by_name(frame, columns):
    """Map each name to its rows as a list of dicts limited to `columns`.

    Names keep the order in which they first appear in `frame`. An empty
    frame (built from an empty list, so it has no "name" column) yields {}.
    """
    if frame.empty:
        return {}
    return {
        name: rows[columns].to_dict(orient="records")
        for name, rows in frame.groupby("name", sort=False)
    }


trend_monthly = _records_by_name(df_monthly, ["month", "total_monthly"])
trend_quarterly = _records_by_name(df_quarterly, ["quarter", "total_quarterly"])

# dict.fromkeys de-duplicates the names while preserving first-seen order, so a
# name present in only one of the two lists still gets an entry (with an empty
# list for the missing trend).
result = [
    {
        "name": name,
        "trend_monthly": trend_monthly.get(name, []),
        "trend_quarterly": trend_quarterly.get(name, []),
    }
    for name in dict.fromkeys([*trend_monthly, *trend_quarterly])
]

Related

KeyError: "Key 'Record_Path' not found. If specifying a record_path, all elements of data should have the path."

I am trying to organize a JSON response from a URL into a pandas DataFrame, but I am having trouble getting at the nested data.
import requests
import json
import numpy as np
import pandas as pd  # the code below calls pd.json_normalize, so `pd` must be bound
from pandas import json_normalize

# Declared query parameters; note that the URL below is hard-coded and does not use them.
series = 'f1'
season = 2022
ssnround = '1'
laps = 3
url = "http://ergast.com/api/f1/2011/5/laps/1.json"
# In the response shown below, 'Races' is nested under MRData -> RaceTable rather
# than being a top-level key, and driverId/position/time live inside Laps -> Timings.
record_path = ['Races']
meta = ['driverId', 'position', 'time']
r = requests.get(url = url)
data = json.loads(r.content)
# Flatten the whole document without a record_path.
df = pd.json_normalize(data)
df
I am trying to create a table of all the driverIds, their Position and their lap time. However, whenever I use a record_path for example df = pd.json_normalize(data, record_path, meta) I get a Key Error. What am I missing?
The json data looks like this at the URL:
{
"MRData": {
"xmlns": "http://ergast.com/mrd/1.5",
"series": "f1",
"url": "http://ergast.com/api/f1/2011/5/laps/1.json",
"limit": "30",
"offset": "0",
"total": "24",
"RaceTable": {
"season": "2011",
"round": "5",
"Races": [
{
"season": "2011",
"round": "5",
"url": "http://en.wikipedia.org/wiki/2011_Spanish_Grand_Prix",
"raceName": "Spanish Grand Prix",
"Circuit": {
"circuitId": "catalunya",
"url": "http://en.wikipedia.org/wiki/Circuit_de_Barcelona-Catalunya",
"circuitName": "Circuit de Barcelona-Catalunya",
"Location": {
"lat": "41.57",
"long": "2.26111",
"locality": "Montmeló",
"country": "Spain"
}
},
"date": "2011-05-22",
"time": "12:00:00Z",
"Laps": [
{
"number": "1",
"Timings": [
{
"driverId": "alonso",
"position": "1",
"time": "1:34.494"
},
{
"driverId": "vettel",
"position": "2",
"time": "1:35.274"
},
{
"driverId": "webber",
"position": "3",
"time": "1:36.329"
},
{
"driverId": "hamilton",
"position": "4",
"time": "1:36.991"
},
{
"driverId": "petrov",
"position": "5",
"time": "1:38.084"
},
{
"driverId": "michael_schumacher",
"position": "6",
"time": "1:38.633"
},
{
"driverId": "rosberg",
"position": "7",
"time": "1:39.139"
},
{
"driverId": "massa",
"position": "8",
"time": "1:39.979"
},
{
"driverId": "buemi",
"position": "9",
"time": "1:40.611"
},
{
"driverId": "button",
"position": "10",
"time": "1:40.998"
},
{
"driverId": "perez",
"position": "11",
"time": "1:41.433"
},
{
"driverId": "alguersuari",
"position": "12",
"time": "1:41.876"
},
{
"driverId": "maldonado",
"position": "13",
"time": "1:42.255"
},
{
"driverId": "resta",
"position": "14",
"time": "1:42.808"
},
{
"driverId": "trulli",
"position": "15",
"time": "1:43.553"
},
{
"driverId": "kovalainen",
"position": "16",
"time": "1:44.276"
},
{
"driverId": "heidfeld",
"position": "17",
"time": "1:45.164"
},
{
"driverId": "sutil",
"position": "18",
"time": "1:46.107"
},
{
"driverId": "liuzzi",
"position": "19",
"time": "1:46.737"
},
{
"driverId": "barrichello",
"position": "20",
"time": "1:47.077"
},
{
"driverId": "glock",
"position": "21",
"time": "1:47.556"
},
{
"driverId": "karthikeyan",
"position": "22",
"time": "1:48.183"
},
{
"driverId": "ambrosio",
"position": "23",
"time": "1:48.573"
},
{
"driverId": "kobayashi",
"position": "24",
"time": "1:57.590"
}
]
}
]
}
]
}
}
}

Try to construct dataframe without .json_normalize:
import requests
import pandas as pd

url = "http://ergast.com/api/f1/2011/5/laps/1.json"
# A timeout stops the script from hanging on an unresponsive server, and
# raise_for_status turns an HTTP error into a clear exception instead of a
# confusing failure while decoding or indexing the body.
r = requests.get(url=url, timeout=30)
r.raise_for_status()
# The per-driver rows sit at MRData -> RaceTable -> Races[0] -> Laps[0] -> Timings.
timings = r.json()["MRData"]["RaceTable"]["Races"][0]["Laps"][0]["Timings"]
df = pd.DataFrame(timings)
print(df)
Prints:
driverId position time
0 alonso 1 1:34.494
1 vettel 2 1:35.274
2 webber 3 1:36.329
3 hamilton 4 1:36.991
4 petrov 5 1:38.084
5 michael_schumacher 6 1:38.633
6 rosberg 7 1:39.139
7 massa 8 1:39.979
8 buemi 9 1:40.611
9 button 10 1:40.998
10 perez 11 1:41.433
11 alguersuari 12 1:41.876
12 maldonado 13 1:42.255
13 resta 14 1:42.808
14 trulli 15 1:43.553
15 kovalainen 16 1:44.276
16 heidfeld 17 1:45.164
17 sutil 18 1:46.107
18 liuzzi 19 1:46.737
19 barrichello 20 1:47.077
20 glock 21 1:47.556
21 karthikeyan 22 1:48.183
22 ambrosio 23 1:48.573
23 kobayashi 24 1:57.590

Get fields from a JSON file with Python

I have this json file loaded in Python with json.loads('myfile.json'):
[
{
"cart": {
"items": {
"3154ba405e5c5a22bbdf9bf1": {
"item": {
"_id": "3154ba405e5c5a22bbdf9bf1",
"title": "Drink alla cannella",
"price": 5.65,
"__v": 0
},
"qty": 1,
"price": 5.65
}
},
"totalQty": 1,
"totalPrice": 5.65
}
},
{
"cart": {
"items": {
"6214ba405e4c5a31bbdf9ad7": {
"item": {
"_id": "6214ba405e4c5a31bbdf9ad7",
"title": "Drink alla menta",
"price": 5.65,
"__v": 0
},
"qty": 2,
"price": 11.3
}
},
"totalQty": 2,
"totalPrice": 11.3
}
}
]
How can I access both the totalQty and totalPrice fields and sum each of them across the carts?
How can I access both title fields to print them?

Let's assume that you have the JSON data available as a string then:
jdata = '''
[
{
"cart": {
"items": {
"3154ba405e5c5a22bbdf9bf1": {
"item": {
"_id": "3154ba405e5c5a22bbdf9bf1",
"title": "Drink alla cannella",
"price": 5.65,
"__v": 0
},
"qty": 1,
"price": 5.65
}
},
"totalQty": 1,
"totalPrice": 5.65
}
},
{
"cart": {
"items": {
"6214ba405e4c5a31bbdf9ad7": {
"item": {
"_id": "6214ba405e4c5a31bbdf9ad7",
"title": "Drink alla menta",
"price": 5.65,
"__v": 0
},
"qty": 2,
"price": 11.3
}
},
"totalQty": 2,
"totalPrice": 11.3
}
}
]
'''
# Pull the "cart" object out of every top-level entry once.
carts = [entry['cart'] for entry in json.loads(jdata)]

# Print the title of every item, cart by cart.
for cart in carts:
    for line_item in cart['items'].values():
        print(line_item['item']['title'])

# Add up the per-cart totals.
totalQty = sum(cart['totalQty'] for cart in carts)
totalPrice = sum(cart['totalPrice'] for cart in carts)
print(f'{totalQty:d}', f'{totalPrice:.2f}')
Output:
3 16.95
Note:
I suspect that what you really want to do is multiply those two values

Pandas dataframe and conversion to Json

Basically, I'm reading a pandas DataFrame and converting it to JSON. I'm a beginner in coding, but I know that it is preferable to use the apply function instead of iterrows. I have already tried to use apply, but I had difficulty understanding the syntax and could not work out a solution.
===============================
Data that I´m reading from excel
id label id_customer label_customer part_number number_customer product label_product key country value_product
6 Sao Paulo CUST-99992 Brazil 982 10 sho1564 shoes SH-99 Chile 1.5
6 Sao Paulo CUST-99992 Brazil 982 10 sn47282 sneakers SN-71 Germany 43.8
6 Sao Paulo CUST-43535 Argentina 435 15 sk84393 skirt SK-11 Netherlands 87.1
92 Hong Kong CUST-88888 China 785 58 ca40349 cap CA-82 Russia 3.95
===============================
CODE:
import pandas as pd
import json

df = pd.read_excel(path)

# One record per (id, label) pair, each holding its customers; sort=False keeps
# the groups in the order they appear in the sheet.
result = []
for (group_id, group_label), group_rows in df.groupby(['id', 'label'], sort=False):
    customers = []
    for (cust_id, cust_label), cust_rows in group_rows.groupby(['id_customer', 'label_customer'], sort=False):
        # One entry per spreadsheet row of this customer.
        numbers = [
            {'part': str(part), 'number_customer': str(num)}
            for part, num in zip(cust_rows['part_number'], cust_rows['number_customer'])
        ]
        customers.append({'id': cust_id, 'label': cust_label, 'Number': numbers})
    result.append({'id': int(group_id), 'label': group_label, 'Customer': customers})
===============================
Json I'm getting:
[
{
"id": 6,
"label": "Sao Paulo",
"Customer": [
{
"id": "CUST-99992",
"label": "Brazil",
"Number": [
{
"part": "982",
"number_customer": "10"
},
{
"part": "982",
"number_customer": "10"
}
]
},
{
"id": "CUST-43535",
"label": "Argentina",
"Number": [
{
"part": "435",
"number_customer": "15"
}
]
}
]
},
{
"id": 92,
"label": "Hong Kong",
"Customer": [
{
"id": "CUST-88888",
"label": "China",
"Number": [
{
"part": "785",
"number_customer": "58"
}
]
}
]
}
]
===============================
Json expected:
[
{
"id": 6,
"label": "Sao Paulo",
"Customer": [
{
"id": "CUST-99992",
"label": "Brazil",
"Number": [
{
"part": "982",
"number_customer": "10",
"Products": [
{
"product": "sho1564",
"label_product": "shoes",
"Order": [
{
"key": "SH-99",
"country": "Chile",
"value_product": "1.5"
}
]
},
{
"product": "sn47282",
"label_product": "sneakers",
"Order": [
{
"key": "SN-71",
"country": "Germany",
"value_product": "43.8"
}
]
}
]
}
]
},
{
"id": "CUST-43535",
"label": "Argentina",
"Number": [
{
"part": "435",
"number_customer": "15",
"Products": [
{
"product": "sk84393",
"label_product": "skirt",
"Order": [
{
"key": "SK-11",
"country": "Netherlands",
"value_product": "87.1"
}
]
}
]
}
]
}
]
},
{
"id": 92,
"label": "Hong Kong",
"Customer": [
{
"id": "CUST-88888",
"label": "China",
"Number": [
{
"part": "785",
"number_customer": "58",
"Products": [
{
"product": "ca40349",
"label_product": "cap",
"Order": [
{
"key": "CA-82",
"country": "Russia",
"value_product": "3.95"
}
]
}
]
}
]
}
]
}
]
===============================
Note that id and label form one group of information, id_customer and label_customer form another, part_number and number_customer another, product and label_product another, and key, country and value_product another.
The expected JSON depends on the information inside my DataFrame.
Can somebody help me, please?

import pandas as pd
import json

df = pd.read_excel(path)

# Build the nested structure one level at a time. Every level groups the rows
# of its parent by one pair of columns; sort=False keeps the sheet's row order.
result = []
for (id_, label), location_rows in df.groupby(['id', 'label'], sort=False):
    # int() converts the group key to a plain Python int.
    record = {'id': int(id_), 'label': label, 'Customer': []}
    for (customer_id, customer_label), customer_rows in location_rows.groupby(
            ['id_customer', 'label_customer'], sort=False):
        customer = {'id': customer_id, 'label': customer_label, 'Number': []}
        for (part, number_customer), part_rows in customer_rows.groupby(
                ['part_number', 'number_customer'], sort=False):
            number = {'part': str(part), 'number_customer': str(number_customer), 'Products': []}
            for (product_code, product_label), product_rows in part_rows.groupby(
                    ['product', 'label_product'], sort=False):
                # value_product is emitted as a string, as in the expected JSON.
                orders = [
                    {'key': key, 'country': country, 'value_product': str(value)}
                    for key, country, value in zip(
                        product_rows['key'], product_rows['country'], product_rows['value_product'])
                ]
                number['Products'].append(
                    {'product': product_code, 'label_product': product_label, 'Order': orders})
            customer['Number'].append(number)
        record['Customer'].append(customer)
    result.append(record)

Hope this is of use!
from io import StringIO
import pandas as pd
import json
# The question's spreadsheet rows inlined as CSV text; the last row's label is
# "Hong Kong", matching the JSON shown in the question.
csv = """id,label,id_customer,label_customer,part_number,number_customer,product,label_product,key,country,value_product
6,Sao Paulo,CUST-99992,Brazil,982,10,sho1564,shoes,SH-99,Chile,1.5
6,Sao Paulo,CUST-99992,Brazil,982,10,sn47282,sneakers,SN-71,Germany,43.8
6,Sao Paulo,CUST-43535,Argentina,435,15,sk84393,skirt,SK-11,Netherlands,87.1
92,Hong Kong,CUST-88888,China,785,58,ca40349,cap,CA-82,Russia,3.95"""
# Wrap the text in a file-like object so read_csv can parse it.
csv = StringIO(csv)
df = pd.read_csv(csv)
def split(df, groupby, json_func, sort=False):
    """Yield ``json_func(group, *key)`` for every group of ``df``.

    Args:
        df: DataFrame to partition.
        groupby: list of column names to group by. The group key is unpacked
            into ``json_func``, so it must be a tuple (pandas yields tuples
            when grouping by a list of several columns).
        json_func: callable receiving the group's rows followed by one
            positional argument per grouping column.
        sort: passed to ``DataFrame.groupby``. The default ``False`` yields
            groups in order of first appearance, so the output follows the
            row order of the input; pass ``True`` to sort by group key.
    """
    for key, group in df.groupby(groupby, sort=sort):
        yield json_func(group, *key)
# Build the nested structure with one split() call per level: each lambda
# receives the rows of its group plus the group's key values, and fills its
# child list ("Customer", "Number", "Products", "Order") by splitting those
# rows again on the next pair of columns. The innermost level groups by
# key/country/value_product and ignores the group rows (the `_` parameter).
a = list(split(df, ['id', 'label'], lambda grp, id_, label: {"id": id_, "label": label, "Customer": list(
split(grp, ['id_customer', 'label_customer'], lambda grp_1, id_cust, label_cust: {"id": id_cust, "label": label_cust, "Number": list(
split(grp_1, ['part_number', 'number_customer'], lambda grp_2, part, num_cust: {"part": part, "number_customer": num_cust, "Products": list(
split(grp_2, ['product', 'label_product'], lambda grp_3, product, label_product: {"product": product, "label_product": label_product, "Order": list(
split(grp_3, ['key', 'country', 'value_product'], lambda _, key, country, value_product: {"key": key, "country": country, "value_product": value_product}))}
))})
)}))}))
# NOTE(review): display is not imported in this snippet; presumably the
# IPython/Jupyter built-in, so this line assumes a notebook session. Confirm.
display(a)

Parsing and creating nested dictionaries

I would like to create a dictionary containing a nested structure of dictionaries, like below:
{
"Jaque": {
"ES": {
"Madrid": [
{
"experience": 9
}
]
},
"FR": {
"Lyon": [
{
"experience": 11.4
}
],
"Paris": [
{
"experience": 20
}
]
}
},
"James": {
"UK": {
"London": [
{
"experience": 10.9
}
]
}
},
"Henry": {
"UK": {
"London": [
{
"experience": 15
}
]
}
},
"Joe": {
"US": {
"Boston": [
{
"experience": 100
}
]
}
}
}
My input is a list of dictionaries of this format:
c = [{
"country": "US",
"city": "Boston",
"name": "Joe",
"experience": 100
},
{
"country": "FR",
"city": "Paris",
"name": "Jaque",
"experience": 20
},
{
"country": "FR",
"city": "Lyon",
"name": "Jaque",
"experience": 11.4
},
{
"country": "ES",
"city": "Madrid",
"name": "Jaque",
"experience": 9
},
{
"country": "UK",
"city": "London",
"name": "Henry",
"experience": 15
},
{
"country": "UK",
"city": "London",
"name": "James",
"experience": 10.9
}
]
My first approach was to create the nested dict, step by step:
# Seed one entry per distinct name; dict.fromkeys gives every key the same
# defaultdict object as its placeholder value.
dd = dict.fromkeys([rec.get("name") for rec in c], defaultdict(dict))
#will create
# dd = {'Joe': defaultdict(<class 'dict'>, {}), 'Jaque': defaultdict(<class 'dict'>, {}), 'James': defaultdict(<class 'dict'>, {}), 'Henry': defaultdict(<class 'dict'>, {})}
for person in dd:
    # records in which this name occurs among the values
    own_rows = [rec for rec in c if person in rec.values()]
    if own_rows:
        # replace the placeholder with a flat country -> city mapping
        dd[person] = {rec.get("country"): rec.get("city") for rec in own_rows}
# dd will become
#{'Joe': {'US': 'Boston'}, 'Jaque': {'FR': 'Lyon', 'ES': 'Madrid'}, 'Henry': {'UK': 'London'}, 'James': {'UK': 'London'}}
Now I can't figure a way to create/update the nested structure of dict dd. Is there a more dynamic way to create dict? Thx

You could use itertools.groupby to organize the list similarly to your expected output and then loop to convert to a dict.
from itertools import groupby
from operator import itemgetter
data = [
    {"country": "US", "city": "Boston", "name": "Joe", "experience": 100},
    {"country": "FR", "city": "Paris", "name": "Jaque", "experience": 20},
    {"country": "FR", "city": "Lyon", "name": "Jaque", "experience": 11.4},
    {"country": "ES", "city": "Madrid", "name": "Jaque", "experience": 9},
    {"country": "UK", "city": "London", "name": "Henry", "experience": 15},
    {"country": "UK", "city": "London", "name": "James", "experience": 10.9},
]

# Build name -> country -> city -> [{'experience': ...}].
by_name = itemgetter('name')
result = {}
# groupby only merges adjacent items, so the rows are sorted by name first.
for name, rows in groupby(sorted(data, key=by_name), key=by_name):
    countries = result[name] = {}
    for row in rows:
        # setdefault keeps what is already stored for a country or city, so a
        # person with several cities in one country (Jaque: Paris and Lyon in
        # FR) keeps all of them instead of only the last one.
        cities = countries.setdefault(row['country'], {})
        cities.setdefault(row['city'], []).append({'experience': row['experience']})
print(result)
# {'Henry': {'UK': {'London': [{'experience': 15}]}}, 'James': {'UK': {'London': [{'experience': 10.9}]}}, 'Jaque': {'FR': {'Paris': [{'experience': 20}], 'Lyon': [{'experience': 11.4}]}, 'ES': {'Madrid': [{'experience': 9}]}}, 'Joe': {'US': {'Boston': [{'experience': 100}]}}}

You can use recursion with itertools.groupby:
from itertools import groupby
def group(d, keys=None):
    """Nest a list of flat dicts by the given keys, one level per key.

    Args:
        d: iterable of dicts; every dict must contain the key being grouped on.
        keys: ordered list of keys to nest by. With ``None`` or an empty list
            there is nothing to group on and ``d`` is returned unchanged.

    Returns:
        A dict mapping each value of ``keys[0]`` to the nested grouping of
        its records (with that key removed). Nesting stops as soon as every
        remaining record has a single field, or when the keys run out; at
        that point the list of remaining records is the value.
    """
    if not keys:
        return d
    key, *rest = keys

    def by_key(record):
        return record[key]

    # groupby only merges adjacent items, hence the sort on the same key.
    grouped = {}
    for value, members in groupby(sorted(d, key=by_key), key=by_key):
        # Drop the grouping key from each record; it now lives in the dict key.
        grouped[value] = [
            {field: val for field, val in record.items() if field != key}
            for record in members
        ]
    return {
        value: records if all(len(r) == 1 for r in records) else group(records, rest)
        for value, records in grouped.items()
    }
import json

# Nest the records by name, then country, then city.
result = group(data, keys=['name', 'country', 'city', 'experience'])
# Pretty-print the nested dict (the original line had an unbalanced closing parenthesis).
print(json.dumps(result, indent=4))
Output:
{
"Henry": {
"UK": {
"London": [
{
"experience": 15
}
]
}
},
"James": {
"UK": {
"London": [
{
"experience": 10.9
}
]
}
},
"Jaque": {
"ES": {
"Madrid": [
{
"experience": 9
}
]
},
"FR": {
"Lyon": [
{
"experience": 11.4
}
],
"Paris": [
{
"experience": 20
}
]
}
},
"Joe": {
"US": {
"Boston": [
{
"experience": 100
}
]
}
}
}

nested json file with dictionary into a dataframe

I have a json file as
{
"Date": 2017,
"count": 88,
"demographics": [
{
"key": "age",
"value": "20-30"
},
{
"key": "education",
"value": "bachelor"
},
{
"key": "income",
"value": "70-80"
},
{
"key": "location",
"value": "USA"
}
]
}
I use the code below to convert it into csv file. It gives me a dataframe with 3 columns of Date, count, and demographics, but the output I need is a dataframe with 6 columns of Date, Count, age, education, income, and location.
# Parse the JSON document stored in Sample.json.
with open(r'Sample.json') as f:
data = json.load(f)
# Flatten the parsed document into a DataFrame.
# NOTE(review): json and json_normalize are not imported in this snippet;
# presumably `import json` and `from pandas import json_normalize`. Confirm.
dfNormalized=json_normalize(data)

By using apply with pd.Series:
d = {
    "Date": 2017,
    "count": 88,
    "demographics": [
        {
            "key": "age",
            "value": "20-30"
        },
        {
            "key": "education",
            "value": 'bachelor'
        },
        {
            "key": "income",
            "value": "70-80"
        },
        {
            "key": "location",
            "value": "USA"
        }
    ]
}
# Date and count are broadcast across one row per demographics entry.
df = pd.DataFrame(d)
# Expand each {"key": ..., "value": ...} dict into its own key/value columns.
demo = df.demographics.apply(pd.Series)
# Pivot to a single row whose columns are the keys. `axis` is passed by keyword
# because pandas 2.0 removed the positional form used previously.
A = (
    demo.set_index('key')[['value']]
    .T
    .rename_axis(None, axis=1)
    .reset_index(drop=True)
)
# Date/count are identical on every row, so keep the first row only and place
# the pivoted demographics next to it.
flat = pd.concat([df.drop(columns='demographics').loc[[0]], A], axis=1)
flat
Out[273]:
Date count age education income location
0 2017 88 20-30 bachelor 70-80 USA

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Merge two lists of dicts in python using pandas - python

Related

KeyError: "Key 'Record_Path' not found. If specifying a record_path, all elements of data should have the path."

Get fields from a JSON file with Python

Pandas dataframe and conversion to Json

Parsing and creating nested dictionaries

nested json file with dictionary into a dataframe

Categories

Resources