Manipulating data of Pandas dataframe - python

I'm reading a dataframe and trying to 'insert' a list inside another list and then converting it to json file. I'm using python 3 and 0.25.3 version of pandas for it.
My dataframe:
id label id_customer label_customer part_number number_client
6 Sao Paulo CUST-99992 Brazil 7897 982
6 Sao Paulo CUST-99992 Brazil 888 12
92 Hong Kong CUST-88888 China 147 288
My code:
import pandas as pd
import json
data = pd.read_excel(path)
data["part_number"] = data["part_number"].apply(lambda x: str(x))
data["number_client"] = data["number_client"].apply(lambda x: str(x))
data = data.groupby(["id", "label", "id_customer", "label_customer"], as_index=False).agg("#".join)
data["part_number"] = data["part_number"].apply(lambda x: {"part": x})
data["number_client"] = data["number_client"].apply(lambda x: {"client": x})
data["id_customer"] = data["id_customer"].apply(lambda x: {"id": x})
data["label_customer"] = data["label_customer"].apply(lambda x: {"label": x})
data["number"] = data.apply(lambda x: [{**x["part_number"], **x["number_client"]}], axis=1)
data["Customer"] = data.apply(lambda x: [{**x["id_customer"], **x["label_customer"], **data["number"]}],axis=1)
data = data[["id", "label", "Customer"]]
data.to_json(path)
Json output I'm getting:
[{
"id": 6,
"label": "Sao Paulo",
"Customer": [{
"id": "CUST-99992",
"label": "Brazil",
"0": [{
"part": "7897",
"client": "982"
}],
"1": [{
"part": "888",
"client": "12"
}],
"2": [{
"part": "147",
"client": "288"
}]
}]
}, {
"id": 6,
"label": "Sao Paulo",
"Customer": [{
"id": "CUST-99992",
"label": "Brazil",
"0": [{
"part": "7897",
"client": "982"
}],
"1": [{
"part": "888",
"client": "12"
}],
"2": [{
"part": "147",
"client": "288"
}]
}]
}, {
"id": 92,
"label": "Hong Kong",
"Customer": [{
"id": "CUST-888888",
"label": "China",
"0": [{
"part": "7897",
"client": "982"
}],
"1": [{
"part": "888",
"client": "12"
}],
"2": [{
"part": "147",
"client": "288"
}]
}]
}]
What I need:
[{
"id": 6,
"label": "Sao Paulo",
"Customer": [{
"id": "CUST-99992",
"label": "Brazil",
"number": [{
"part": "7897",
"client": "982"
},
{
"part": "888",
"client": "12"
}]
}]
},
{
"id": 92,
"label": "Hong Kong",
"Customer": [{
"id": "CUST-888888",
"label": "China",
"number": [{
"part": "147",
"client": "288"
}]
}]
}
]
Look that id and label a is group of information even as id_customer and label_customer is another group, part_number and number_client is another. Customerand number are lists and they can have a lot objects inside them (the number of objects depends of my data in my dataframe).
What am I doing wrong and how can I fix it?
Tks so much!

First cast both columns to strings and then use lambda functions with DataFrame.to_dict and rename columns names, last convert output to json by DataFrame.to_json:
data[["part_number","number_client"]] = data[["part_number","number_client"]].astype(str)
f = lambda x: x.split('_')[0]
j =(data.groupby(["id","label","id_customer","label_customer"])['part_number','number_client']
.apply(lambda x: x.rename(columns=f).to_dict('r')).reset_index(name='number')
.groupby(["id", "label"])[ "id_customer", "label_customer", "number"]
.apply(lambda x: x.rename(columns=f).to_dict('r')).reset_index(name='customer')
.to_json(orient='records'))
print (j)
[{
"id": 6,
"label": "Sao Paulo",
"customer": [{
"id": "CUST-99992",
"label": "Brazil",
"number": [{
"part": "7897",
"number": "982"
}, {
"part": "888",
"number": "12"
}]
}]
}, {
"id": 92,
"label": "Hong Kong",
"customer": [{
"id": "CUST-88888",
"label": "China",
"number": [{
"part": "147",
"number": "288"
}]
}]
}]

Related

Split Json Data by certain string values (Python)

I want to split incidents by "incidentType" values for python. It always have 5 of these values: period, injuryTime, goal, card and substitution.
Json File
{
"incidents": [
{
"text": "FT",
"homeScore": 2,
"awayScore": 1,
"isLive": false,
"time": 90,
"addedTime": 999,
"incidentType": "period"
},
{
"length": 4,
"time": 90,
"addedTime": 0,
"incidentType": "injuryTime"
},
{
"homeScore": 2,
"awayScore": 1,
"player": {
"name": "Mostafa Mohamed",
"firstName": "",
"lastName": "",
"slug": "mostafa-mohamed",
"shortName": "M. Mohamed",
"position": "F",
"userCount": 3949,
"id": 873551
},
"id": 141786584,
"time": 89,
"isHome": true,
"incidentClass": "penalty",
"incidentType": "goal"
},
{
"player": {
"name": "Duško Tošić",
"slug": "dusko-tosic",
"shortName": "D. Tošić",
"position": "D",
"userCount": 215,
"id": 14557
},
"playerName": "Duško Tošić",
"reason": "Foul",
"id": 119728583,
"time": 85,
"isHome": false,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"playerIn": {
"name": "Younès Belhanda",
"slug": "younes-belhanda",
"shortName": "Y. Belhanda",
"position": "M",
"userCount": 2165,
"id": 72999
},
"playerOut": {
"name": "Martin Linnes",
"slug": "martin-linnes",
"shortName": "M. Linnes",
"position": "D",
"userCount": 339,
"id": 109569
},
"id": 120059400,
"time": 82,
"isHome": true,
"incidentType": "substitution"
},
{
"player": {
"name": "Kevin Varga",
"slug": "kevin-varga",
"shortName": "K. Varga",
"position": "M",
"userCount": 274,
"id": 602730
},
"playerName": "Kevin Varga",
"reason": "Foul",
"id": 119728582,
"time": 82,
"isHome": false,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"playerIn": {
"name": "DeAndre Yedlin",
"slug": "deandre-yedlin",
"shortName": "D. Yedlin",
"position": "D",
"userCount": 702,
"id": 314040
},
"playerOut": {
"name": "Muhammed Kerem Aktürkoğlu",
"firstName": "",
"lastName": "",
"slug": "muhammed-kerem-akturkoglu",
"shortName": "M. K. Aktürkoğlu",
"position": "F",
"userCount": 281,
"id": 903324
},
"id": 120059399,
"time": 77,
"isHome": true,
"incidentType": "substitution"
},
{
"playerIn": {
"name": "Ryan Donk",
"slug": "ryan-donk",
"shortName": "R. Donk",
"position": "D",
"userCount": 489,
"id": 14900
},
"playerOut": {
"name": "Ryan Babel",
"slug": "ryan-babel",
"shortName": "R. Babel",
"position": "F",
"userCount": 1577,
"id": 1876
},
"id": 120059397,
"time": 72,
"isHome": true,
"incidentType": "substitution"
},
{
"playerIn": {
"name": "Emre Akbaba",
"slug": "emre-akbaba",
"shortName": "E. Akbaba",
"position": "M",
"userCount": 604,
"id": 343527
},
"playerOut": {
"name": "Gedson Fernandes",
"slug": "fernandes-gedson",
"shortName": "G. Fernandes",
"position": "M",
"userCount": 3030,
"id": 862055
},
"id": 120059396,
"time": 71,
"isHome": true,
"incidentType": "substitution"
},
{
"playerIn": {
"name": "Henry Onyekuru",
"slug": "henry-onyekuru",
"shortName": "H. Onyekuru",
"position": "M",
"userCount": 1474,
"id": 809220
},
"playerOut": {
"name": "Emre Kılınç",
"slug": "emre-kilinc",
"shortName": "E. Kılınç",
"position": "M",
"userCount": 526,
"id": 202032
},
"id": 120059398,
"time": 71,
"isHome": true,
"incidentType": "substitution"
},
{
"player": {
"name": "Haris Hajradinović",
"slug": "haris-hajradinovic",
"shortName": "H. Hajradinović",
"position": "M",
"userCount": 357,
"id": 254979
},
"playerName": "Haris Hajradinović",
"reason": "Foul",
"id": 119728581,
"time": 71,
"isHome": false,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"homeScore": 1,
"awayScore": 1,
"player": {
"name": "Isaac Kiese Thelin",
"slug": "isaac-kiese-thelin",
"shortName": "I. K. Thelin",
"position": "F",
"userCount": 386,
"id": 178743
},
"assist1": {
"name": "Haris Hajradinović",
"slug": "haris-hajradinovic",
"shortName": "H. Hajradinović",
"position": "M",
"userCount": 357,
"id": 254979
},
"id": 141786585,
"time": 51,
"isHome": false,
"incidentClass": "regular",
"incidentType": "goal"
},
{
"playerIn": {
"name": "Kevin Varga",
"slug": "kevin-varga",
"shortName": "K. Varga",
"position": "M",
"userCount": 274,
"id": 602730
},
"playerOut": {
"name": "Gilbert Koomson",
"slug": "gilbert-koomson",
"shortName": "G. Koomson",
"position": "F",
"userCount": 76,
"id": 341107
},
"id": 120059401,
"time": 46,
"isHome": false,
"incidentType": "substitution"
},
{
"text": "HT",
"homeScore": 1,
"awayScore": 0,
"isLive": false,
"time": 45,
"addedTime": 999,
"incidentType": "period"
},
{
"player": {
"name": "Isaac Kiese Thelin",
"slug": "isaac-kiese-thelin",
"shortName": "I. K. Thelin",
"position": "F",
"userCount": 386,
"id": 178743
},
"playerName": "Isaac Kiese Thelin",
"reason": "Foul",
"id": 119728580,
"time": 15,
"isHome": false,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"homeScore": 1,
"awayScore": 0,
"player": {
"name": "Muhammed Kerem Aktürkoğlu",
"firstName": "",
"lastName": "",
"slug": "muhammed-kerem-akturkoglu",
"shortName": "M. K. Aktürkoğlu",
"position": "F",
"userCount": 281,
"id": 903324
},
"id": 141786583,
"time": 9,
"isHome": true,
"incidentClass": "regular",
"incidentType": "goal"
}
]
}
ABC = {
"incidents": [
{
"text": "FT",
"homeScore": 2,
"awayScore": 1,
"isLive": False,
"time": 90,
"addedTime": 999,
"incidentType": "period"
},
{
"length": 4,
"time": 90,
"addedTime": 0,
"incidentType": "injuryTime"
},
{
"homeScore": 2,
"awayScore": 1,
"player": {
"name": "Mostafa Mohamed",
"firstName": "",
"lastName": "",
"slug": "mostafa-mohamed",
"shortName": "M. Mohamed",
"position": "F",
"userCount": 3949,
"id": 873551
},
"id": 141786584,
"time": 89,
"isHome": True,
"incidentClass": "penalty",
"incidentType": "goal"
},
{
"player": {
"name": "Duško Tošić",
"slug": "dusko-tosic",
"shortName": "D. Tošić",
"position": "D",
"userCount": 215,
"id": 14557
},
"playerName": "Duško Tošić",
"reason": "Foul",
"id": 119728583,
"time": 85,
"isHome": False,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"playerIn": {
"name": "Younès Belhanda",
"slug": "younes-belhanda",
"shortName": "Y. Belhanda",
"position": "M",
"userCount": 2165,
"id": 72999
},
"playerOut": {
"name": "Martin Linnes",
"slug": "martin-linnes",
"shortName": "M. Linnes",
"position": "D",
"userCount": 339,
"id": 109569
},
"id": 120059400,
"time": 82,
"isHome": True,
"incidentType": "substitution"
},
{
"player": {
"name": "Kevin Varga",
"slug": "kevin-varga",
"shortName": "K. Varga",
"position": "M",
"userCount": 274,
"id": 602730
},
"playerName": "Kevin Varga",
"reason": "Foul",
"id": 119728582,
"time": 82,
"isHome": False,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"playerIn": {
"name": "DeAndre Yedlin",
"slug": "deandre-yedlin",
"shortName": "D. Yedlin",
"position": "D",
"userCount": 702,
"id": 314040
},
"playerOut": {
"name": "Muhammed Kerem Aktürkoğlu",
"firstName": "",
"lastName": "",
"slug": "muhammed-kerem-akturkoglu",
"shortName": "M. K. Aktürkoğlu",
"position": "F",
"userCount": 281,
"id": 903324
},
"id": 120059399,
"time": 77,
"isHome": True,
"incidentType": "substitution"
},
{
"playerIn": {
"name": "Ryan Donk",
"slug": "ryan-donk",
"shortName": "R. Donk",
"position": "D",
"userCount": 489,
"id": 14900
},
"playerOut": {
"name": "Ryan Babel",
"slug": "ryan-babel",
"shortName": "R. Babel",
"position": "F",
"userCount": 1577,
"id": 1876
},
"id": 120059397,
"time": 72,
"isHome": True,
"incidentType": "substitution"
},
{
"playerIn": {
"name": "Emre Akbaba",
"slug": "emre-akbaba",
"shortName": "E. Akbaba",
"position": "M",
"userCount": 604,
"id": 343527
},
"playerOut": {
"name": "Gedson Fernandes",
"slug": "fernandes-gedson",
"shortName": "G. Fernandes",
"position": "M",
"userCount": 3030,
"id": 862055
},
"id": 120059396,
"time": 71,
"isHome": True,
"incidentType": "substitution"
},
{
"playerIn": {
"name": "Henry Onyekuru",
"slug": "henry-onyekuru",
"shortName": "H. Onyekuru",
"position": "M",
"userCount": 1474,
"id": 809220
},
"playerOut": {
"name": "Emre Kılınç",
"slug": "emre-kilinc",
"shortName": "E. Kılınç",
"position": "M",
"userCount": 526,
"id": 202032
},
"id": 120059398,
"time": 71,
"isHome": True,
"incidentType": "substitution"
},
{
"player": {
"name": "Haris Hajradinović",
"slug": "haris-hajradinovic",
"shortName": "H. Hajradinović",
"position": "M",
"userCount": 357,
"id": 254979
},
"playerName": "Haris Hajradinović",
"reason": "Foul",
"id": 119728581,
"time": 71,
"isHome": False,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"homeScore": 1,
"awayScore": 1,
"player": {
"name": "Isaac Kiese Thelin",
"slug": "isaac-kiese-thelin",
"shortName": "I. K. Thelin",
"position": "F",
"userCount": 386,
"id": 178743
},
"assist1": {
"name": "Haris Hajradinović",
"slug": "haris-hajradinovic",
"shortName": "H. Hajradinović",
"position": "M",
"userCount": 357,
"id": 254979
},
"id": 141786585,
"time": 51,
"isHome": False,
"incidentClass": "regular",
"incidentType": "goal"
},
{
"playerIn": {
"name": "Kevin Varga",
"slug": "kevin-varga",
"shortName": "K. Varga",
"position": "M",
"userCount": 274,
"id": 602730
},
"playerOut": {
"name": "Gilbert Koomson",
"slug": "gilbert-koomson",
"shortName": "G. Koomson",
"position": "F",
"userCount": 76,
"id": 341107
},
"id": 120059401,
"time": 46,
"isHome": False,
"incidentType": "substitution"
},
{
"text": "HT",
"homeScore": 1,
"awayScore": 0,
"isLive": False,
"time": 45,
"addedTime": 999,
"incidentType": "period"
},
{
"player": {
"name": "Isaac Kiese Thelin",
"slug": "isaac-kiese-thelin",
"shortName": "I. K. Thelin",
"position": "F",
"userCount": 386,
"id": 178743
},
"playerName": "Isaac Kiese Thelin",
"reason": "Foul",
"id": 119728580,
"time": 15,
"isHome": False,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"homeScore": 1,
"awayScore": 0,
"player": {
"name": "Muhammed Kerem Aktürkoğlu",
"firstName": "",
"lastName": "",
"slug": "muhammed-kerem-akturkoglu",
"shortName": "M. K. Aktürkoğlu",
"position": "F",
"userCount": 281,
"id": 903324
},
"id": 141786583,
"time": 9,
"isHome": True,
"incidentClass": "regular",
"incidentType": "goal"
}
]
}
First, create a dictionary to hold all distinct incidentType. Then iterate through incidents and check if whether incidentType exists in the dictionary or not. If it exists? Append. if not, create a new key : value pair
result = {}
for js in ABC["incidents"]:
icdType = js["incidentType"]
if icdType in result:
result[icdType].append(js)
else:
result[icdType] = [js]
for key,val in result.items():
print(key, ":", val, "\n")

How to map the dictionary values to another dictionary

I have dictionary which is below
{
"aggregations": {
"A": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{ "key": "ADL", "doc_count": 1 },
{ "key": "SDD", "doc_count": 1 },
{ "key": "JJD", "doc_count": 1 }
]
},
"B": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{ "key": "ABC", "doc_count": 1 },
{ "key": "CDE", "doc_count": 1 },
{ "key": "FGH", "doc_count": 1 }
]
},
"C": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{ "key": "XYX", "doc_count": 1 },
{ "key": "NXS", "doc_count": 1 }
]
}
}
}
aggregations.keys will be aggregationfilters.fieldName
aggregations.buckets.key will be aggregationfilters.values.title
aggregationfilters.values.paragraph is null everytime
aggregations.buckets.doc_count will be aggregationfilters.values.count
Basically I need to extract aggregations.keys and aggregations.bucket values and put into different dictionary.
Need to write a general code structure to do that.
I cannot do with .pop(rename) the dictioanry
My expected out
{
"aggregationfilters": [
{
"name": "ABC",
"fieldName": "A",
"values": [
{ "title": "ADL", "paragraph": null, "count": 1 },
{ "title": "SDD", "paragraph": null, "count": 1 },
{ "title": "JJD", "paragraph": null, "count": 1 }
]
}, {
"name": "CDE",
"fieldName": "B",
"values": [
{ "title": "ABC", "paragraph": null, "count": 1 },
{ "title": "CDE", "paragraph": null, "count": 1 },
{ "title": "FGH", "paragraph": null, "count": 1 }
]
}, {
"name": "FGH",
"fieldName": "C",
"values": [
{ "title": "XYX", "paragraph": null, "count": 1 },
{ "title": "NXS", "paragraph": null, "count": 1 }
]
}
]
}
Well, this works, but even with my best effort this still doesn't look that clean.
import json
source = {
"aggregations": {
"A": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{"key": "ADL", "doc_count": 1},
{"key": "SDD", "doc_count": 1},
{"key": "JJD", "doc_count": 1},
],
},
"B": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{"key": "ABC", "doc_count": 1},
{"key": "CDE", "doc_count": 1},
{"key": "FGH", "doc_count": 1},
],
},
"C": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{"key": "XYX", "doc_count": 1}, {"key": "NXS", "doc_count": 1}],
},
}
}
convert_map = {
"buckets": "values",
"doc_count": "count",
"key": "title",
}
remove_map = {"sum_other_doc_count", "doc_count_error_upper_bound"}
add_map = {"name": "Changed VAL_", "fieldName": "VAL_"}
def converting_generator(
source_: dict, convert_map_: dict, remove_map_: set, add_map_: dict
):
working_dict = {k: v for k, v in source_.items()}
variable_identifier = "VAL_"
for key, inner_dic in working_dict.items():
inner_dic: dict
for rm_key in remove_map_:
try:
inner_dic.pop(rm_key)
except KeyError:
pass
for add_key, add_val in add_map_.items():
inner_dic[add_key] = add_val.replace(variable_identifier, key)
dumped = json.dumps(inner_dic, indent=2)
for original, target in convert_map_.items():
dumped = dumped.replace(original, target)
yield json.loads(dumped)
converted = {
"aggregation_filters": list(
converting_generator(source["aggregations"], convert_map, remove_map, add_map)
)
}
for inner_dict in converted["aggregation_filters"]:
for even_inner_dict in inner_dict["values"]:
even_inner_dict["paragraph"] = None
print(json.dumps(converted, indent=2))
Output:
{
"aggregation_filters": [
{
"values": [
{
"title": "ADL",
"count": 1,
"paragraph": null
},
{
"title": "SDD",
"count": 1,
"paragraph": null
},
{
"title": "JJD",
"count": 1,
"paragraph": null
}
],
"name": "Changed A",
"fieldName": "A"
},
{
"values": [
{
"title": "ABC",
"count": 1,
"paragraph": null
},
{
"title": "CDE",
"count": 1,
"paragraph": null
},
{
"title": "FGH",
"count": 1,
"paragraph": null
}
],
"name": "Changed B",
"fieldName": "B"
},
{
"values": [
{
"title": "XYX",
"count": 1,
"paragraph": null
},
{
"title": "NXS",
"count": 1,
"paragraph": null
}
],
"name": "Changed C",
"fieldName": "C"
}
]
}
Always show your code, would be nice if that's a working one - to show that you've put at least that worth of the effort on your problem.
I don't bother it as this feels like puzzle solving, but others may not.

Pandas dataframe and conversion to Json

Basically I´m reading a pandas dataframe and converting it to Json. I´m a beginner in coding, but I know that is preferable to use apply function instead iterrows (and I already tried to use apply function, but some difficulties in understand the syntax and find out my solution arose)!!
===============================
Data that I´m reading from excel
id label id_customer label_customer part_number number_customer product label_product key country value_product
6 Sao Paulo CUST-99992 Brazil 982 10 sho1564 shoes SH-99 Chile 1.5
6 Sao Paulo CUST-99992 Brazil 982 10 sn47282 sneakers SN-71 Germany 43.8
6 Sao Paulo CUST-43535 Argentina 435 15 sk84393 skirt SK-11 Netherlands 87.1
92 Hong Hong CUST-88888 China 785 58 ca40349 cap CA-82 Russia 3.95
===============================
CODE:
import pandas as pd
import json
df = pd.read_excel(path)
result = []
for labels, df1 in df.groupby(['id', 'label'],sort=False):
id_, label = labels
record = {'id': int(id_), 'label': label, 'Customer': []}
for inner_labels, df2 in df1.groupby(['id_customer', 'label_customer'],sort=False):
id_,label = inner_labels
record['Customer'].append({
'id': id_,
'label': label,
'Number': [{'part': str(p), 'number_customer': str(s)} for p, s in zip(df2['part_number'], df2['number_customer'])]
})
result.append(record)
===============================
Json I'm getting:
[
{
"id": 6,
"label": "Sao Paulo",
"Customer": [
{
"id": "CUST-99992",
"label": "Brazil",
"Number": [
{
"part": "982",
"number_customer": "10"
},
{
"part": "982",
"number_customer": "10"
}
]
},
{
"id": "CUST-43535",
"label": "Argentina",
"Number": [
{
"part": "435",
"number_customer": "15"
}
]
}
]
},
{
"id": 92,
"label": "Hong Kong",
"Customer": [
{
"id": "CUST-88888",
"label": "China",
"Number": [
{
"part": "785",
"number_customer": "58"
}
]
}
]
}
]
===============================
Json expected:
[
{
"id": 6,
"label": "Sao Paulo",
"Customer": [
{
"id": "CUST-99992",
"label": "Brazil",
"Number": [
{
"part": "982",
"number_customer": "10",
"Procucts": [
{
"product": "sho1564",
"label_product": "shoes",
"Order": [
{
"key": "SH-99",
"country": "Chile",
"value_product": "1.5"
}
]
},
{
"product": "sn47282",
"label_product": "sneakers",
"Order": [
{
"key": "SN-71",
"country": "Germany",
"value_product": "43.8"
}
]
}
]
}
]
},
{
"id": "CUST-43535",
"label": "Argentina",
"Number": [
{
"part": "435",
"number_customer": "15",
"Procucts": [
{
"product": "sk84393",
"label_product": "skirt",
"Order": [
{
"key": "SK-11",
"country": "Netherlands",
"value_product": "87.1"
}
]
}
]
}
]
}
]
},
{
"id": 92,
"label": "Hong Kong",
"Customer": [
{
"id": "CUST-88888",
"label": "China",
"Number": [
{
"part": "785",
"number_customer": "58",
"Procucts": [
{
"product": "ca40349",
"label_product": "cap",
"Order": [
{
"key": "CA-82",
"country": "Russia",
"value_product": "3.95"
}
]
}
]
}
]
}
]
}
]
===============================
Look that id and label is group of information even as id_customer and label customer is another group, part_number and number_customer is another, product and label_product another, key, country and value_product another.
My expected Json depends of my information inside my dataframe.
Can somebody help me in any way pls?
import pandas as pd
import json
df = pd.read_excel(path)
result = []
for labels, df1 in df.groupby(['id', 'label'], sort=False):
id_, label = labels
record = {'id': int(id_), 'label': label, 'Customer': []}
for inner_labels, df2 in df1.groupby(['id_customer', 'label_customer'], sort=False):
id_, label = inner_labels
customer = {'id': id_, 'label': label, 'Number': []}
for inner_labels, df3 in df2.groupby(['part_number', 'number_customer'], sort=False):
p, s = inner_labels
number = {'part': str(p), 'number_customer': str(s), 'Products': []}
for inner_labels, df4 in df3.groupby(['product', 'label_product'], sort=False):
p, lp = inner_labels
product = {'product': p, 'label_product': lp, 'Order': []}
for k, c, v in zip(df4['key'], df4['country'], df4['value_product']):
product['Order'].append({'key': k, 'country': c, 'value_product': v})
number['Products'].append(product)
customer['Number'].append(number)
record['Customer'].append(customer)
result.append(record)
Hope this is of use!
from io import StringIO
import pandas as pd
import json
csv = """id,label,id_customer,label_customer,part_number,number_customer,product,label_product,key,country,value_product
6,Sao Paulo,CUST-99992,Brazil,982,10,sho1564,shoes,SH-99,Chile,1.5
6,Sao Paulo,CUST-99992,Brazil,982,10,sn47282,sneakers,SN-71,Germany,43.8
6,Sao Paulo,CUST-43535,Argentina,435,15,sk84393,skirt,SK-11,Netherlands,87.1
92,Hong Hong,CUST-88888,China,785,58,ca40349,cap,CA-82,Russia,3.95"""
csv = StringIO(csv)
df = pd.read_csv(csv)
def split(df, groupby, json_func):
for x, group in df.groupby(groupby):
yield json_func(group, *x)
a = list(split(df, ['id', 'label'], lambda grp, id_, label: {"id": id_, "label": label, "Customer": list(
split(grp, ['id_customer', 'label_customer'], lambda grp_1, id_cust, label_cust: {"id": id_cust, "label": label_cust, "Number": list(
split(grp_1, ['part_number', 'number_customer'], lambda grp_2, part, num_cust: {"part": part, "number_customer": num_cust, "Products": list(
split(grp_2, ['product', 'label_product'], lambda grp_3, product, label_product: {"product": product, "label_product": label_product, "Order": list(
split(grp_3, ['key', 'country', 'value_product'], lambda _, key, country, value_product: {"key": key, "country": country, "value_product": value_product}))}
))})
)}))}))
display(a)

Parsing and creating nested dictionaries

I would like to create a dictionary containing a nested structure of dictionaries, like bellow :
{
"Jaque": {
"ES": {
"Madrid": [
{
"experience": 9
}
]
},
"FR": {
"Lyon": [
{
"experience": 11.4
}
],
"Paris": [
{
"experience": 20
}
]
}
},
"James": {
"UK": {
"London": [
{
"experience": 10.9
}
]
}
},
"Henry": {
"UK": {
"London": [
{
"experience": 15
}
]
}
},
"Joe": {
"US": {
"Boston": [
{
"experience": 100
}
]
}
}
}
}
My input is a list of dictionaries of this format:
c = [{
"country": "US",
"city": "Boston",
"name": "Joe",
"experience": 100
},
{
"country": "FR",
"city": "Paris",
"name": "Jaque",
"experience": 20
},
{
"country": "FR",
"city": "Lyon",
"name": "Jaque",
"experience": 11.4
},
{
"country": "ES",
"city": "Madrid",
"name": "Jaque",
"experience": 9
},
{
"country": "UK",
"city": "London",
"name": "Henry",
"experience": 15
},
{
"country": "UK",
"city": "London",
"name": "James",
"experience": 10.9
}
]
My first approach was to create the nested dict, step by step:
dd = dict.fromkeys([i.get("name") for i in c],defaultdict(dict))
#will create
# dd = {'Joe': defaultdict(<class 'dict'>, {}), 'Jaque': defaultdict(<class 'dict'>, {}), 'James': defaultdict(<class 'dict'>, {}), 'Henry': defaultdict(<class 'dict'>, {})}
for i in dd:
for j in c:
#verify if name from d is in dict j
if i in j.values():
dd[i]=dict(zip([a.get("country") for a in c if i in a.values() ],[b.get("city") for b in c if i in b.values() ]))
# dd will become
#{'Joe': {'US': 'Boston'}, 'Jaque': {'FR': 'Lyon', 'ES': 'Madrid'}, 'Henry': {'UK': 'London'}, 'James': {'UK': 'London'}}
Now I can't figure a way to create/update the nested structure of dict dd. Is there a more dynamic way to create dict? Thx
You could use itertools.groupby to organize the list similarly to your expected output and then loop to convert to a dict.
from itertools import groupby
from operator import itemgetter
data = [{"country": "US", "city": "Boston", "name": "Joe", "experience": 100 }, {"country": "FR", "city": "Paris", "name": "Jaque", "experience": 20 }, {"country": "FR", "city": "Lyon", "name": "Jaque", "experience": 11.4 }, {"country": "ES", "city": "Madrid", "name": "Jaque", "experience": 9 }, {"country": "UK", "city": "London", "name": "Henry", "experience": 15 }, {"country": "UK", "city": "London", "name": "James", "experience": 10.9 } ]
result = {}
for key, values in groupby(sorted(data, key=itemgetter('name')), key=itemgetter('name')):
result[key] = {
v['country']: {v['city']: [{'experience': v['experience']}]} for v in values
}
print(result)
# {'Henry': {'UK': {'London': [{'experience': 15}]}}, 'James': {'UK': {'London': [{'experience': 10.9}]}}, 'Jaque': {'FR': {'Lyon': [{'experience': 11.4}]}, 'ES': {'Madrid': [{'experience': 9}]}}, 'Joe': {'US': {'Boston': [{'experience': 100}]}}}
You can use recursion with itertools.groupby:
from itertools import groupby
def group(d, keys = None):
key, *keys = keys
new_d = {a:list(b) for a, b in groupby(sorted(d, key=lambda x:x[key]), key=lambda x:x[key])}
t = {a:[{c:d for c, d in k.items() if c != key} for k in b] for a, b in new_d.items()}
return {a:group(b, keys) if not all(len(i) == 1 for i in b) else b for a, b in t.items()}
result = group(data, keys = ['name', 'country', 'city', 'experience'])
import json
print(json.dumps(result, indent=4)))
Output:
{
"Henry": {
"UK": {
"London": [
{
"experience": 15
}
]
}
},
"James": {
"UK": {
"London": [
{
"experience": 10.9
}
]
}
},
"Jaque": {
"ES": {
"Madrid": [
{
"experience": 9
}
]
},
"FR": {
"Lyon": [
{
"experience": 11.4
}
],
"Paris": [
{
"experience": 20
}
]
}
},
"Joe": {
"US": {
"Boston": [
{
"experience": 100
}
]
}
}
}

nested json file with dictionary into a dataframe

I have a json file as
{
"Date": 2017,
"count": 88,
"demographics": [
{
"key": "age",
"value": "20-30"
},
{
"key": "education",
"value": 'bachelor'
},
{
"key": "income",
"value": "70-80"
},
{
"key": "location",
"value": "USA"
}
]
}
I use the code below to convert it into csv file. It gives me a dataframe with 3 columns of Date, count, and demographics, but the output I need is a dataframe with 6 columns of Date, Count, age, education, income, and location.
with open(r'Sample.json') as f:
data = json.load(f)
dfNormalized=json_normalize(data)
By using apply with pd.Serise:
d={
"Date": 2017,
"count": 88,
"demographics": [
{
"key": "age",
"value": "20-30"
},
{
"key": "education",
"value": 'bachelor'
},
{
"key": "income",
"value": "70-80"
},
{
"key": "location",
"value": "USA"
}
]
}
df=pd.DataFrame(d)
A=(df.demographics.apply(pd.Series).T.loc['value'].to_frame().\
set_index(df.demographics.apply(pd.Series).T.loc['key']).T).\
rename_axis(None,1).reset_index(drop=True)
pd.concat([df.drop('demographics',1).loc[0].to_frame().T,A],1)
Out[273]:
Date count age education income location
0 2017 88 20-30 bachelor 70-80 USA

Categories

Resources