Adding values to a nested dictionary in python - python

I have the following dictionary
d1 = {
"Completely Agree": {
"child": {
"Male": {
"child": {
"Greater than 54": {
"child": {},
"value": 4,
"label": "Greater than 54"
},
"Between 35 to 39": {
"child": {},
"value": 1,
"label": "Between 35 to 39"
}
},
"value": 5,
"label": "Male"
}
},
"value": 5,
"label": "Completely Agree"
},
"Somewhat Agree": {
"child": {
"Male": {
"child": {
"Greater than 54": {
"child": {},
"value": 1,
"label": "Greater than 54"
},
"Between 45 to 49": {
"child": {},
"value": 2,
"label": "Between 45 to 49"
},
"Between 25 to 29": {
"child": {},
"value": 1,
"label": "Between 25 to 29"
},
"Between 35 to 39": {
"child": {},
"value": 1,
"label": "Between 35 to 39"
},
"Between 50 to 54": {
"child": {},
"value": 3,
"label": "Between 50 to 54"
},
"Between 40 to 44": {
"child": {},
"value": 1,
"label": "Between 40 to 44"
}
},
"value": 9,
"label": "Male"
},
"Female": {
"child": {
"Between 25 to 29": {
"child": {},
"value": 2,
"label": "Between 25 to 29"
},
"Between 30 to 34": {
"child": {},
"value": 1,
"label": "Between 30 to 34"
},
"Greater than 54": {
"child": {},
"value": 1,
"label": "Greater than 54"
}
},
"value": 4,
"label": "Female"
}
},
"value": 13,
"label": "Somewhat Agree"
},
"Neither Agree nor Disagree": {
"child": {
"Male": {
"child": {
"Between 25 to 29": {
"child": {},
"value": 1,
"label": "Between 25 to 29"
},
"Between 35 to 39": {
"child": {},
"value": 1,
"label": "Between 35 to 39"
},
"Between 30 to 34": {
"child": {},
"value": 1,
"label": "Between 30 to 34"
},
"Between 45 to 49": {
"child": {},
"value": 1,
"label": "Between 45 to 49"
},
"Between 50 to 54": {
"child": {},
"value": 1,
"label": "Between 50 to 54"
}
},
"value": 5,
"label": "Male"
},
"Female": {
"child": {
"Less than 25": {
"child": {},
"value": 1,
"label": "Less than 25"
}
},
"value": 1,
"label": "Female"
}
},
"value": 6,
"label": "Neither Agree nor Disagree"
}
I want to insert another key lets say 'data_recs' on the same level where child is an empty dictionary {}. So the result should be
d1 = {
"Completely Agree": {
"child": {
"Male": {
"child": {
"Greater than 54": {
"child": {},
"value": 4,
"label": "Greater than 54",
"data_recs": [1,2,3,4]
},
"Between 35 to 39": {
"child": {},
"value": 1,
"label": "Between 35 to 39",
"data_recs": [1,2,3,4]
}
},
"value": 5,
"label": "Male"
}
},
"value": 5,
"label": "Completely Agree"
},
"Somewhat Agree": {
"child": {
"Male": {
"child": {
"Greater than 54": {
"child": {},
"value": 1,
"label": "Greater than 54",
"data_recs": [1,2,3,4]
},
"Between 45 to 49": {
"child": {},
"value": 2,
"label": "Between 45 to 49"
},
"Between 25 to 29": {
"child": {},
"value": 1,
"label": "Between 25 to 29",
"data_recs": [1,2,3,4]
},
"Between 35 to 39": {
"child": {},
"value": 1,
"label": "Between 35 to 39",
"data_recs": [1,2,3,4]
},
"Between 50 to 54": {
"child": {},
"value": 3,
"label": "Between 50 to 54",
"data_recs": [1,2,3,4]
},
"Between 40 to 44": {
"child": {},
"value": 1,
"label": "Between 40 to 44",
"data_recs": [1,2,3,4]
}
},
"value": 9,
"label": "Male"
},
"Female": {
"child": {
"Between 25 to 29": {
"child": {},
"value": 2,
"label": "Between 25 to 29",
"data_recs": [1,2,3,4]
},
"Between 30 to 34": {
"child": {},
"value": 1,
"label": "Between 30 to 34",
"data_recs": [1,2,3,4]
},
"Greater than 54": {
"child": {},
"value": 1,
"label": "Greater than 54",
"data_recs": [1,2,3,4]
}
},
"value": 4,
"label": "Female"
}
},
"value": 13,
"label": "Somewhat Agree"
}
The dictionary can have n number of hierarchy. I have written the following code to implement this but I think I am missing something out here.
def parse_master_dict(data, recs_map):
for k,v in data.items():
print k, v
if v.get('child', None):
child = v['child']
if not child:
print "here", k
v['data_recs'] = recs_map.get(k, [])
else:
#if child can have further children
parse_master_dict(child, recs_map)
Please advise.

Your if v.get('child', None): statement is preventing you from proceeding to update the dict when the child dict is empty since the condition would be evaluated as False. Remove the if statement and your code should work:
def parse_master_dict(data, recs_map):
for k,v in data.items():
child = v['child']
if not child:
v['data_recs'] = recs_map.get(k, [])
else:
parse_master_dict(child, recs_map)

Related

Split Json Data by certain string values (Python)

I want to split incidents by "incidentType" values for python. It always have 5 of these values: period, injuryTime, goal, card and substitution.
Json File
{
"incidents": [
{
"text": "FT",
"homeScore": 2,
"awayScore": 1,
"isLive": false,
"time": 90,
"addedTime": 999,
"incidentType": "period"
},
{
"length": 4,
"time": 90,
"addedTime": 0,
"incidentType": "injuryTime"
},
{
"homeScore": 2,
"awayScore": 1,
"player": {
"name": "Mostafa Mohamed",
"firstName": "",
"lastName": "",
"slug": "mostafa-mohamed",
"shortName": "M. Mohamed",
"position": "F",
"userCount": 3949,
"id": 873551
},
"id": 141786584,
"time": 89,
"isHome": true,
"incidentClass": "penalty",
"incidentType": "goal"
},
{
"player": {
"name": "Duško Tošić",
"slug": "dusko-tosic",
"shortName": "D. Tošić",
"position": "D",
"userCount": 215,
"id": 14557
},
"playerName": "Duško Tošić",
"reason": "Foul",
"id": 119728583,
"time": 85,
"isHome": false,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"playerIn": {
"name": "Younès Belhanda",
"slug": "younes-belhanda",
"shortName": "Y. Belhanda",
"position": "M",
"userCount": 2165,
"id": 72999
},
"playerOut": {
"name": "Martin Linnes",
"slug": "martin-linnes",
"shortName": "M. Linnes",
"position": "D",
"userCount": 339,
"id": 109569
},
"id": 120059400,
"time": 82,
"isHome": true,
"incidentType": "substitution"
},
{
"player": {
"name": "Kevin Varga",
"slug": "kevin-varga",
"shortName": "K. Varga",
"position": "M",
"userCount": 274,
"id": 602730
},
"playerName": "Kevin Varga",
"reason": "Foul",
"id": 119728582,
"time": 82,
"isHome": false,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"playerIn": {
"name": "DeAndre Yedlin",
"slug": "deandre-yedlin",
"shortName": "D. Yedlin",
"position": "D",
"userCount": 702,
"id": 314040
},
"playerOut": {
"name": "Muhammed Kerem Aktürkoğlu",
"firstName": "",
"lastName": "",
"slug": "muhammed-kerem-akturkoglu",
"shortName": "M. K. Aktürkoğlu",
"position": "F",
"userCount": 281,
"id": 903324
},
"id": 120059399,
"time": 77,
"isHome": true,
"incidentType": "substitution"
},
{
"playerIn": {
"name": "Ryan Donk",
"slug": "ryan-donk",
"shortName": "R. Donk",
"position": "D",
"userCount": 489,
"id": 14900
},
"playerOut": {
"name": "Ryan Babel",
"slug": "ryan-babel",
"shortName": "R. Babel",
"position": "F",
"userCount": 1577,
"id": 1876
},
"id": 120059397,
"time": 72,
"isHome": true,
"incidentType": "substitution"
},
{
"playerIn": {
"name": "Emre Akbaba",
"slug": "emre-akbaba",
"shortName": "E. Akbaba",
"position": "M",
"userCount": 604,
"id": 343527
},
"playerOut": {
"name": "Gedson Fernandes",
"slug": "fernandes-gedson",
"shortName": "G. Fernandes",
"position": "M",
"userCount": 3030,
"id": 862055
},
"id": 120059396,
"time": 71,
"isHome": true,
"incidentType": "substitution"
},
{
"playerIn": {
"name": "Henry Onyekuru",
"slug": "henry-onyekuru",
"shortName": "H. Onyekuru",
"position": "M",
"userCount": 1474,
"id": 809220
},
"playerOut": {
"name": "Emre Kılınç",
"slug": "emre-kilinc",
"shortName": "E. Kılınç",
"position": "M",
"userCount": 526,
"id": 202032
},
"id": 120059398,
"time": 71,
"isHome": true,
"incidentType": "substitution"
},
{
"player": {
"name": "Haris Hajradinović",
"slug": "haris-hajradinovic",
"shortName": "H. Hajradinović",
"position": "M",
"userCount": 357,
"id": 254979
},
"playerName": "Haris Hajradinović",
"reason": "Foul",
"id": 119728581,
"time": 71,
"isHome": false,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"homeScore": 1,
"awayScore": 1,
"player": {
"name": "Isaac Kiese Thelin",
"slug": "isaac-kiese-thelin",
"shortName": "I. K. Thelin",
"position": "F",
"userCount": 386,
"id": 178743
},
"assist1": {
"name": "Haris Hajradinović",
"slug": "haris-hajradinovic",
"shortName": "H. Hajradinović",
"position": "M",
"userCount": 357,
"id": 254979
},
"id": 141786585,
"time": 51,
"isHome": false,
"incidentClass": "regular",
"incidentType": "goal"
},
{
"playerIn": {
"name": "Kevin Varga",
"slug": "kevin-varga",
"shortName": "K. Varga",
"position": "M",
"userCount": 274,
"id": 602730
},
"playerOut": {
"name": "Gilbert Koomson",
"slug": "gilbert-koomson",
"shortName": "G. Koomson",
"position": "F",
"userCount": 76,
"id": 341107
},
"id": 120059401,
"time": 46,
"isHome": false,
"incidentType": "substitution"
},
{
"text": "HT",
"homeScore": 1,
"awayScore": 0,
"isLive": false,
"time": 45,
"addedTime": 999,
"incidentType": "period"
},
{
"player": {
"name": "Isaac Kiese Thelin",
"slug": "isaac-kiese-thelin",
"shortName": "I. K. Thelin",
"position": "F",
"userCount": 386,
"id": 178743
},
"playerName": "Isaac Kiese Thelin",
"reason": "Foul",
"id": 119728580,
"time": 15,
"isHome": false,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"homeScore": 1,
"awayScore": 0,
"player": {
"name": "Muhammed Kerem Aktürkoğlu",
"firstName": "",
"lastName": "",
"slug": "muhammed-kerem-akturkoglu",
"shortName": "M. K. Aktürkoğlu",
"position": "F",
"userCount": 281,
"id": 903324
},
"id": 141786583,
"time": 9,
"isHome": true,
"incidentClass": "regular",
"incidentType": "goal"
}
]
}
ABC = {
"incidents": [
{
"text": "FT",
"homeScore": 2,
"awayScore": 1,
"isLive": False,
"time": 90,
"addedTime": 999,
"incidentType": "period"
},
{
"length": 4,
"time": 90,
"addedTime": 0,
"incidentType": "injuryTime"
},
{
"homeScore": 2,
"awayScore": 1,
"player": {
"name": "Mostafa Mohamed",
"firstName": "",
"lastName": "",
"slug": "mostafa-mohamed",
"shortName": "M. Mohamed",
"position": "F",
"userCount": 3949,
"id": 873551
},
"id": 141786584,
"time": 89,
"isHome": True,
"incidentClass": "penalty",
"incidentType": "goal"
},
{
"player": {
"name": "Duško Tošić",
"slug": "dusko-tosic",
"shortName": "D. Tošić",
"position": "D",
"userCount": 215,
"id": 14557
},
"playerName": "Duško Tošić",
"reason": "Foul",
"id": 119728583,
"time": 85,
"isHome": False,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"playerIn": {
"name": "Younès Belhanda",
"slug": "younes-belhanda",
"shortName": "Y. Belhanda",
"position": "M",
"userCount": 2165,
"id": 72999
},
"playerOut": {
"name": "Martin Linnes",
"slug": "martin-linnes",
"shortName": "M. Linnes",
"position": "D",
"userCount": 339,
"id": 109569
},
"id": 120059400,
"time": 82,
"isHome": True,
"incidentType": "substitution"
},
{
"player": {
"name": "Kevin Varga",
"slug": "kevin-varga",
"shortName": "K. Varga",
"position": "M",
"userCount": 274,
"id": 602730
},
"playerName": "Kevin Varga",
"reason": "Foul",
"id": 119728582,
"time": 82,
"isHome": False,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"playerIn": {
"name": "DeAndre Yedlin",
"slug": "deandre-yedlin",
"shortName": "D. Yedlin",
"position": "D",
"userCount": 702,
"id": 314040
},
"playerOut": {
"name": "Muhammed Kerem Aktürkoğlu",
"firstName": "",
"lastName": "",
"slug": "muhammed-kerem-akturkoglu",
"shortName": "M. K. Aktürkoğlu",
"position": "F",
"userCount": 281,
"id": 903324
},
"id": 120059399,
"time": 77,
"isHome": True,
"incidentType": "substitution"
},
{
"playerIn": {
"name": "Ryan Donk",
"slug": "ryan-donk",
"shortName": "R. Donk",
"position": "D",
"userCount": 489,
"id": 14900
},
"playerOut": {
"name": "Ryan Babel",
"slug": "ryan-babel",
"shortName": "R. Babel",
"position": "F",
"userCount": 1577,
"id": 1876
},
"id": 120059397,
"time": 72,
"isHome": True,
"incidentType": "substitution"
},
{
"playerIn": {
"name": "Emre Akbaba",
"slug": "emre-akbaba",
"shortName": "E. Akbaba",
"position": "M",
"userCount": 604,
"id": 343527
},
"playerOut": {
"name": "Gedson Fernandes",
"slug": "fernandes-gedson",
"shortName": "G. Fernandes",
"position": "M",
"userCount": 3030,
"id": 862055
},
"id": 120059396,
"time": 71,
"isHome": True,
"incidentType": "substitution"
},
{
"playerIn": {
"name": "Henry Onyekuru",
"slug": "henry-onyekuru",
"shortName": "H. Onyekuru",
"position": "M",
"userCount": 1474,
"id": 809220
},
"playerOut": {
"name": "Emre Kılınç",
"slug": "emre-kilinc",
"shortName": "E. Kılınç",
"position": "M",
"userCount": 526,
"id": 202032
},
"id": 120059398,
"time": 71,
"isHome": True,
"incidentType": "substitution"
},
{
"player": {
"name": "Haris Hajradinović",
"slug": "haris-hajradinovic",
"shortName": "H. Hajradinović",
"position": "M",
"userCount": 357,
"id": 254979
},
"playerName": "Haris Hajradinović",
"reason": "Foul",
"id": 119728581,
"time": 71,
"isHome": False,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"homeScore": 1,
"awayScore": 1,
"player": {
"name": "Isaac Kiese Thelin",
"slug": "isaac-kiese-thelin",
"shortName": "I. K. Thelin",
"position": "F",
"userCount": 386,
"id": 178743
},
"assist1": {
"name": "Haris Hajradinović",
"slug": "haris-hajradinovic",
"shortName": "H. Hajradinović",
"position": "M",
"userCount": 357,
"id": 254979
},
"id": 141786585,
"time": 51,
"isHome": False,
"incidentClass": "regular",
"incidentType": "goal"
},
{
"playerIn": {
"name": "Kevin Varga",
"slug": "kevin-varga",
"shortName": "K. Varga",
"position": "M",
"userCount": 274,
"id": 602730
},
"playerOut": {
"name": "Gilbert Koomson",
"slug": "gilbert-koomson",
"shortName": "G. Koomson",
"position": "F",
"userCount": 76,
"id": 341107
},
"id": 120059401,
"time": 46,
"isHome": False,
"incidentType": "substitution"
},
{
"text": "HT",
"homeScore": 1,
"awayScore": 0,
"isLive": False,
"time": 45,
"addedTime": 999,
"incidentType": "period"
},
{
"player": {
"name": "Isaac Kiese Thelin",
"slug": "isaac-kiese-thelin",
"shortName": "I. K. Thelin",
"position": "F",
"userCount": 386,
"id": 178743
},
"playerName": "Isaac Kiese Thelin",
"reason": "Foul",
"id": 119728580,
"time": 15,
"isHome": False,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"homeScore": 1,
"awayScore": 0,
"player": {
"name": "Muhammed Kerem Aktürkoğlu",
"firstName": "",
"lastName": "",
"slug": "muhammed-kerem-akturkoglu",
"shortName": "M. K. Aktürkoğlu",
"position": "F",
"userCount": 281,
"id": 903324
},
"id": 141786583,
"time": 9,
"isHome": True,
"incidentClass": "regular",
"incidentType": "goal"
}
]
}
First, create a dictionary to hold all distinct incidentType. Then iterate through incidents and check if whether incidentType exists in the dictionary or not. If it exists? Append. if not, create a new key : value pair
result = {}
for js in ABC["incidents"]:
icdType = js["incidentType"]
if icdType in result:
result[icdType].append(js)
else:
result[icdType] = [js]
for key,val in result.items():
print(key, ":", val, "\n")

How to map the dictionary values to another dictionary

I have dictionary which is below
{
"aggregations": {
"A": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{ "key": "ADL", "doc_count": 1 },
{ "key": "SDD", "doc_count": 1 },
{ "key": "JJD", "doc_count": 1 }
]
},
"B": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{ "key": "ABC", "doc_count": 1 },
{ "key": "CDE", "doc_count": 1 },
{ "key": "FGH", "doc_count": 1 }
]
},
"C": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{ "key": "XYX", "doc_count": 1 },
{ "key": "NXS", "doc_count": 1 }
]
}
}
}
aggregations.keys will be aggregationfilters.fieldName
aggregations.buckets.key will be aggregationfilters.values.title
aggregationfilters.values.paragraph is null everytime
aggregations.buckets.doc_count will be aggregationfilters.values.count
Basically I need to extract aggregations.keys and aggregations.bucket values and put into different dictionary.
Need to write a general code structure to do that.
I cannot do with .pop(rename) the dictioanry
My expected out
{
"aggregationfilters": [
{
"name": "ABC",
"fieldName": "A",
"values": [
{ "title": "ADL", "paragraph": null, "count": 1 },
{ "title": "SDD", "paragraph": null, "count": 1 },
{ "title": "JJD", "paragraph": null, "count": 1 }
]
}, {
"name": "CDE",
"fieldName": "B",
"values": [
{ "title": "ABC", "paragraph": null, "count": 1 },
{ "title": "CDE", "paragraph": null, "count": 1 },
{ "title": "FGH", "paragraph": null, "count": 1 }
]
}, {
"name": "FGH",
"fieldName": "C",
"values": [
{ "title": "XYX", "paragraph": null, "count": 1 },
{ "title": "NXS", "paragraph": null, "count": 1 }
]
}
]
}
Well, this works, but even with my best effort this still doesn't look that clean.
import json
source = {
"aggregations": {
"A": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{"key": "ADL", "doc_count": 1},
{"key": "SDD", "doc_count": 1},
{"key": "JJD", "doc_count": 1},
],
},
"B": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{"key": "ABC", "doc_count": 1},
{"key": "CDE", "doc_count": 1},
{"key": "FGH", "doc_count": 1},
],
},
"C": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{"key": "XYX", "doc_count": 1}, {"key": "NXS", "doc_count": 1}],
},
}
}
convert_map = {
"buckets": "values",
"doc_count": "count",
"key": "title",
}
remove_map = {"sum_other_doc_count", "doc_count_error_upper_bound"}
add_map = {"name": "Changed VAL_", "fieldName": "VAL_"}
def converting_generator(
source_: dict, convert_map_: dict, remove_map_: set, add_map_: dict
):
working_dict = {k: v for k, v in source_.items()}
variable_identifier = "VAL_"
for key, inner_dic in working_dict.items():
inner_dic: dict
for rm_key in remove_map_:
try:
inner_dic.pop(rm_key)
except KeyError:
pass
for add_key, add_val in add_map_.items():
inner_dic[add_key] = add_val.replace(variable_identifier, key)
dumped = json.dumps(inner_dic, indent=2)
for original, target in convert_map_.items():
dumped = dumped.replace(original, target)
yield json.loads(dumped)
converted = {
"aggregation_filters": list(
converting_generator(source["aggregations"], convert_map, remove_map, add_map)
)
}
for inner_dict in converted["aggregation_filters"]:
for even_inner_dict in inner_dict["values"]:
even_inner_dict["paragraph"] = None
print(json.dumps(converted, indent=2))
Output:
{
"aggregation_filters": [
{
"values": [
{
"title": "ADL",
"count": 1,
"paragraph": null
},
{
"title": "SDD",
"count": 1,
"paragraph": null
},
{
"title": "JJD",
"count": 1,
"paragraph": null
}
],
"name": "Changed A",
"fieldName": "A"
},
{
"values": [
{
"title": "ABC",
"count": 1,
"paragraph": null
},
{
"title": "CDE",
"count": 1,
"paragraph": null
},
{
"title": "FGH",
"count": 1,
"paragraph": null
}
],
"name": "Changed B",
"fieldName": "B"
},
{
"values": [
{
"title": "XYX",
"count": 1,
"paragraph": null
},
{
"title": "NXS",
"count": 1,
"paragraph": null
}
],
"name": "Changed C",
"fieldName": "C"
}
]
}
Always show your code, would be nice if that's a working one - to show that you've put at least that worth of the effort on your problem.
I don't bother it as this feels like puzzle solving, but others may not.

Manipulating data of Pandas dataframe

I'm reading a dataframe and trying to 'insert' a list inside another list and then converting it to json file. I'm using python 3 and 0.25.3 version of pandas for it.
My dataframe:
id label id_customer label_customer part_number number_client
6 Sao Paulo CUST-99992 Brazil 7897 982
6 Sao Paulo CUST-99992 Brazil 888 12
92 Hong Kong CUST-88888 China 147 288
My code:
import pandas as pd
import json
data = pd.read_excel(path)
data["part_number"] = data["part_number"].apply(lambda x: str(x))
data["number_client"] = data["number_client"].apply(lambda x: str(x))
data = data.groupby(["id", "label", "id_customer", "label_customer"], as_index=False).agg("#".join)
data["part_number"] = data["part_number"].apply(lambda x: {"part": x})
data["number_client"] = data["number_client"].apply(lambda x: {"client": x})
data["id_customer"] = data["id_customer"].apply(lambda x: {"id": x})
data["label_customer"] = data["label_customer"].apply(lambda x: {"label": x})
data["number"] = data.apply(lambda x: [{**x["part_number"], **x["number_client"]}], axis=1)
data["Customer"] = data.apply(lambda x: [{**x["id_customer"], **x["label_customer"], **data["number"]}],axis=1)
data = data[["id", "label", "Customer"]]
data.to_json(path)
Json output I'm getting:
[{
"id": 6,
"label": "Sao Paulo",
"Customer": [{
"id": "CUST-99992",
"label": "Brazil",
"0": [{
"part": "7897",
"client": "982"
}],
"1": [{
"part": "888",
"client": "12"
}],
"2": [{
"part": "147",
"client": "288"
}]
}]
}, {
"id": 6,
"label": "Sao Paulo",
"Customer": [{
"id": "CUST-99992",
"label": "Brazil",
"0": [{
"part": "7897",
"client": "982"
}],
"1": [{
"part": "888",
"client": "12"
}],
"2": [{
"part": "147",
"client": "288"
}]
}]
}, {
"id": 92,
"label": "Hong Kong",
"Customer": [{
"id": "CUST-888888",
"label": "China",
"0": [{
"part": "7897",
"client": "982"
}],
"1": [{
"part": "888",
"client": "12"
}],
"2": [{
"part": "147",
"client": "288"
}]
}]
}]
What I need:
[{
"id": 6,
"label": "Sao Paulo",
"Customer": [{
"id": "CUST-99992",
"label": "Brazil",
"number": [{
"part": "7897",
"client": "982"
},
{
"part": "888",
"client": "12"
}]
}]
},
{
"id": 92,
"label": "Hong Kong",
"Customer": [{
"id": "CUST-888888",
"label": "China",
"number": [{
"part": "147",
"client": "288"
}]
}]
}
]
Look that id and label a is group of information even as id_customer and label_customer is another group, part_number and number_client is another. Customerand number are lists and they can have a lot objects inside them (the number of objects depends of my data in my dataframe).
What am I doing wrong and how can I fix it?
Tks so much!
First cast both columns to strings and then use lambda functions with DataFrame.to_dict and rename columns names, last convert output to json by DataFrame.to_json:
data[["part_number","number_client"]] = data[["part_number","number_client"]].astype(str)
f = lambda x: x.split('_')[0]
j =(data.groupby(["id","label","id_customer","label_customer"])['part_number','number_client']
.apply(lambda x: x.rename(columns=f).to_dict('r')).reset_index(name='number')
.groupby(["id", "label"])[ "id_customer", "label_customer", "number"]
.apply(lambda x: x.rename(columns=f).to_dict('r')).reset_index(name='customer')
.to_json(orient='records'))
print (j)
[{
"id": 6,
"label": "Sao Paulo",
"customer": [{
"id": "CUST-99992",
"label": "Brazil",
"number": [{
"part": "7897",
"number": "982"
}, {
"part": "888",
"number": "12"
}]
}]
}, {
"id": 92,
"label": "Hong Kong",
"customer": [{
"id": "CUST-88888",
"label": "China",
"number": [{
"part": "147",
"number": "288"
}]
}]
}]

remove duplicates from json [duplicate]

This question already has answers here:
Removing Duplicates From Dictionary
(11 answers)
Closed 3 years ago.
I have following JSON
{
"FileResults": [
{
"FileName": "gtg.0.wav",
"FileUrl": null,
"Results": [
{
"Status": "Success",
"ChannelNumber": null,
"SpeakerId": null,
"Offset": 90200000,
"Duration": 25600000,
"NBest": [
{
"Confidence": 0.9415368,
"Lexical": "",
"ITN": "",
"MaskedITN": "",
"Display": ".",
"Sentiment": null,
"Words": [
{
"Word": "ask",
"Offset": 944400000,
"Duration": 3500000
},
{
"Word": "everybody",
"Offset": 94000000,
"Duration": 4400000
},
{
"Word": "to",
"Offset": 98400000,
"Duration": 1200000
},
{
"Word": "please",
"Offset": 99600000,
"Duration": 3000000
},
{
"Word": "take",
"Offset": 102600000,
"Duration": 2400000
},
{
"Word": "their",
"Offset": 105000000,
"Duration": 2400000
},
{
"Word": "seats",
"Offset": 107400000,
"Duration": 8200000
}
]
}
]
},
{
"Status": "Success",
"ChannelNumber": null,
"SpeakerId": null,
"Offset": 90200000,
"Duration": 25600000,
"NBest": [
{
"Confidence": 0.9415368,
"Lexical": "",
"ITN": "",
"MaskedITN": "",
"Display": ".",
"Sentiment": null,
"Words": [
{
"Word": "ask",
"Offset": 90500000,
"Duration": 3500000
},
{
"Word": "everybody",
"Offset": 94000000,
"Duration": 4400000
},
{
"Word": "to",
"Offset": 98400000,
"Duration": 1200000
},
{
"Word": "please",
"Offset": 99600000,
"Duration": 3000000
},
{
"Word": "take",
"Offset": 102600000,
"Duration": 2400000
},
{
"Word": "their",
"Offset": 105000000,
"Duration": 2400000
},
{
"Word": "seats",
"Offset": 107400000,
"Duration": 8200000
}
]
}
]
},
{
"Status": "Success",
"ChannelNumber": null,
"SpeakerId": null,
"Offset": 169400000,
"Duration": 157500000,
"NBest": [
{
"Confidence": 0.944001734,
"Lexical": "",
"ITN": "",
"MaskedITN": "",
"Display": "",
"Sentiment": null,
"Words": [
{
"Word": "welcome",
"Offset": 169700000,
"Duration": 4500000
},
{
"Word": "to",
"Offset": 174200000,
"Duration": 2600000
},
{
"Word": "the",
"Offset": 176800000,
"Duration": 8600000
},
{
"Word": "scheduled",
"Offset": 186500000,
"Duration": 7900000
},
{
"Word": "special",
"Offset": 194400000,
"Duration": 6000000
},
{
"Word": "budget",
"Offset": 200400000,
"Duration": 4400000
},
{
"Word": "hearings",
"Offset": 204800000,
"Duration": 6400000
},
{
"Word": "meeting",
"Offset": 211400000,
"Duration": 4800000
},
{
"Word": "of",
"Offset": 216200000,
"Duration": 1600000
},
{
"Word": "the",
"Offset": 217800000,
"Duration": 1300000
},
{
"Word": "los",
"Offset": 219100000,
"Duration": 2300000
},
{
"Word": "lm",
"Offset": 221400000,
"Duration": 3600000
},
{
"Word": "mk",
"Offset": 225000000,
"Duration": 5500000
},
{
"Word": "board",
"Offset": 231800000,
"Duration": 4600000
},
{
"Word": "of",
"Offset": 236400000,
"Duration": 1000000
},
{
"Word": "supervisors",
"Offset": 237400000,
"Duration": 9200000
},
{
"Word": "seems",
"Offset": 246600000,
"Duration": 3000000
},
{
"Word": "like",
"Offset": 249600000,
"Duration": 2400000
},
{
"Word": "we",
"Offset": 252000000,
"Duration": 1400000
},
{
"Word": "were",
"Offset": 253400000,
"Duration": 1600000
},
{
"Word": "just",
"Offset": 255000000,
"Duration": 3400000
},
{
"Word": "here",
"Offset": 258400000,
"Duration": 5500000
},
{
"Word": "but",
"Offset": 270200000,
"Duration": 4000000
},
{
"Word": "no",
"Offset": 274200000,
"Duration": 3000000
},
{
"Word": "it's",
"Offset": 277200000,
"Duration": 1600000
},
{
"Word": "wednesday",
"Offset": 278800000,
"Duration": 6700000
},
{
"Word": "may",
"Offset": 288600000,
"Duration": 3800000
},
{
"Word": "sixteenth",
"Offset": 292400000,
"Duration": 8800000
},
{
"Word": "full",
"Offset": 307200000,
"Duration": 4600000
},
{
"Word": "complement",
"Offset": 311800000,
"Duration": 6600000
},
{
"Word": "not",
"Offset": 318400000,
"Duration": 3000000
},
{
"Word": "quite",
"Offset": 321400000,
"Duration": 5300000
}
]
}
]
}
]
}
]
}
I would like to remove duplicates from the JSON only
For instance "Word": "ask" came twice; I would like to retain first occurrence of "Word": "ask" and remove second.
{
"Word": "welcome",
"Offset": 169700000,
"Duration": 4500000
},
I have tried various dedup techniques but nothing is helping
Here is my sample code:
import json
with open('example1.json') as json_data:
obj = json.load(json_data)
#attr = lambda x: x['hdfs:batchprocessing'][0]['application']['app_id']+x['hdfs:batchprocessing'][0]['application']['app_id']
el_set = set()
el_list = []
for el in obj:
if str(el) not in el_set:
el_set.add(str(el))
el_list.append(el)
open("updated_structure.json", "w").write(
json.dumps(el_list, sort_keys=True, indent=4, separators=(',', ': '))
)
JSON without any duplicate values for "Word"
Here ('data' is the data struct from the post)
The code removes duplicate words from 'data'
import copy
import pprint
data = {
"FileResults": [
{
"FileName": "gtg.0.wav",
"FileUrl": None,
"Results": [
{
"Status": "Success",
"ChannelNumber": None,
"SpeakerId": None,
"Offset": 90200000,
"Duration": 25600000,
"NBest": [
{
"Confidence": 0.9415368,
"Lexical": "",
"ITN": "",
"MaskedITN": "",
"Display": ".",
"Sentiment": None,
"Words": [
{
"Word": "ask",
"Offset": 944400000,
"Duration": 3500000
},
{
"Word": "everybody",
"Offset": 94000000,
"Duration": 4400000
},
{
"Word": "to",
"Offset": 98400000,
"Duration": 1200000
},
{
"Word": "please",
"Offset": 99600000,
"Duration": 3000000
},
{
"Word": "take",
"Offset": 102600000,
"Duration": 2400000
},
{
"Word": "their",
"Offset": 105000000,
"Duration": 2400000
},
{
"Word": "seats",
"Offset": 107400000,
"Duration": 8200000
}
]
}
]
},
{
"Status": "Success",
"ChannelNumber": None,
"SpeakerId": None,
"Offset": 90200000,
"Duration": 25600000,
"NBest": [
{
"Confidence": 0.9415368,
"Lexical": "",
"ITN": "",
"MaskedITN": "",
"Display": ".",
"Sentiment": None,
"Words": [
{
"Word": "ask",
"Offset": 90500000,
"Duration": 3500000
},
{
"Word": "everybody",
"Offset": 94000000,
"Duration": 4400000
},
{
"Word": "to",
"Offset": 98400000,
"Duration": 1200000
},
{
"Word": "please",
"Offset": 99600000,
"Duration": 3000000
},
{
"Word": "take",
"Offset": 102600000,
"Duration": 2400000
},
{
"Word": "their",
"Offset": 105000000,
"Duration": 2400000
},
{
"Word": "seats",
"Offset": 107400000,
"Duration": 8200000
}
]
}
]
},
{
"Status": "Success",
"ChannelNumber": None,
"SpeakerId": None,
"Offset": 169400000,
"Duration": 157500000,
"NBest": [
{
"Confidence": 0.944001734,
"Lexical": "",
"ITN": "",
"MaskedITN": "",
"Display": "",
"Sentiment": None,
"Words": [
{
"Word": "welcome",
"Offset": 169700000,
"Duration": 4500000
},
{
"Word": "to",
"Offset": 174200000,
"Duration": 2600000
},
{
"Word": "the",
"Offset": 176800000,
"Duration": 8600000
},
{
"Word": "scheduled",
"Offset": 186500000,
"Duration": 7900000
},
{
"Word": "special",
"Offset": 194400000,
"Duration": 6000000
},
{
"Word": "budget",
"Offset": 200400000,
"Duration": 4400000
},
{
"Word": "hearings",
"Offset": 204800000,
"Duration": 6400000
},
{
"Word": "meeting",
"Offset": 211400000,
"Duration": 4800000
},
{
"Word": "of",
"Offset": 216200000,
"Duration": 1600000
},
{
"Word": "the",
"Offset": 217800000,
"Duration": 1300000
},
{
"Word": "los",
"Offset": 219100000,
"Duration": 2300000
},
{
"Word": "lm",
"Offset": 221400000,
"Duration": 3600000
},
{
"Word": "mk",
"Offset": 225000000,
"Duration": 5500000
},
{
"Word": "board",
"Offset": 231800000,
"Duration": 4600000
},
{
"Word": "of",
"Offset": 236400000,
"Duration": 1000000
},
{
"Word": "supervisors",
"Offset": 237400000,
"Duration": 9200000
},
{
"Word": "seems",
"Offset": 246600000,
"Duration": 3000000
},
{
"Word": "like",
"Offset": 249600000,
"Duration": 2400000
},
{
"Word": "we",
"Offset": 252000000,
"Duration": 1400000
},
{
"Word": "were",
"Offset": 253400000,
"Duration": 1600000
},
{
"Word": "just",
"Offset": 255000000,
"Duration": 3400000
},
{
"Word": "here",
"Offset": 258400000,
"Duration": 5500000
},
{
"Word": "but",
"Offset": 270200000,
"Duration": 4000000
},
{
"Word": "no",
"Offset": 274200000,
"Duration": 3000000
},
{
"Word": "it's",
"Offset": 277200000,
"Duration": 1600000
},
{
"Word": "wednesday",
"Offset": 278800000,
"Duration": 6700000
},
{
"Word": "may",
"Offset": 288600000,
"Duration": 3800000
},
{
"Word": "sixteenth",
"Offset": 292400000,
"Duration": 8800000
},
{
"Word": "full",
"Offset": 307200000,
"Duration": 4600000
},
{
"Word": "complement",
"Offset": 311800000,
"Duration": 6600000
},
{
"Word": "not",
"Offset": 318400000,
"Duration": 3000000
},
{
"Word": "quite",
"Offset": 321400000,
"Duration": 5300000
}
]
}
]
}
]
}
]
}
words_set = set()
for entry in data['FileResults']:
for result in entry['Results']:
for nbsets_dict in result['NBest']:
clone = copy.deepcopy(nbsets_dict['Words'])
tmp = []
for idx, words in enumerate(nbsets_dict['Words']):
if words['Word'] in words_set:
print('About to remove entry: ' + words['Word'])
tmp.append(idx)
else:
words_set.add(words['Word'])
for idx in sorted(tmp,reverse=True):
del clone[idx]
nbsets_dict['Words'] = clone
pprint.pprint(data)

How to Convert a list of dicts into nested JSON in python without using pandas DataFrame

I have a list of dicts like this
[
{
"subject_id": 1,
"subject_name": "HR Sector 0",
"id": 1,
"name": "parent2",
"value": 10.6
},
{
"subject_id": 18,
"subject_name": "Test11",
"id": 1,
"name": "parent2",
"value": 12
},
{
"subject_id": 2,
"subject_name": "AG1",
"id": 2,
"name": "Customer Delivery Dpt. 1",
"value": 17
},
{
"subject_id": 3,
"subject_name": "Finance Group 2",
"id": 2,
"name": "Customer Delivery Dpt. 1",
"value": 1.5
},
{
"subject_id": 10,
"subject_name": "test",
"id": 2,
"name": "Customer Delivery Dpt. 1",
"value": 10
},
{
"subject_id": null,
"subject_name": null,
"id": 3,
"name": "Technology Team 2",
"value": null
},
{
"subject_id": 8,
"subject_name": "Group 4",
"id": 5,
"name": "Accounting Group 4",
"value": 10
},
{
"subject_id": null,
"subject_name": null,
"id": 9,
"name": "PG2",
"value": null
}
]
I want to convert it into nested JSON and ignore null values to get below result set
[
{
"id": 1,
"name": "parent2",
"subjects”: [
{”subject_id": 1,
"subject_name": "HR Sector 0",
"value": 10.6
},
{”subject_id": 18,
"subject_name": "Test11",
"value": 12
}
]
},
{
"id": 2,
"name": "Customer Delivery Dpt. 1",
"subjects”: [
{“subject_id": 2,
"subject_name": "AG1",
"value": 17
},
{“subject_id": 3,
"subject_name": "Finance Group 2",
"value": 1.5
},
{“subject_id": 10,
"subject_name": “test”,
"value": 10
}
]
},
{
"id": 3,
"name": "Technology Team 2",
"subjects”: []
},
{
"id": 5,
"name": "Accounting Group 4",
"subjects” : [
{ "subject_id": 8,
"subject_name": "Group 4",
"value": 10
}
]
},
{
"id": 9,
"name": "PG2",
"subjects”: []
}
]
import json
arr = [
{
"subject_id": 1,
"subject_name": "HR Sector 0",
"id": 1,
"name": "parent2",
"value": 10.6
},
{
"subject_id": 18,
"subject_name": "Test11",
"id": 1,
"name": "parent2",
"value": 12
},
{
"subject_id": 2,
"subject_name": "AG1",
"id": 2,
"name": "Customer Delivery Dpt. 1",
"value": 17
},
{
"subject_id": 3,
"subject_name": "Finance Group 2",
"id": 2,
"name": "Customer Delivery Dpt. 1",
"value": 1.5
},
{
"subject_id": 10,
"subject_name": "test",
"id": 2,
"name": "Customer Delivery Dpt. 1",
"value": 10
},
{
"subject_id": None,
"subject_name": None,
"id": 3,
"name": "Technology Team 2",
"value": None
},
{
"subject_id": 8,
"subject_name": "Group 4",
"id": 5,
"name": "Accounting Group 4",
"value": 10
},
{
"subject_id": None,
"subject_name": None,
"id": 9,
"name": "PG2",
"value": None
}
]
def process_arr_to_json(arr):
newArr = []
addedIds = {}
for item in arr:
if(addedIds.get(item["id"]) is None):
formatted_item = {"subjects":[]}
newArr.append(formatted_item)
addedIds[item["id"]] = {"idx": 0, "pos": len(newArr)-1} #index in the dictionary for the subject item
else:
formatted_item = newArr[addedIds[item["id"]]["pos"]]
addedIds[item["id"]]["idx"] += 1
for k,v in item.items():
if(v is not None):
if(k == "id" or k == "name"):
formatted_item[k] = v
else:
if(len(formatted_item["subjects"]) <= addedIds[item["id"]]["idx"]):
formatted_item["subjects"].append({k:v})
else:
formatted_item["subjects"][addedIds[item["id"]]["idx"]][k] = v
print(newArr)
return json.dumps(newArr)
if __name__ == "__main__":
process_arr_to_json(arr)
my solution
Please see code below to form the merged results
import json
def process_items(items):
results = {}
for item in items:
results[item['id']] = {
'id': item['id'],
'name': item['name'],
}
to_append = {}
for k in ['subject_id', 'value', 'subject_name']:
if item.get(k):
to_append[k] = item[k]
results[item['id']].setdefault('subjects', [])
if to_append:
results[item['id']]['subjects'].append(to_append)
return results
items = [
{
"subject_id": 1,
"subject_name": "HR Sector 0",
"id": 1,
"name": "parent2",
"value": 10.6
},
{
"subject_id": 18,
"subject_name": "Test11",
"id": 1,
"name": "parent2",
"value": 12
},
{
"subject_id": 2,
"subject_name": "AG1",
"id": 2,
"name": "Customer Delivery Dpt. 1",
"value": 17
},
{
"subject_id": 3,
"subject_name": "Finance Group 2",
"id": 2,
"name": "Customer Delivery Dpt. 1",
"value": 1.5
},
{
"subject_id": 10,
"subject_name": "test",
"id": 2,
"name": "Customer Delivery Dpt. 1",
"value": 10
},
{
"subject_id": None,
"subject_name": None,
"id": 3,
"name": "Technology Team 2",
"value": None
},
{
"subject_id": 8,
"subject_name": "Group 4",
"id": 5,
"name": "Accounting Group 4",
"value": 10
},
{
"subject_id": None,
"subject_name": None,
"id": 9,
"name": "PG2",
"value": None
}
]
result = process_items(items)
json.dumps(result.values()) # For python 3: json.dumps(list(results.values()))

Categories

Resources