This question already has answers here:
Removing Duplicates From Dictionary
(11 answers)
Closed 3 years ago.
I have following JSON
{
"FileResults": [
{
"FileName": "gtg.0.wav",
"FileUrl": null,
"Results": [
{
"Status": "Success",
"ChannelNumber": null,
"SpeakerId": null,
"Offset": 90200000,
"Duration": 25600000,
"NBest": [
{
"Confidence": 0.9415368,
"Lexical": "",
"ITN": "",
"MaskedITN": "",
"Display": ".",
"Sentiment": null,
"Words": [
{
"Word": "ask",
"Offset": 944400000,
"Duration": 3500000
},
{
"Word": "everybody",
"Offset": 94000000,
"Duration": 4400000
},
{
"Word": "to",
"Offset": 98400000,
"Duration": 1200000
},
{
"Word": "please",
"Offset": 99600000,
"Duration": 3000000
},
{
"Word": "take",
"Offset": 102600000,
"Duration": 2400000
},
{
"Word": "their",
"Offset": 105000000,
"Duration": 2400000
},
{
"Word": "seats",
"Offset": 107400000,
"Duration": 8200000
}
]
}
]
},
{
"Status": "Success",
"ChannelNumber": null,
"SpeakerId": null,
"Offset": 90200000,
"Duration": 25600000,
"NBest": [
{
"Confidence": 0.9415368,
"Lexical": "",
"ITN": "",
"MaskedITN": "",
"Display": ".",
"Sentiment": null,
"Words": [
{
"Word": "ask",
"Offset": 90500000,
"Duration": 3500000
},
{
"Word": "everybody",
"Offset": 94000000,
"Duration": 4400000
},
{
"Word": "to",
"Offset": 98400000,
"Duration": 1200000
},
{
"Word": "please",
"Offset": 99600000,
"Duration": 3000000
},
{
"Word": "take",
"Offset": 102600000,
"Duration": 2400000
},
{
"Word": "their",
"Offset": 105000000,
"Duration": 2400000
},
{
"Word": "seats",
"Offset": 107400000,
"Duration": 8200000
}
]
}
]
},
{
"Status": "Success",
"ChannelNumber": null,
"SpeakerId": null,
"Offset": 169400000,
"Duration": 157500000,
"NBest": [
{
"Confidence": 0.944001734,
"Lexical": "",
"ITN": "",
"MaskedITN": "",
"Display": "",
"Sentiment": null,
"Words": [
{
"Word": "welcome",
"Offset": 169700000,
"Duration": 4500000
},
{
"Word": "to",
"Offset": 174200000,
"Duration": 2600000
},
{
"Word": "the",
"Offset": 176800000,
"Duration": 8600000
},
{
"Word": "scheduled",
"Offset": 186500000,
"Duration": 7900000
},
{
"Word": "special",
"Offset": 194400000,
"Duration": 6000000
},
{
"Word": "budget",
"Offset": 200400000,
"Duration": 4400000
},
{
"Word": "hearings",
"Offset": 204800000,
"Duration": 6400000
},
{
"Word": "meeting",
"Offset": 211400000,
"Duration": 4800000
},
{
"Word": "of",
"Offset": 216200000,
"Duration": 1600000
},
{
"Word": "the",
"Offset": 217800000,
"Duration": 1300000
},
{
"Word": "los",
"Offset": 219100000,
"Duration": 2300000
},
{
"Word": "lm",
"Offset": 221400000,
"Duration": 3600000
},
{
"Word": "mk",
"Offset": 225000000,
"Duration": 5500000
},
{
"Word": "board",
"Offset": 231800000,
"Duration": 4600000
},
{
"Word": "of",
"Offset": 236400000,
"Duration": 1000000
},
{
"Word": "supervisors",
"Offset": 237400000,
"Duration": 9200000
},
{
"Word": "seems",
"Offset": 246600000,
"Duration": 3000000
},
{
"Word": "like",
"Offset": 249600000,
"Duration": 2400000
},
{
"Word": "we",
"Offset": 252000000,
"Duration": 1400000
},
{
"Word": "were",
"Offset": 253400000,
"Duration": 1600000
},
{
"Word": "just",
"Offset": 255000000,
"Duration": 3400000
},
{
"Word": "here",
"Offset": 258400000,
"Duration": 5500000
},
{
"Word": "but",
"Offset": 270200000,
"Duration": 4000000
},
{
"Word": "no",
"Offset": 274200000,
"Duration": 3000000
},
{
"Word": "it's",
"Offset": 277200000,
"Duration": 1600000
},
{
"Word": "wednesday",
"Offset": 278800000,
"Duration": 6700000
},
{
"Word": "may",
"Offset": 288600000,
"Duration": 3800000
},
{
"Word": "sixteenth",
"Offset": 292400000,
"Duration": 8800000
},
{
"Word": "full",
"Offset": 307200000,
"Duration": 4600000
},
{
"Word": "complement",
"Offset": 311800000,
"Duration": 6600000
},
{
"Word": "not",
"Offset": 318400000,
"Duration": 3000000
},
{
"Word": "quite",
"Offset": 321400000,
"Duration": 5300000
}
]
}
]
}
]
}
]
}
I would like to remove duplicates from the JSON only
For instance "Word": "ask" came twice; I would like to retain first occurrence of "Word": "ask" and remove second.
{
"Word": "welcome",
"Offset": 169700000,
"Duration": 4500000
},
I have tried various dedup techniques but nothing is helping
Here is my sample code:
import json

# NOTE(review): this only de-duplicates the *top level* of the loaded JSON.
# Iterating a dict yields its keys, so the nested "Words" entries are never
# visited — which is why this approach does not remove the duplicate words.
with open('example1.json') as json_data:
    obj = json.load(json_data)

el_set = set()   # stringified elements seen so far
el_list = []     # first occurrence of each element, in input order
for el in obj:
    if str(el) not in el_set:
        el_set.add(str(el))
        el_list.append(el)

# Use a context manager so the output file is flushed and closed
# deterministically instead of leaking the handle from a bare open().write().
with open("updated_structure.json", "w") as out:
    out.write(
        json.dumps(el_list, sort_keys=True, indent=4, separators=(',', ': '))
    )
JSON without any duplicate values for "Word"
Here ('data' is the data struct from the post)
The code removes duplicate words from 'data'
import copy
import pprint
data = {
"FileResults": [
{
"FileName": "gtg.0.wav",
"FileUrl": None,
"Results": [
{
"Status": "Success",
"ChannelNumber": None,
"SpeakerId": None,
"Offset": 90200000,
"Duration": 25600000,
"NBest": [
{
"Confidence": 0.9415368,
"Lexical": "",
"ITN": "",
"MaskedITN": "",
"Display": ".",
"Sentiment": None,
"Words": [
{
"Word": "ask",
"Offset": 944400000,
"Duration": 3500000
},
{
"Word": "everybody",
"Offset": 94000000,
"Duration": 4400000
},
{
"Word": "to",
"Offset": 98400000,
"Duration": 1200000
},
{
"Word": "please",
"Offset": 99600000,
"Duration": 3000000
},
{
"Word": "take",
"Offset": 102600000,
"Duration": 2400000
},
{
"Word": "their",
"Offset": 105000000,
"Duration": 2400000
},
{
"Word": "seats",
"Offset": 107400000,
"Duration": 8200000
}
]
}
]
},
{
"Status": "Success",
"ChannelNumber": None,
"SpeakerId": None,
"Offset": 90200000,
"Duration": 25600000,
"NBest": [
{
"Confidence": 0.9415368,
"Lexical": "",
"ITN": "",
"MaskedITN": "",
"Display": ".",
"Sentiment": None,
"Words": [
{
"Word": "ask",
"Offset": 90500000,
"Duration": 3500000
},
{
"Word": "everybody",
"Offset": 94000000,
"Duration": 4400000
},
{
"Word": "to",
"Offset": 98400000,
"Duration": 1200000
},
{
"Word": "please",
"Offset": 99600000,
"Duration": 3000000
},
{
"Word": "take",
"Offset": 102600000,
"Duration": 2400000
},
{
"Word": "their",
"Offset": 105000000,
"Duration": 2400000
},
{
"Word": "seats",
"Offset": 107400000,
"Duration": 8200000
}
]
}
]
},
{
"Status": "Success",
"ChannelNumber": None,
"SpeakerId": None,
"Offset": 169400000,
"Duration": 157500000,
"NBest": [
{
"Confidence": 0.944001734,
"Lexical": "",
"ITN": "",
"MaskedITN": "",
"Display": "",
"Sentiment": None,
"Words": [
{
"Word": "welcome",
"Offset": 169700000,
"Duration": 4500000
},
{
"Word": "to",
"Offset": 174200000,
"Duration": 2600000
},
{
"Word": "the",
"Offset": 176800000,
"Duration": 8600000
},
{
"Word": "scheduled",
"Offset": 186500000,
"Duration": 7900000
},
{
"Word": "special",
"Offset": 194400000,
"Duration": 6000000
},
{
"Word": "budget",
"Offset": 200400000,
"Duration": 4400000
},
{
"Word": "hearings",
"Offset": 204800000,
"Duration": 6400000
},
{
"Word": "meeting",
"Offset": 211400000,
"Duration": 4800000
},
{
"Word": "of",
"Offset": 216200000,
"Duration": 1600000
},
{
"Word": "the",
"Offset": 217800000,
"Duration": 1300000
},
{
"Word": "los",
"Offset": 219100000,
"Duration": 2300000
},
{
"Word": "lm",
"Offset": 221400000,
"Duration": 3600000
},
{
"Word": "mk",
"Offset": 225000000,
"Duration": 5500000
},
{
"Word": "board",
"Offset": 231800000,
"Duration": 4600000
},
{
"Word": "of",
"Offset": 236400000,
"Duration": 1000000
},
{
"Word": "supervisors",
"Offset": 237400000,
"Duration": 9200000
},
{
"Word": "seems",
"Offset": 246600000,
"Duration": 3000000
},
{
"Word": "like",
"Offset": 249600000,
"Duration": 2400000
},
{
"Word": "we",
"Offset": 252000000,
"Duration": 1400000
},
{
"Word": "were",
"Offset": 253400000,
"Duration": 1600000
},
{
"Word": "just",
"Offset": 255000000,
"Duration": 3400000
},
{
"Word": "here",
"Offset": 258400000,
"Duration": 5500000
},
{
"Word": "but",
"Offset": 270200000,
"Duration": 4000000
},
{
"Word": "no",
"Offset": 274200000,
"Duration": 3000000
},
{
"Word": "it's",
"Offset": 277200000,
"Duration": 1600000
},
{
"Word": "wednesday",
"Offset": 278800000,
"Duration": 6700000
},
{
"Word": "may",
"Offset": 288600000,
"Duration": 3800000
},
{
"Word": "sixteenth",
"Offset": 292400000,
"Duration": 8800000
},
{
"Word": "full",
"Offset": 307200000,
"Duration": 4600000
},
{
"Word": "complement",
"Offset": 311800000,
"Duration": 6600000
},
{
"Word": "not",
"Offset": 318400000,
"Duration": 3000000
},
{
"Word": "quite",
"Offset": 321400000,
"Duration": 5300000
}
]
}
]
}
]
}
]
}
# De-duplicate "Word" entries across the whole structure, keeping only the
# first occurrence of each word (in document order).
words_set = set()  # words already seen anywhere in the file
for entry in data['FileResults']:
    for result in entry['Results']:
        for nbsets_dict in result['NBest']:
            # Build the filtered list in one pass instead of deep-copying the
            # list and deleting indices from the copy in reverse afterwards.
            kept = []
            for words in nbsets_dict['Words']:
                if words['Word'] in words_set:
                    print('About to remove entry: ' + words['Word'])
                else:
                    words_set.add(words['Word'])
                    kept.append(words)
            nbsets_dict['Words'] = kept
pprint.pprint(data)
Related
I am trying to organize a json response from a URL into a panda dataframe but I am having issues getting at the nested data.
import requests
import json
import numpy as np
import pandas as pd  # fix: `pd` is referenced below but was never imported
from pandas import json_normalize

series = 'f1'
season = 2022
ssnround = '1'
laps = 3

url = "http://ergast.com/api/f1/2011/5/laps/1.json"
record_path = ['Races']
meta = ['driverId', 'position', 'time']

r = requests.get(url=url)
data = json.loads(r.content)
# Flatten the top level of the response; the Timings live several levels
# deeper (MRData -> RaceTable -> Races -> Laps -> Timings), which is why a
# record_path of just ['Races'] raises a KeyError.
df = pd.json_normalize(data)
df
I am trying to create a table of all the driverIds, their Position and their lap time. However, whenever I use a record_path for example df = pd.json_normalize(data, record_path, meta) I get a Key Error. What am I missing?
The json data looks like this at the URL:
{
"MRData": {
"xmlns": "http://ergast.com/mrd/1.5",
"series": "f1",
"url": "http://ergast.com/api/f1/2011/5/laps/1.json",
"limit": "30",
"offset": "0",
"total": "24",
"RaceTable": {
"season": "2011",
"round": "5",
"Races": [
{
"season": "2011",
"round": "5",
"url": "http://en.wikipedia.org/wiki/2011_Spanish_Grand_Prix",
"raceName": "Spanish Grand Prix",
"Circuit": {
"circuitId": "catalunya",
"url": "http://en.wikipedia.org/wiki/Circuit_de_Barcelona-Catalunya",
"circuitName": "Circuit de Barcelona-Catalunya",
"Location": {
"lat": "41.57",
"long": "2.26111",
"locality": "Montmeló",
"country": "Spain"
}
},
"date": "2011-05-22",
"time": "12:00:00Z",
"Laps": [
{
"number": "1",
"Timings": [
{
"driverId": "alonso",
"position": "1",
"time": "1:34.494"
},
{
"driverId": "vettel",
"position": "2",
"time": "1:35.274"
},
{
"driverId": "webber",
"position": "3",
"time": "1:36.329"
},
{
"driverId": "hamilton",
"position": "4",
"time": "1:36.991"
},
{
"driverId": "petrov",
"position": "5",
"time": "1:38.084"
},
{
"driverId": "michael_schumacher",
"position": "6",
"time": "1:38.633"
},
{
"driverId": "rosberg",
"position": "7",
"time": "1:39.139"
},
{
"driverId": "massa",
"position": "8",
"time": "1:39.979"
},
{
"driverId": "buemi",
"position": "9",
"time": "1:40.611"
},
{
"driverId": "button",
"position": "10",
"time": "1:40.998"
},
{
"driverId": "perez",
"position": "11",
"time": "1:41.433"
},
{
"driverId": "alguersuari",
"position": "12",
"time": "1:41.876"
},
{
"driverId": "maldonado",
"position": "13",
"time": "1:42.255"
},
{
"driverId": "resta",
"position": "14",
"time": "1:42.808"
},
{
"driverId": "trulli",
"position": "15",
"time": "1:43.553"
},
{
"driverId": "kovalainen",
"position": "16",
"time": "1:44.276"
},
{
"driverId": "heidfeld",
"position": "17",
"time": "1:45.164"
},
{
"driverId": "sutil",
"position": "18",
"time": "1:46.107"
},
{
"driverId": "liuzzi",
"position": "19",
"time": "1:46.737"
},
{
"driverId": "barrichello",
"position": "20",
"time": "1:47.077"
},
{
"driverId": "glock",
"position": "21",
"time": "1:47.556"
},
{
"driverId": "karthikeyan",
"position": "22",
"time": "1:48.183"
},
{
"driverId": "ambrosio",
"position": "23",
"time": "1:48.573"
},
{
"driverId": "kobayashi",
"position": "24",
"time": "1:57.590"
}
]
}
]
}
]
}
}
}
Try to construct dataframe without .json_normalize:
import requests
import pandas as pd

# Fetch the lap data and drill straight down to the per-driver timings;
# no json_normalize needed because the Timings list is already flat.
response = requests.get(url="http://ergast.com/api/f1/2011/5/laps/1.json")
payload = response.json()
timings = payload["MRData"]["RaceTable"]["Races"][0]["Laps"][0]["Timings"]

df = pd.DataFrame(timings)
print(df)
Prints:
driverId position time
0 alonso 1 1:34.494
1 vettel 2 1:35.274
2 webber 3 1:36.329
3 hamilton 4 1:36.991
4 petrov 5 1:38.084
5 michael_schumacher 6 1:38.633
6 rosberg 7 1:39.139
7 massa 8 1:39.979
8 buemi 9 1:40.611
9 button 10 1:40.998
10 perez 11 1:41.433
11 alguersuari 12 1:41.876
12 maldonado 13 1:42.255
13 resta 14 1:42.808
14 trulli 15 1:43.553
15 kovalainen 16 1:44.276
16 heidfeld 17 1:45.164
17 sutil 18 1:46.107
18 liuzzi 19 1:46.737
19 barrichello 20 1:47.077
20 glock 21 1:47.556
21 karthikeyan 22 1:48.183
22 ambrosio 23 1:48.573
23 kobayashi 24 1:57.590
I want to split the incidents by their "incidentType" values in Python. There are always 5 of these values: period, injuryTime, goal, card and substitution.
Json File
{
"incidents": [
{
"text": "FT",
"homeScore": 2,
"awayScore": 1,
"isLive": false,
"time": 90,
"addedTime": 999,
"incidentType": "period"
},
{
"length": 4,
"time": 90,
"addedTime": 0,
"incidentType": "injuryTime"
},
{
"homeScore": 2,
"awayScore": 1,
"player": {
"name": "Mostafa Mohamed",
"firstName": "",
"lastName": "",
"slug": "mostafa-mohamed",
"shortName": "M. Mohamed",
"position": "F",
"userCount": 3949,
"id": 873551
},
"id": 141786584,
"time": 89,
"isHome": true,
"incidentClass": "penalty",
"incidentType": "goal"
},
{
"player": {
"name": "Duško Tošić",
"slug": "dusko-tosic",
"shortName": "D. Tošić",
"position": "D",
"userCount": 215,
"id": 14557
},
"playerName": "Duško Tošić",
"reason": "Foul",
"id": 119728583,
"time": 85,
"isHome": false,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"playerIn": {
"name": "Younès Belhanda",
"slug": "younes-belhanda",
"shortName": "Y. Belhanda",
"position": "M",
"userCount": 2165,
"id": 72999
},
"playerOut": {
"name": "Martin Linnes",
"slug": "martin-linnes",
"shortName": "M. Linnes",
"position": "D",
"userCount": 339,
"id": 109569
},
"id": 120059400,
"time": 82,
"isHome": true,
"incidentType": "substitution"
},
{
"player": {
"name": "Kevin Varga",
"slug": "kevin-varga",
"shortName": "K. Varga",
"position": "M",
"userCount": 274,
"id": 602730
},
"playerName": "Kevin Varga",
"reason": "Foul",
"id": 119728582,
"time": 82,
"isHome": false,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"playerIn": {
"name": "DeAndre Yedlin",
"slug": "deandre-yedlin",
"shortName": "D. Yedlin",
"position": "D",
"userCount": 702,
"id": 314040
},
"playerOut": {
"name": "Muhammed Kerem Aktürkoğlu",
"firstName": "",
"lastName": "",
"slug": "muhammed-kerem-akturkoglu",
"shortName": "M. K. Aktürkoğlu",
"position": "F",
"userCount": 281,
"id": 903324
},
"id": 120059399,
"time": 77,
"isHome": true,
"incidentType": "substitution"
},
{
"playerIn": {
"name": "Ryan Donk",
"slug": "ryan-donk",
"shortName": "R. Donk",
"position": "D",
"userCount": 489,
"id": 14900
},
"playerOut": {
"name": "Ryan Babel",
"slug": "ryan-babel",
"shortName": "R. Babel",
"position": "F",
"userCount": 1577,
"id": 1876
},
"id": 120059397,
"time": 72,
"isHome": true,
"incidentType": "substitution"
},
{
"playerIn": {
"name": "Emre Akbaba",
"slug": "emre-akbaba",
"shortName": "E. Akbaba",
"position": "M",
"userCount": 604,
"id": 343527
},
"playerOut": {
"name": "Gedson Fernandes",
"slug": "fernandes-gedson",
"shortName": "G. Fernandes",
"position": "M",
"userCount": 3030,
"id": 862055
},
"id": 120059396,
"time": 71,
"isHome": true,
"incidentType": "substitution"
},
{
"playerIn": {
"name": "Henry Onyekuru",
"slug": "henry-onyekuru",
"shortName": "H. Onyekuru",
"position": "M",
"userCount": 1474,
"id": 809220
},
"playerOut": {
"name": "Emre Kılınç",
"slug": "emre-kilinc",
"shortName": "E. Kılınç",
"position": "M",
"userCount": 526,
"id": 202032
},
"id": 120059398,
"time": 71,
"isHome": true,
"incidentType": "substitution"
},
{
"player": {
"name": "Haris Hajradinović",
"slug": "haris-hajradinovic",
"shortName": "H. Hajradinović",
"position": "M",
"userCount": 357,
"id": 254979
},
"playerName": "Haris Hajradinović",
"reason": "Foul",
"id": 119728581,
"time": 71,
"isHome": false,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"homeScore": 1,
"awayScore": 1,
"player": {
"name": "Isaac Kiese Thelin",
"slug": "isaac-kiese-thelin",
"shortName": "I. K. Thelin",
"position": "F",
"userCount": 386,
"id": 178743
},
"assist1": {
"name": "Haris Hajradinović",
"slug": "haris-hajradinovic",
"shortName": "H. Hajradinović",
"position": "M",
"userCount": 357,
"id": 254979
},
"id": 141786585,
"time": 51,
"isHome": false,
"incidentClass": "regular",
"incidentType": "goal"
},
{
"playerIn": {
"name": "Kevin Varga",
"slug": "kevin-varga",
"shortName": "K. Varga",
"position": "M",
"userCount": 274,
"id": 602730
},
"playerOut": {
"name": "Gilbert Koomson",
"slug": "gilbert-koomson",
"shortName": "G. Koomson",
"position": "F",
"userCount": 76,
"id": 341107
},
"id": 120059401,
"time": 46,
"isHome": false,
"incidentType": "substitution"
},
{
"text": "HT",
"homeScore": 1,
"awayScore": 0,
"isLive": false,
"time": 45,
"addedTime": 999,
"incidentType": "period"
},
{
"player": {
"name": "Isaac Kiese Thelin",
"slug": "isaac-kiese-thelin",
"shortName": "I. K. Thelin",
"position": "F",
"userCount": 386,
"id": 178743
},
"playerName": "Isaac Kiese Thelin",
"reason": "Foul",
"id": 119728580,
"time": 15,
"isHome": false,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"homeScore": 1,
"awayScore": 0,
"player": {
"name": "Muhammed Kerem Aktürkoğlu",
"firstName": "",
"lastName": "",
"slug": "muhammed-kerem-akturkoglu",
"shortName": "M. K. Aktürkoğlu",
"position": "F",
"userCount": 281,
"id": 903324
},
"id": 141786583,
"time": 9,
"isHome": true,
"incidentClass": "regular",
"incidentType": "goal"
}
]
}
ABC = {
"incidents": [
{
"text": "FT",
"homeScore": 2,
"awayScore": 1,
"isLive": False,
"time": 90,
"addedTime": 999,
"incidentType": "period"
},
{
"length": 4,
"time": 90,
"addedTime": 0,
"incidentType": "injuryTime"
},
{
"homeScore": 2,
"awayScore": 1,
"player": {
"name": "Mostafa Mohamed",
"firstName": "",
"lastName": "",
"slug": "mostafa-mohamed",
"shortName": "M. Mohamed",
"position": "F",
"userCount": 3949,
"id": 873551
},
"id": 141786584,
"time": 89,
"isHome": True,
"incidentClass": "penalty",
"incidentType": "goal"
},
{
"player": {
"name": "Duško Tošić",
"slug": "dusko-tosic",
"shortName": "D. Tošić",
"position": "D",
"userCount": 215,
"id": 14557
},
"playerName": "Duško Tošić",
"reason": "Foul",
"id": 119728583,
"time": 85,
"isHome": False,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"playerIn": {
"name": "Younès Belhanda",
"slug": "younes-belhanda",
"shortName": "Y. Belhanda",
"position": "M",
"userCount": 2165,
"id": 72999
},
"playerOut": {
"name": "Martin Linnes",
"slug": "martin-linnes",
"shortName": "M. Linnes",
"position": "D",
"userCount": 339,
"id": 109569
},
"id": 120059400,
"time": 82,
"isHome": True,
"incidentType": "substitution"
},
{
"player": {
"name": "Kevin Varga",
"slug": "kevin-varga",
"shortName": "K. Varga",
"position": "M",
"userCount": 274,
"id": 602730
},
"playerName": "Kevin Varga",
"reason": "Foul",
"id": 119728582,
"time": 82,
"isHome": False,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"playerIn": {
"name": "DeAndre Yedlin",
"slug": "deandre-yedlin",
"shortName": "D. Yedlin",
"position": "D",
"userCount": 702,
"id": 314040
},
"playerOut": {
"name": "Muhammed Kerem Aktürkoğlu",
"firstName": "",
"lastName": "",
"slug": "muhammed-kerem-akturkoglu",
"shortName": "M. K. Aktürkoğlu",
"position": "F",
"userCount": 281,
"id": 903324
},
"id": 120059399,
"time": 77,
"isHome": True,
"incidentType": "substitution"
},
{
"playerIn": {
"name": "Ryan Donk",
"slug": "ryan-donk",
"shortName": "R. Donk",
"position": "D",
"userCount": 489,
"id": 14900
},
"playerOut": {
"name": "Ryan Babel",
"slug": "ryan-babel",
"shortName": "R. Babel",
"position": "F",
"userCount": 1577,
"id": 1876
},
"id": 120059397,
"time": 72,
"isHome": True,
"incidentType": "substitution"
},
{
"playerIn": {
"name": "Emre Akbaba",
"slug": "emre-akbaba",
"shortName": "E. Akbaba",
"position": "M",
"userCount": 604,
"id": 343527
},
"playerOut": {
"name": "Gedson Fernandes",
"slug": "fernandes-gedson",
"shortName": "G. Fernandes",
"position": "M",
"userCount": 3030,
"id": 862055
},
"id": 120059396,
"time": 71,
"isHome": True,
"incidentType": "substitution"
},
{
"playerIn": {
"name": "Henry Onyekuru",
"slug": "henry-onyekuru",
"shortName": "H. Onyekuru",
"position": "M",
"userCount": 1474,
"id": 809220
},
"playerOut": {
"name": "Emre Kılınç",
"slug": "emre-kilinc",
"shortName": "E. Kılınç",
"position": "M",
"userCount": 526,
"id": 202032
},
"id": 120059398,
"time": 71,
"isHome": True,
"incidentType": "substitution"
},
{
"player": {
"name": "Haris Hajradinović",
"slug": "haris-hajradinovic",
"shortName": "H. Hajradinović",
"position": "M",
"userCount": 357,
"id": 254979
},
"playerName": "Haris Hajradinović",
"reason": "Foul",
"id": 119728581,
"time": 71,
"isHome": False,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"homeScore": 1,
"awayScore": 1,
"player": {
"name": "Isaac Kiese Thelin",
"slug": "isaac-kiese-thelin",
"shortName": "I. K. Thelin",
"position": "F",
"userCount": 386,
"id": 178743
},
"assist1": {
"name": "Haris Hajradinović",
"slug": "haris-hajradinovic",
"shortName": "H. Hajradinović",
"position": "M",
"userCount": 357,
"id": 254979
},
"id": 141786585,
"time": 51,
"isHome": False,
"incidentClass": "regular",
"incidentType": "goal"
},
{
"playerIn": {
"name": "Kevin Varga",
"slug": "kevin-varga",
"shortName": "K. Varga",
"position": "M",
"userCount": 274,
"id": 602730
},
"playerOut": {
"name": "Gilbert Koomson",
"slug": "gilbert-koomson",
"shortName": "G. Koomson",
"position": "F",
"userCount": 76,
"id": 341107
},
"id": 120059401,
"time": 46,
"isHome": False,
"incidentType": "substitution"
},
{
"text": "HT",
"homeScore": 1,
"awayScore": 0,
"isLive": False,
"time": 45,
"addedTime": 999,
"incidentType": "period"
},
{
"player": {
"name": "Isaac Kiese Thelin",
"slug": "isaac-kiese-thelin",
"shortName": "I. K. Thelin",
"position": "F",
"userCount": 386,
"id": 178743
},
"playerName": "Isaac Kiese Thelin",
"reason": "Foul",
"id": 119728580,
"time": 15,
"isHome": False,
"incidentClass": "yellow",
"incidentType": "card"
},
{
"homeScore": 1,
"awayScore": 0,
"player": {
"name": "Muhammed Kerem Aktürkoğlu",
"firstName": "",
"lastName": "",
"slug": "muhammed-kerem-akturkoglu",
"shortName": "M. K. Aktürkoğlu",
"position": "F",
"userCount": 281,
"id": 903324
},
"id": 141786583,
"time": 9,
"isHome": True,
"incidentClass": "regular",
"incidentType": "goal"
}
]
}
First, create a dictionary to hold all distinct incidentType values. Then iterate through the incidents and check whether each incidentType already exists in the dictionary. If it exists, append to that list; if not, create a new key-value pair.
# Group incidents by their "incidentType" value, preserving the input order
# of incidents within each group.
result = {}
for incident in ABC["incidents"]:
    # setdefault creates the empty bucket on first sight of a type,
    # then appends in either case.
    result.setdefault(incident["incidentType"], []).append(incident)

for incident_type, group in result.items():
    print(incident_type, ":", group, "\n")
I have dictionary which is below
{
"aggregations": {
"A": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{ "key": "ADL", "doc_count": 1 },
{ "key": "SDD", "doc_count": 1 },
{ "key": "JJD", "doc_count": 1 }
]
},
"B": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{ "key": "ABC", "doc_count": 1 },
{ "key": "CDE", "doc_count": 1 },
{ "key": "FGH", "doc_count": 1 }
]
},
"C": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{ "key": "XYX", "doc_count": 1 },
{ "key": "NXS", "doc_count": 1 }
]
}
}
}
aggregations.keys will be aggregationfilters.fieldName
aggregations.buckets.key will be aggregationfilters.values.title
aggregationfilters.values.paragraph is null every time
aggregations.buckets.doc_count will be aggregationfilters.values.count
Basically I need to extract aggregations.keys and aggregations.bucket values and put into different dictionary.
Need to write a general code structure to do that.
I cannot do this by renaming keys with .pop() on the dictionary
My expected out
{
"aggregationfilters": [
{
"name": "ABC",
"fieldName": "A",
"values": [
{ "title": "ADL", "paragraph": null, "count": 1 },
{ "title": "SDD", "paragraph": null, "count": 1 },
{ "title": "JJD", "paragraph": null, "count": 1 }
]
}, {
"name": "CDE",
"fieldName": "B",
"values": [
{ "title": "ABC", "paragraph": null, "count": 1 },
{ "title": "CDE", "paragraph": null, "count": 1 },
{ "title": "FGH", "paragraph": null, "count": 1 }
]
}, {
"name": "FGH",
"fieldName": "C",
"values": [
{ "title": "XYX", "paragraph": null, "count": 1 },
{ "title": "NXS", "paragraph": null, "count": 1 }
]
}
]
}
Well, this works, but even with my best effort this still doesn't look that clean.
import json
source = {
"aggregations": {
"A": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{"key": "ADL", "doc_count": 1},
{"key": "SDD", "doc_count": 1},
{"key": "JJD", "doc_count": 1},
],
},
"B": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{"key": "ABC", "doc_count": 1},
{"key": "CDE", "doc_count": 1},
{"key": "FGH", "doc_count": 1},
],
},
"C": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{"key": "XYX", "doc_count": 1}, {"key": "NXS", "doc_count": 1}],
},
}
}
# Key-rename map applied to each aggregation entry (old name -> new name).
convert_map = {
    "buckets": "values",
    "doc_count": "count",
    "key": "title",
}
# Bookkeeping keys that must not appear in the converted output.
remove_map = {"sum_other_doc_count", "doc_count_error_upper_bound"}
# Extra keys appended to each entry; "VAL_" is replaced by the entry's key.
add_map = {"name": "Changed VAL_", "fieldName": "VAL_"}
def converting_generator(
    source_: dict, convert_map_: dict, remove_map_: set, add_map_: dict
):
    """Yield one transformed dict per top-level entry of *source_*.

    For each (key, inner-dict) pair in *source_*:
      * top-level keys listed in *remove_map_* are dropped,
      * dictionary keys are renamed per *convert_map_* at every nesting level,
      * *add_map_* entries are appended, with the literal placeholder
        ``VAL_`` in their values replaced by the current top-level key.

    Unlike the previous ``json.dumps``/``str.replace`` round-trip, only
    dictionary *keys* are renamed, and only on exact match.  Data values that
    happen to contain a mapped substring (e.g. a bucket key equal to
    ``"doc_count"``) are left untouched, and keys that merely contain a
    mapped substring (e.g. ``"monkey"``) are not mangled.  The input is
    never mutated.

    :param source_: mapping of name -> aggregation dict to convert.
    :param convert_map_: exact key renames, old name -> new name.
    :param remove_map_: top-level keys to discard from each entry.
    :param add_map_: keys to append; values may contain the ``VAL_`` placeholder.
    :return: generator of converted dicts, one per entry of *source_*.
    """
    variable_identifier = "VAL_"

    def _rename(obj):
        # Recursively rebuild the structure, renaming exact-match dict keys.
        if isinstance(obj, dict):
            return {convert_map_.get(k, k): _rename(v) for k, v in obj.items()}
        if isinstance(obj, list):
            return [_rename(item) for item in obj]
        return obj

    for key, inner_dic in source_.items():
        converted = {
            convert_map_.get(k, k): _rename(v)
            for k, v in inner_dic.items()
            if k not in remove_map_
        }
        # Appended last, matching the original output ordering.
        for add_key, add_val in add_map_.items():
            converted[add_key] = add_val.replace(variable_identifier, key)
        yield converted
# Convert every aggregation and collect the results under a single key.
converted = {
    "aggregation_filters": list(
        converting_generator(source["aggregations"], convert_map, remove_map, add_map)
    )
}
# The source data has no "paragraph" field, so patch a null one into each value.
for inner_dict in converted["aggregation_filters"]:
    for even_inner_dict in inner_dict["values"]:
        even_inner_dict["paragraph"] = None
print(json.dumps(converted, indent=2))
Output:
{
"aggregation_filters": [
{
"values": [
{
"title": "ADL",
"count": 1,
"paragraph": null
},
{
"title": "SDD",
"count": 1,
"paragraph": null
},
{
"title": "JJD",
"count": 1,
"paragraph": null
}
],
"name": "Changed A",
"fieldName": "A"
},
{
"values": [
{
"title": "ABC",
"count": 1,
"paragraph": null
},
{
"title": "CDE",
"count": 1,
"paragraph": null
},
{
"title": "FGH",
"count": 1,
"paragraph": null
}
],
"name": "Changed B",
"fieldName": "B"
},
{
"values": [
{
"title": "XYX",
"count": 1,
"paragraph": null
},
{
"title": "NXS",
"count": 1,
"paragraph": null
}
],
"name": "Changed C",
"fieldName": "C"
}
]
}
Always show your code, would be nice if that's a working one - to show that you've put at least that worth of the effort on your problem.
It doesn't bother me, as this feels like puzzle solving, but others may not feel the same.
I am trying to write a CSV file from JSON returned from an API call.
I am using pandas, json and requests libraries.
response = requests.get("valid_api_call")
My response.json() looks like this:
"request": {
"Target": "Affiliate_Report",
"Format": "json",
"Service": "service",
"Version": "2",
"api_key": "key",
"Method": "getStats",
"fields": [
"Stat.offer_id",
"Stat.offer_file_id",
"Stat.hour",
"Stat.date",
"Stat.affiliate_info5",
"Stat.affiliate_info4",
"Stat.affiliate_info3",
"Stat.affiliate_info1",
"Offer.name"
],
"limit": "10",
"data_start": "2019-11-11",
"data_end": "2019-11-18"
},
"response": {
"status": 1,
"httpStatus": 200,
"data": {
"page": 1,
"current": 10,
"count": 23500,
"pageCount": 2350,
"data": [
{
"Stat": {
"offer_id": "18",
"offer_file_id": "0",
"hour": "0",
"date": "2019-11-11",
"affiliate_info5": "editor_pick",
"affiliate_info4": "www.google.com",
"affiliate_info3": "1",
"affiliate_info1": "Other (unique values)"
},
"Offer": {
"name": "Special"
}
},
{
"Stat": {
"offer_id": "18",
"offer_file_id": "0",
"hour": "0",
"date": "2019-11-11",
"affiliate_info5": "popup",
"affiliate_info4": "www.google.com",
"affiliate_info3": "6",
"affiliate_info1": "Other (unique values)"
},
"Offer": {
"name": "Special"
}
},
{
"Stat": {
"offer_id": "18",
"offer_file_id": "0",
"hour": "0",
"date": "2019-11-11",
"affiliate_info5": "popup",
"affiliate_info4": "www.google.com",
"affiliate_info3": "1",
"affiliate_info1": "Other (unique values)"
},
"Offer": {
"name": "Special"
}
},
{
"Stat": {
"offer_id": "18",
"offer_file_id": "0",
"hour": "0",
"date": "2019-11-11",
"affiliate_info5": "popup",
"affiliate_info4": "www.google.com",
"affiliate_info3": "1",
"affiliate_info1": "Other (unique values)"
},
"Offer": {
"name": "Special"
}
},
{
"Stat": {
"offer_id": "18",
"offer_file_id": "0",
"hour": "0",
"date": "2019-11-11",
"affiliate_info5": "popup",
"affiliate_info4": "www.google.com",
"affiliate_info3": "6",
"affiliate_info1": "Other (unique values)"
},
"Offer": {
"name": "Special"
}
},
{
"Stat": {
"offer_id": "18",
"offer_file_id": "0",
"hour": "0",
"date": "2019-11-11",
"affiliate_info5": "popup",
"affiliate_info4": "www.google.com",
"affiliate_info3": "6",
"affiliate_info1": "Other (unique values)"
},
"Offer": {
"name": "Special"
}
},
{
"Stat": {
"offer_id": "18",
"offer_file_id": "0",
"hour": "0",
"date": "2019-11-11",
"affiliate_info5": "popup_below_2nd",
"affiliate_info4": "Other (unique values)",
"affiliate_info3": "1",
"affiliate_info1": "Other (unique values)"
},
"Offer": {
"name": "Special"
}
},
{
"Stat": {
"offer_id": "18",
"offer_file_id": "0",
"hour": "0",
"date": "2019-11-11",
"affiliate_info5": "popup_below_2nd",
"affiliate_info4": "www.google.com",
"affiliate_info3": "1",
"affiliate_info1": "Other (unique values)"
},
"Offer": {
"name": "Special"
}
},
{
"Stat": {
"offer_id": "18",
"offer_file_id": "0",
"hour": "0",
"date": "2019-11-11",
"affiliate_info5": "popup_below_2nd",
"affiliate_info4": "www.google.com",
"affiliate_info3": "6",
"affiliate_info1": "Other (unique values)"
},
"Offer": {
"name": "Special"
}
},
{
"Stat": {
"offer_id": "18",
"offer_file_id": "0",
"hour": "0",
"date": "2019-11-11",
"affiliate_info5": "popup_below_2nd",
"affiliate_info4": "www.google.com",
"affiliate_info3": "6",
"affiliate_info1": "Other (unique values)"
},
"Offer": {
"name": "Special"
}
}
],
"dbSource": "branddb"
},
"errors": [],
"errorMessage": null
}
}
I am trying to write the output to a CSV file where the columns would be the field :
Stat.offer_id,
Stat.offer_file_id,
Stat.hour,
Stat.date,
Stat.affiliate_info5,
Stat.affiliate_info4,
Stat.affiliate_info3,
Stat.affiliate_info1,
Offer.name
I haven't worked with JSON that much, and I'm not sure how I would iterate over the file and write it to CSV with Python. The way I am going to write the file will probably look like this:
f = csv.writer(open(response,'w'))
f.writerow(keylist)
I do not think that csv.writer accepts response as a file which is the json returned by an API call.
I know there is a method with pandas to convert JSON to CSV
df.to_csv('output.csv', encoding='utf-8', index=False)
So my question would be what would be the smartest way of achieving CSV file as an output when the input is JSON returned by API call which may have pagination?
Thank you for the suggestions.
EDIT:
Second API response follows the same structure but throws an error - string indices must be integers.
{
"request": {
"Target": "Affiliate_Report",
"Format": "json",
"Service": "service",
"Version": "2",
"api_key": "key",
"Method": "getStats",
"fields": [
"Stat.offer_id",
"Stat.offer_file_id",
"Stat.hour",
"Stat.date",
"Stat.affiliate_info5",
"Stat.affiliate_info4",
"Stat.affiliate_info3",
"Stat.affiliate_info1",
"Offer.name"
],
"limit": "10",
"data_start": "2019-11-20",
"data_end": "2019-11-20"
},
"response": {
"status": 1,
"httpStatus": 200,
"data": {
"page": 1,
"current": 10,
"count": 4037,
"pageCount": 404,
"data": [
{
"Stat": {
"offer_id": "20",
"offer_file_id": "0",
"hour": "0",
"date": "2019-11-20",
"affiliate_info5": "editor_pick",
"affiliate_info4": "www.google.com",
"affiliate_info3": "info",
"affiliate_info1": "1"
},
"Offer": {
"name": "60"
}
},
{
"Stat": {
"offer_id": "20",
"offer_file_id": "0",
"hour": "0",
"date": "2019-11-20",
"affiliate_info5": "editor_pick",
"affiliate_info4": "www.google.com",
"affiliate_info3": "info",
"affiliate_info1": "1"
},
"Offer": {
"name": "60"
}
},
{
"Stat": {
"offer_id": "20",
"offer_file_id": "0",
"hour": "0",
"date": "2019-11-20",
"affiliate_info5": "editor_pick",
"affiliate_info4": "www.google.com",
"affiliate_info3": "info",
"affiliate_info1": "1"
},
"Offer": {
"name": "60"
}
},
{
"Stat": {
"offer_id": "20",
"offer_file_id": "0",
"hour": "0",
"date": "2019-11-20",
"affiliate_info5": "editor_pick",
"affiliate_info4": "www.google.com",
"affiliate_info3": "info",
"affiliate_info1": "1"
},
"Offer": {
"name": "60"
}
},
{
"Stat": {
"offer_id": "20",
"offer_file_id": "0",
"hour": "0",
"date": "2019-11-20",
"affiliate_info5": "editor_pick",
"affiliate_info4": "www.google.com",
"affiliate_info3": "info",
"affiliate_info1": "1"
},
"Offer": {
"name": "60"
}
},
{
"Stat": {
"offer_id": "20",
"offer_file_id": "0",
"hour": "0",
"date": "2019-11-20",
"affiliate_info5": "editor_pick",
"affiliate_info4": "www.google.com",
"affiliate_info3": "info",
"affiliate_info1": "1"
},
"Offer": {
"name": "60"
}
},
{
"Stat": {
"offer_id": "20",
"offer_file_id": "0",
"hour": "0",
"date": "2019-11-20",
"affiliate_info5": "editor_pick",
"affiliate_info4": "www.google.com",
"affiliate_info3": "info",
"affiliate_info1": "1"
},
"Offer": {
"name": "60"
}
},
{
"Stat": {
"offer_id": "20",
"offer_file_id": "0",
"hour": "0",
"date": "2019-11-20",
"affiliate_info5": "editor_pick",
"affiliate_info4": "www.google.com",
"affiliate_info3": "info",
"affiliate_info1": "1"
},
"Offer": {
"name": "60"
}
},
{
"Stat": {
"offer_id": "20",
"offer_file_id": "0",
"hour": "0",
"date": "2019-11-20",
"affiliate_info5": "editor_pick",
"affiliate_info4": "www.google.com",
"affiliate_info3": "info",
"affiliate_info1": "1"
},
"Offer": {
"name": "60"
}
},
{
"Stat": {
"offer_id": "20",
"offer_file_id": "0",
"hour": "0",
"date": "2019-11-20",
"affiliate_info5": "editor_pick",
"affiliate_info4": "www.google.com",
"affiliate_info3": "info",
"affiliate_info1": "1"
},
"Offer": {
"name": "60"
}
}
],
"dbSource": "branddb"
},
"errors": [],
"errorMessage": null
}
}
Code used
response_2 = requests.get(JSON)
response_json_2 = response_2.json()
df_2 = pd.io.json.json_normalize(response_json_2['response']['data']['data'])
df_2.to_csv(f'{from_date} - 2.csv', encoding='utf-8', index=False)
In this case, try using pandas.io.json.json_normalize like:
response_json = response.json()
# Or try this if it still doesn't work
# import json
# response_json = json.loads(response.json())
df = pd.io.json.json_normalize(response_json['response']['data']['data'])
Should return the DataFrame
Stat.offer_id Stat.offer_file_id Stat.hour Stat.date Stat.affiliate_info5 \
0 18 0 0 2019-11-11 editor_pick
1 18 0 0 2019-11-11 popup
2 18 0 0 2019-11-11 popup
3 18 0 0 2019-11-11 popup
4 18 0 0 2019-11-11 popup
5 18 0 0 2019-11-11 popup
6 18 0 0 2019-11-11 popup_below_2nd
7 18 0 0 2019-11-11 popup_below_2nd
8 18 0 0 2019-11-11 popup_below_2nd
9 18 0 0 2019-11-11 popup_below_2nd
Stat.affiliate_info4 Stat.affiliate_info3 Stat.affiliate_info1 \
0 www.google.com 1 Other (unique values)
1 www.google.com 6 Other (unique values)
2 www.google.com 1 Other (unique values)
3 www.google.com 1 Other (unique values)
4 www.google.com 6 Other (unique values)
5 www.google.com 6 Other (unique values)
6 Other (unique values) 1 Other (unique values)
7 www.google.com 1 Other (unique values)
8 www.google.com 6 Other (unique values)
9 www.google.com 6 Other (unique values)
Offer.name
0 Special
1 Special
2 Special
3 Special
4 Special
5 Special
6 Special
7 Special
8 Special
9 Special
Then DataFrame.to_csv
df.to_csv('output.csv', encoding='utf-8', index=False)
I have the following dictionary
d1 = {
"Completely Agree": {
"child": {
"Male": {
"child": {
"Greater than 54": {
"child": {},
"value": 4,
"label": "Greater than 54"
},
"Between 35 to 39": {
"child": {},
"value": 1,
"label": "Between 35 to 39"
}
},
"value": 5,
"label": "Male"
}
},
"value": 5,
"label": "Completely Agree"
},
"Somewhat Agree": {
"child": {
"Male": {
"child": {
"Greater than 54": {
"child": {},
"value": 1,
"label": "Greater than 54"
},
"Between 45 to 49": {
"child": {},
"value": 2,
"label": "Between 45 to 49"
},
"Between 25 to 29": {
"child": {},
"value": 1,
"label": "Between 25 to 29"
},
"Between 35 to 39": {
"child": {},
"value": 1,
"label": "Between 35 to 39"
},
"Between 50 to 54": {
"child": {},
"value": 3,
"label": "Between 50 to 54"
},
"Between 40 to 44": {
"child": {},
"value": 1,
"label": "Between 40 to 44"
}
},
"value": 9,
"label": "Male"
},
"Female": {
"child": {
"Between 25 to 29": {
"child": {},
"value": 2,
"label": "Between 25 to 29"
},
"Between 30 to 34": {
"child": {},
"value": 1,
"label": "Between 30 to 34"
},
"Greater than 54": {
"child": {},
"value": 1,
"label": "Greater than 54"
}
},
"value": 4,
"label": "Female"
}
},
"value": 13,
"label": "Somewhat Agree"
},
"Neither Agree nor Disagree": {
"child": {
"Male": {
"child": {
"Between 25 to 29": {
"child": {},
"value": 1,
"label": "Between 25 to 29"
},
"Between 35 to 39": {
"child": {},
"value": 1,
"label": "Between 35 to 39"
},
"Between 30 to 34": {
"child": {},
"value": 1,
"label": "Between 30 to 34"
},
"Between 45 to 49": {
"child": {},
"value": 1,
"label": "Between 45 to 49"
},
"Between 50 to 54": {
"child": {},
"value": 1,
"label": "Between 50 to 54"
}
},
"value": 5,
"label": "Male"
},
"Female": {
"child": {
"Less than 25": {
"child": {},
"value": 1,
"label": "Less than 25"
}
},
"value": 1,
"label": "Female"
}
},
"value": 6,
"label": "Neither Agree nor Disagree"
}
I want to insert another key lets say 'data_recs' on the same level where child is an empty dictionary {}. So the result should be
d1 = {
"Completely Agree": {
"child": {
"Male": {
"child": {
"Greater than 54": {
"child": {},
"value": 4,
"label": "Greater than 54",
"data_recs": [1,2,3,4]
},
"Between 35 to 39": {
"child": {},
"value": 1,
"label": "Between 35 to 39",
"data_recs": [1,2,3,4]
}
},
"value": 5,
"label": "Male"
}
},
"value": 5,
"label": "Completely Agree"
},
"Somewhat Agree": {
"child": {
"Male": {
"child": {
"Greater than 54": {
"child": {},
"value": 1,
"label": "Greater than 54",
"data_recs": [1,2,3,4]
},
"Between 45 to 49": {
"child": {},
"value": 2,
"label": "Between 45 to 49"
},
"Between 25 to 29": {
"child": {},
"value": 1,
"label": "Between 25 to 29",
"data_recs": [1,2,3,4]
},
"Between 35 to 39": {
"child": {},
"value": 1,
"label": "Between 35 to 39",
"data_recs": [1,2,3,4]
},
"Between 50 to 54": {
"child": {},
"value": 3,
"label": "Between 50 to 54",
"data_recs": [1,2,3,4]
},
"Between 40 to 44": {
"child": {},
"value": 1,
"label": "Between 40 to 44",
"data_recs": [1,2,3,4]
}
},
"value": 9,
"label": "Male"
},
"Female": {
"child": {
"Between 25 to 29": {
"child": {},
"value": 2,
"label": "Between 25 to 29",
"data_recs": [1,2,3,4]
},
"Between 30 to 34": {
"child": {},
"value": 1,
"label": "Between 30 to 34",
"data_recs": [1,2,3,4]
},
"Greater than 54": {
"child": {},
"value": 1,
"label": "Greater than 54",
"data_recs": [1,2,3,4]
}
},
"value": 4,
"label": "Female"
}
},
"value": 13,
"label": "Somewhat Agree"
}
The dictionary can have n number of hierarchy. I have written the following code to implement this but I think I am missing something out here.
def parse_master_dict(data, recs_map):
    """Attach a 'data_recs' entry to every leaf node of the tree, in place.

    A leaf is a node whose 'child' dict is empty.  For each leaf labelled
    *k*, ``recs_map.get(k, [])`` is stored under 'data_recs'; non-leaf
    nodes are recursed into.

    Bug fixed: the old ``if v.get('child', None):`` guard skipped the body
    exactly when 'child' was empty, so the leaf branch could never run.
    Python-2 debug ``print`` statements were removed as well.

    :param data: nested dict of {label: {"child": {...}, ...}} nodes; mutated.
    :param recs_map: mapping of leaf label -> records list.
    """
    for key, node in data.items():
        child = node['child']
        if not child:
            # Leaf node: attach the records for this label (default empty).
            node['data_recs'] = recs_map.get(key, [])
        else:
            # Inner node: descend into its children.
            parse_master_dict(child, recs_map)
Please advise.
Your if v.get('child', None): statement is preventing you from proceeding to update the dict when the child dict is empty since the condition would be evaluated as False. Remove the if statement and your code should work:
def parse_master_dict(data, recs_map):
    """Walk the node tree in place; every leaf (empty 'child') receives a
    'data_recs' list looked up by its label, defaulting to [].
    """
    for label, node in data.items():
        children = node['child']
        if children:
            # Not a leaf -- keep descending.
            parse_master_dict(children, recs_map)
        else:
            node['data_recs'] = recs_map.get(label, [])