How to flatten complex json before converting into Dataframe - python

I am trying to convert a complex, deeply nested JSON document into a pandas DataFrame, but I am running into problems during the conversion and I am not sure how to flatten it correctly.
I want every piece of data to appear in the DataFrame. Below I am sharing the format of my JSON data so that the structure is clear, followed by the code I am currently using.
{
'1164223': {
'fullSummaryLink': '/series/19014/scorecard/1164223/new-zealand-a-vs-india-a-1st-unofficial-odi-india-a-tour-of-nz-2018-19',
'innings': {
'1': {
'batsmen': [
{
'name': 'HD Rutherford',
'href': 'http://www.espncricinfo.com/ci/content/player/331375.html',
'stats': {
'runs': {
'name': 'runs',
'text': 'RUNS',
'value': '70'
},
'ballsFaced': {
'name': 'ballsFaced',
'text': 'BF',
'value': '66'
},
'notouts': {
'name': 'notouts',
'text': 'Not Out',
'value': '0'
}
}
},
{
'name': 'JDS Neesham',
'href': 'http://www.espncricinfo.com/ci/content/player/355269.html',
'stats': {
'runs': {
'name': 'runs',
'text': 'RUNS',
'value': '79'
},
'ballsFaced': {
'name': 'ballsFaced',
'text': 'BF',
'value': '48'
},
'notouts': {
'name': 'notouts',
'text': 'Not Out',
'value': '1'
}
}
}
],
'team': {
'teamDisplayName': 'NEW ZEALAND A',
'innDisplayName': 'INNINGS',
'runs': 308,
'overs': 50,
'wickets': 6,
'description': 'complete',
'inningsRunWicket': '308/6',
'inningStatus': ''
},
'bowlers': [
{
'name': 'S Kaul',
'href': 'http://www.espncricinfo.com/ci/content/player/326017.html',
'stats': {
'overs': {
'name': 'overs',
'text': 'O',
'value': '10'
},
'conceded': {
'name': 'conceded',
'text': 'R',
'value': '74'
},
'wickets': {
'name': 'wickets',
'text': 'E',
'value': '2'
}
}
},
{
'name': 'K Gowtham',
'href': 'http://www.espncricinfo.com/ci/content/player/424377.html',
'stats': {
'overs': {
'name': 'overs',
'text': 'O',
'value': '9'
},
'conceded': {
'name': 'conceded',
'text': 'R',
'value': '46'
},
'wickets': {
'name': 'wickets',
'text': 'E',
'value': '1'
}
}
}
]
},
'2': {
'bowlers': [
{
'name': 'HK Bennett',
'href': 'http://www.espncricinfo.com/ci/content/player/226493.html',
'stats': {
'overs': {
'name': 'overs',
'text': 'O',
'value': '10'
},
'conceded': {
'name': 'conceded',
'text': 'R',
'value': '65'
},
'wickets': {
'name': 'wickets',
'text': 'E',
'value': '2'
}
}
},
{
'name': 'LH Ferguson',
'href': 'http://www.espncricinfo.com/ci/content/player/493773.html',
'stats': {
'overs': {
'name': 'overs',
'text': 'O',
'value': '10'
},
'conceded': {
'name': 'conceded',
'text': 'R',
'value': '75'
},
'wickets': {
'name': 'wickets',
'text': 'E',
'value': '2'
}
}
}
],
'batsmen': [
{
'name': 'V Shankar',
'href': 'http://www.espncricinfo.com/ci/content/player/477021.html',
'stats': {
'runs': {
'name': 'runs',
'text': 'RUNS',
'value': '87'
},
'ballsFaced': {
'name': 'ballsFaced',
'text': 'BF',
'value': '80'
},
'notouts': {
'name': 'notouts',
'text': 'Not Out',
'value': '1'
}
}
},
{
'name': 'SS Iyer',
'href': 'http://www.espncricinfo.com/ci/content/player/642519.html',
'stats': {
'runs': {
'name': 'runs',
'text': 'RUNS',
'value': '54'
},
'ballsFaced': {
'name': 'ballsFaced',
'text': 'BF',
'value': '54'
},
'notouts': {
'name': 'notouts',
'text': 'Not Out',
'value': '0'
}
}
}
],
'team': {
'teamDisplayName': 'INDIA A',
'innDisplayName': 'INNINGS',
'runs': 311,
'overs': 49,
'wickets': 6,
'description': 'target reached',
'inningsRunWicket': '311/6',
'inningStatus': ''
}
}
},
'isAvailable': True
}
}
this is my python code which converts the dictionary of json values into the pandas dataframe
# pd.DataFrame(v) only expands the top level of each match, which leaves the
# nested 'innings' dicts as opaque cell values. Walk the structure explicitly
# and emit one flat row per player (batsman or bowler) instead.
rows = []
for match_id, match in scorecard_summary.items():
    for innings_no, innings in match.get('innings', {}).items():
        team_name = innings.get('team', {}).get('teamDisplayName')
        for role in ('batsmen', 'bowlers'):
            for player in innings.get(role, []):
                row = {
                    'match_id': match_id,
                    'innings': innings_no,
                    'team': team_name,
                    'role': role,
                    'name': player['name'],
                }
                # Each stat is {'name': ..., 'text': ..., 'value': ...};
                # keep only the value, keyed by the stat's own name.
                row.update(
                    {stat: info['value'] for stat, info in player['stats'].items()}
                )
                rows.append(row)
# Stats that do not apply to a role (e.g. 'runs' for a bowler) become NaN.
df = pd.DataFrame(rows)
df

Related

Creating Lists of Dictionaries with N dicts per list

Problem
I am dealing with a response object and I am trying to massage it into something which could be easily consumed by a Typescript frontend.
The object:
r_obj = [
{
'custom_fields': [
{'id': 360018501198, 'value': '5678'},
{'id': 360023508598, 'value': 'Jim'},
{'id': 1900000084913, 'value': '2021-09-03'}
]
},
{
'custom_fields': [
{'id': 360018501198, 'value': '1234'},
{'id': 360023508598, 'value': 'Bob'},
{'id': 1900000084913, 'value': '2021-09-03'}
]
}
]
Desired Structure
[
{
"order_id": '5678',
"name": "Jim",
"date": '2021-09-03'
},
{
"order_id": '1234',
"name": "Bob",
"date": '2021-09-03'
},
]
So id values in the custom_field payload are known and I want to map them to enum representation and with the hope that the output payload contain saner k,v pairs.
The Code
I have only managed to do the mapping of the enum values.
if __name__ == "__main__":
    class FieldEnum(enum.IntEnum):
        """Known custom-field ids, named after the key each one should map to."""

        ORDER_ID = 360018501198
        DATE = 1900000084913
        NAME = 360023508598

    r_obj = [
        {
            'custom_fields': [
                {'id': 360018501198, 'value': '5678'},
                {'id': 360023508598, 'value': 'Jim'},
                {'id': 1900000084913, 'value': '2021-09-03'},
            ]
        },
        {
            'custom_fields': [
                {'id': 360018501198, 'value': '1234'},
                {'id': 360023508598, 'value': 'Bob'},
                {'id': 1900000084913, 'value': '2021-09-03'},
            ]
        },
    ]

    # Rewrite each numeric id in place as its lower-cased enum member name,
    # printing every field dict after it has been processed.
    for record in r_obj:
        for custom_field in record['custom_fields']:
            raw_id = custom_field['id']
            if raw_id:
                custom_field['id'] = FieldEnum(raw_id).name.lower()
            print(custom_field)
Code Output
{'id': 'order_id', 'value': '5678'}
{'id': 'name', 'value': 'Jim'}
{'id': 'date', 'value': '2021-09-03'}
{'id': 'order_id', 'value': '1234'}
{'id': 'name', 'value': 'Bob'}
{'id': 'date', 'value': '2021-09-03'}
I am struggling with how best to group(?) each set of 3 dicts into its own list structure.
EDIT
As per #balderman, I have altered to this:
if __name__ == "__main__":
    class FieldEnum(enum.IntEnum):
        """Known custom-field ids, named after the key each one should map to."""

        ORDER_ID = 360018501198
        DATE = 1900000084913
        NAME = 360023508598

    r_obj = [
        {
            'custom_fields': [
                {'id': 360018501198, 'value': '5678'},
                {'id': 360023508598, 'value': 'Jim'},
                {'id': 1900000084913, 'value': '2021-09-03'},
            ]
        },
        {
            'custom_fields': [
                {'id': 360018501198, 'value': '1234'},
                {'id': 360023508598, 'value': 'Bob'},
                {'id': 1900000084913, 'value': '2021-09-03'},
            ]
        },
    ]

    # Build one {field_name: value} dict per response entry, then collect them.
    out = []
    for record in r_obj:
        renamed = {}
        for custom_field in record['custom_fields']:
            key = FieldEnum(custom_field['id']).name.lower()
            renamed[key] = custom_field['value']
        out.append(renamed)
    print(out)
Code Output
[
{
'order_id': '5678',
'name': 'Jim',
'date': '2021-09-03'
},
{
'order_id': '1234',
'name': 'Bob',
'date': '2021-09-03'
}
]
Which is the desired output.
I was wondering if there is another way syntactically to achieve the same output?
I would do it with a list comprehension to make it one-liner
code:
import enum


class FieldEnum(enum.IntEnum):
    """Known custom-field ids, named after the key each one should map to."""

    ORDER_ID = 360018501198
    DATE = 1900000084913
    NAME = 360023508598


r_obj = [
    {
        'custom_fields': [
            {'id': 360018501198, 'value': '5678'},
            {'id': 360023508598, 'value': 'Jim'},
            {'id': 1900000084913, 'value': '2021-09-03'},
        ]
    },
    {
        'custom_fields': [
            {'id': 360018501198, 'value': '1234'},
            {'id': 360023508598, 'value': 'Bob'},
            {'id': 1900000084913, 'value': '2021-09-03'},
        ]
    },
]

# One dict per response entry: enum member name (lower-cased) -> field value.
print([
    dict(
        (FieldEnum(custom_field['id']).name.lower(), custom_field['value'])
        for custom_field in record['custom_fields']
    )
    for record in r_obj
])
result:
[
{"order_id": "5678", "name": "Jim", "date": "2021-09-03"},
{"order_id": "1234", "name": "Bob", "date": "2021-09-03"},
]
try the below
# Maps each known custom-field id to the key it should have in the output.
lookup = {
    360018501198: 'order_id',
    360023508598: 'name',
    1900000084913: 'date',
}

data = [
    {
        'custom_fields': [
            {'id': 360018501198, 'value': '5678'},
            {'id': 360023508598, 'value': 'Jim'},
            {'id': 1900000084913, 'value': '2021-09-03'},
        ]
    },
    {
        'custom_fields': [
            {'id': 360018501198, 'value': '1234'},
            {'id': 360023508598, 'value': 'Bob'},
            {'id': 1900000084913, 'value': '2021-09-03'},
        ]
    },
]

# One {output_key: value} dict per entry, in the same order as ``data``.
out = [
    {lookup[custom_field['id']]: custom_field['value']
     for custom_field in record['custom_fields']}
    for record in data
]
print(out)
output
[{'order_id': '5678', 'name': 'Jim', 'date': '2021-09-03'}, {'order_id': '1234', 'name': 'Bob', 'date': '2021-09-03'}]

How to get count for a particular key in the dictionary with values as list

My data, a list of dictionaries, is below.
I need the count of each distinct value for two keys:
1. BusinessArea and the count of each of its values
2. Designation and the count of each of its values
# Sample records. Note the comma between 'L1' and 'L2': without it Python
# joins adjacent string literals into the single value 'L1L2', which would
# make the per-designation counts wrong.
test = [
    {
        'masterid': '1',
        'name': 'Group1',
        'BusinessArea': ['Accounting', 'Research'],
        'Designation': ['L1', 'L2'],
    },
    {
        'masterid': '2',
        'name': 'Group1',
        'BusinessArea': ['Research', 'Accounting'],
        'Role': [
            {'id': '5032', 'name': 'Tester'},
            {'id': '5033', 'name': 'Developer'},
        ],
        'Designation': ['L1', 'L2'],
    },
    {
        'masterid': '3',
        'name': 'Group1',
        'BusinessArea': ['Engineering'],
        'Role': [
            {'id': '5032', 'name': 'Developer'},
            {'id': '5033', 'name': 'Developer', 'parentname': ''},
        ],
        'Designation': ['L1'],
    },
]
I want to count, across all masterid entries, how many times each BusinessArea value and each Designation value occurs.
The expected output is below
[
{
"name": "BusinessArea",
"values": [
{
"name": "Accounting",
"count": "2"
},
{
"name": "Research",
"count": "2"
},
{
"name": "Engineering",
"count": "1"
}
]
},
{
"name": "Designation",
"values": [
{
"name": "L1",
"count": "3"
},
{
"name": "L2",
"count": "2"
}
]
}
]
masterid 1, 2 and 3 all contain L1, and masterid 1 and 2 contain L2, so the counts are L1: 3 and L2: 2.
something like the below (not exactly the output you mentioned but quite close..)
from collections import defaultdict

test = [
    {
        'masterid': '1',
        'name': 'Group1',
        'BusinessArea': ['Accounting', 'Research'],
        'Designation': ['L1', 'L2'],
    },
    {
        'masterid': '2',
        'name': 'Group1',
        'BusinessArea': ['Research', 'Accounting'],
        'Role': [
            {'id': '5032', 'name': 'Tester'},
            {'id': '5033', 'name': 'Developer'},
        ],
        'Designation': ['L1', 'L2'],
    },
    {
        'masterid': '3',
        'name': 'Group1',
        'BusinessArea': ['Engineering'],
        'Role': [
            {'id': '5032', 'name': 'Developer'},
            {'id': '5033', 'name': 'Developer', 'parentname': ''},
        ],
        'Designation': ['L1'],
    },
]

# Tally how many records mention each business area / designation.
b_area = defaultdict(int)
des = defaultdict(int)

for record in test:
    for area in record['BusinessArea']:
        b_area[area] += 1

for record in test:
    for level in record['Designation']:
        des[level] += 1

print(b_area)
print(des)
output
defaultdict(<class 'int'>, {'Accounting': 2, 'Research': 2, 'Engineering': 1})
defaultdict(<class 'int'>, {'L1': 3, 'L2': 2})

How to extract elements from nested dict

1. I need to get the name of each Business entry and append it to a list.
2. I need to get the name of each policy and append it to a list chosen according to the policy's parent.
3. If the parent is Marketing, the name has to be added to level1.
4. If the parent is Advertising, the name has to be added to level2.
5. If Business is [] anywhere, I need to pass None instead of an empty list.
I also need to check whether the keys exist, because policies and Business may be missing for some entries.
The sample dictionary is below
searchtest = [{'_index': 'newtest',
'_type': '_doc',
'_id': '100',
'_score': 1.0,
'_source': {'id': '100',
'name': 'A',
'Business': [{'id': '7', 'name': 'Enterprise'},
{'id': '8', 'name': 'Customer'}],
'policies': [{'id': '332',
'name': 'Second division',
'parent': 'Marketing'},
{'id': '3323', 'name': 'First division', 'parent': 'Marketing'}]}},
{'_index': 'newtest',
'_type': '_doc',
'_id': '101',
'_score': 1.0,
'_source': {'id': '101',
'name': 'B',
'Business': [{'id': '7'},
{'id': '8', 'name': 'Customer'}],
'policies': [{'id': '332',
'name': 'Second division',
'parent': 'Marketing'},
{'id': '3323', 'name': 'First division', 'parent': 'Marketing'}]}}]
Expected out
[
{
"id": "100",
"name": "A",
"Business": ["Enterprise", "Customer"],
"level1": ['Second division', 'First division'],
"level2": [ ]
},
{
"id": "101",
"name": "B",
"Business": ["Enterprise", "Customer"],
"level1": ['Second division', 'First division'],
"level2": [ ]
}
]
COde is below
def do_the_thing(lst):
    """Flatten search hits into per-document summary dicts.

    Each hit is expected to carry its document under ``'_source'``. For every
    hit the result contains the document's ``id`` and ``name``, the names of
    its ``Business`` entries, and the names of its ``policies`` split by
    parent: ``'Marketing'`` goes to ``level1``, ``'Advertising'`` to
    ``level2``. Policies with any other parent are ignored.

    A missing ``Business`` or ``policies`` key, and entries without a
    ``name``, are tolerated and simply contribute nothing.

    Args:
        lst: Iterable of hit dicts, each with a ``'_source'`` dict that has
            at least ``'id'`` and ``'name'``.

    Returns:
        A list with one dict per hit, with keys ``id``, ``name``,
        ``Business``, ``level1`` and ``level2``.

    Raises:
        KeyError: If a hit has no ``'_source'``, or the source lacks ``'id'``
            or ``'name'``.
    """
    parents_mapper = {
        'Marketing': 'level1',
        'Advertising': 'level2',
    }
    resp = []
    for el in lst:
        # Everything of interest lives under '_source', not on the hit itself.
        source = el['_source']
        d = {
            'id': source['id'],
            'name': source['name'],
            'Business': [],
            'level1': [],
            'level2': [],
        }
        # 'Business' is a list of dicts; ``or []`` also covers a key that is
        # present but set to None.
        for business in source.get('Business') or []:
            business_name = business.get('name')
            if business_name:
                d['Business'].append(business_name)
        for policy in source.get('policies') or []:
            target_level = parents_mapper.get(policy.get('parent'))
            policy_name = policy.get('name')
            if target_level and policy_name:
                d[target_level].append(policy_name)
        resp.append(d)
    return resp
if __name__ == "__main__":
    import pprint

    # Pretty-print the flattened hits using a 4-space indent.
    summaries = do_the_thing(searchtest)
    printer = pprint.PrettyPrinter(indent=4)
    printer.pprint(summaries)
My output
[ {'Business': [], 'id': '100', 'level1': [], 'level2': [], 'name': 'A'},
{'Business': [], 'id': '101', 'level1': [], 'level2': [], 'name': 'B'}]
The problem in my output you can see:
'Business', 'level1' is [] is null list.
Adding one more dictionary for testing
searchtest = [{'_index': 'newtest',
'_type': '_doc',
'_id': '100',
'_score': 1.0,
'_source': {'id': '100',
'name': 'A',
'policies': [{'id': '332',
'name': 'Second division',
'parent': 'Marketing'},
{'id': '3323', 'name': 'First division', 'parent': 'Marketing'}]}},
{'_index': 'newtest',
'_type': '_doc',
'_id': '101',
'_score': 1.0,
'_source': {'id': '101',
'name': 'B',
'Business': [{'id': '9'}, {'id': '10', 'name': 'Customer'}],
'policies': [{'id': '332',
'name': 'Second division',
'parent': 'Marketing'},
{'id': '3323', 'name': 'First division', 'parent': 'Advertising'}]}}]
In the above dictionary you can see that there is no Business key for id 100, and for id 101 the first Business entry has no name. So a KeyError will be raised, and I need to handle that.
You are not collecting data from dict, You have to select particular key to get it's value.
replace this :
for el in lst:
d = {
'id': el['_source']['id'],
'name': el['_source']['name'],
'Business': [],
'level1': [],
'level2': []
}
with this:
# Iterate the function's own parameter (``lst``); ``data`` is not defined
# inside do_the_thing.
for el in lst:
    source = el['_source']
    policies = source.get('policies', [])
    d = {
        'id': source['id'],
        'name': source['name'],
        # Tolerate a missing 'Business' key and entries that have no 'name'.
        'Business': [
            business['name']
            for business in source.get('Business', [])
            if business.get('name')
        ],
        # Route each policy by its parent rather than putting all in level1.
        'level1': [
            policy['name']
            for policy in policies
            if policy.get('parent') == 'Marketing' and policy.get('name')
        ],
        'level2': [
            policy['name']
            for policy in policies
            if policy.get('parent') == 'Advertising' and policy.get('name')
        ],
    }
output:
[ { 'Business': ['Enterprise', 'Customer'],
'id': '100',
'level1': ['Second division', 'First division'],
'level2': [],
'name': 'A'},
{ 'Business': ['Enterprise', 'Customer'],
'id': '101',
'level1': ['Second division', 'First division'],
'level2': [],
'name': 'B'}]

Python - Add multiple values in dictionary / json value

I'm building a python application which receives REST response in below format:
[
{
'metric': 'pass_status',
'history': [
{
'date': '2019-02-20T10:26:52+0000',
'value': 'OK'
},
{
'date': '2019-03-13T11:37:39+0000',
'value': 'FAIL'
},
{
'date': '2019-03-13T12:00:57+0000',
'value': 'OK'
}
]
},
{
'metric': 'bugs',
'history': [
{
'date': '2019-02-20T10:26:52+0000',
'value': '1'
},
{
'date': '2019-03-13T11:37:39+0000',
'value': '6'
},
{
'date': '2019-03-13T12:00:57+0000',
'value': '2'
}
]
},
{
'metric': 'code_smells',
'history': [
{
'date': '2019-02-20T10:26:52+0000',
'value': '0'
},
{
'date': '2019-03-13T11:37:39+0000',
'value': '1'
},
{
'date': '2019-03-13T12:00:57+0000',
'value': '2'
}
]
}
]
You can see dates are same within for each metric.
I want to collate this data date-wise, i.e. my result json/dictionary should look like:
{
'2019-02-20T10:26:52+0000' : {
'pass_status' : 'OK',
'bugs' : '1',
'code_smells' : '0'
},
'2019-03-13T11:37:39+0000' : {
'pass_status' : 'FAIL',
'bugs' : '6',
'code_smells' : '1'
},
'2019-03-13T12:00:57+0000' : {
'pass_status' : 'OK',
'bugs' : '2',
'code_smells' : '2'
}
}
What will be the suggested approach to do this?
Thanks
I tried some itertools.groupby magic, but it turned into a mess...
maybe iteration + defaultdict is just keeping it simple...
like this:
from collections import defaultdict

# date -> {metric name -> value}; inner dicts are created on first access.
result = defaultdict(dict)
for series in data:
    label = series['metric']
    for point in series['history']:
        reading = point['value']
        result[point['date']][label] = reading
print(dict(result))
or a full example with the data:
data = [
    {
        'metric': 'pass_status',
        'history': [
            {'date': '2019-02-20T10:26:52+0000', 'value': 'OK'},
            {'date': '2019-03-13T11:37:39+0000', 'value': 'FAIL'},
            {'date': '2019-03-13T12:00:57+0000', 'value': 'OK'},
        ],
    },
    {
        'metric': 'bugs',
        'history': [
            {'date': '2019-02-20T10:26:52+0000', 'value': '1'},
            {'date': '2019-03-13T11:37:39+0000', 'value': '6'},
            {'date': '2019-03-13T12:00:57+0000', 'value': '2'},
        ],
    },
    {
        'metric': 'code_smells',
        'history': [
            {'date': '2019-02-20T10:26:52+0000', 'value': '0'},
            {'date': '2019-03-13T11:37:39+0000', 'value': '1'},
            {'date': '2019-03-13T12:00:57+0000', 'value': '2'},
        ],
    },
]

from collections import defaultdict

# Pivot the per-metric histories into date -> {metric name -> value}.
result = defaultdict(dict)
for series in data:
    label, points = series['metric'], series['history']
    for point in points:
        reading = point['value']
        result[point['date']][label] = reading
print(result)

How to convert complex nested json into the pandas Dataframe

I am trying to convert a complex nested JSON document into a DataFrame; this is my JSON format.
There are multiple JSON rows like this; I have included only one here, identified by the id at the top of the structure. When I try to convert it to a DataFrame, I am getting output like this.
In addition, I want to display, for each innings, the batsmen's name and stats (runs, ballsFaced). I want to show the complete data in the DataFrame.
{
'1164223': {
'fullSummaryLink': '/series/19014/scorecard/1164223/new-zealand-a-vs-india-a-1st-unofficial-odi-india-a-tour-of-nz-2018-19',
'innings': {
'1': {
'batsmen': [
{
'name': 'HD Rutherford',
'href': 'http://www.espncricinfo.com/ci/content/player/331375.html',
'stats': {
'runs': {
'name': 'runs',
'text': 'RUNS',
'value': '70'
},
'ballsFaced': {
'name': 'ballsFaced',
'text': 'BF',
'value': '66'
},
'notouts': {
'name': 'notouts',
'text': 'Not Out',
'value': '0'
}
}
},
{
'name': 'JDS Neesham',
'href': 'http://www.espncricinfo.com/ci/content/player/355269.html',
'stats': {
'runs': {
'name': 'runs',
'text': 'RUNS',
'value': '79'
},
'ballsFaced': {
'name': 'ballsFaced',
'text': 'BF',
'value': '48'
},
'notouts': {
'name': 'notouts',
'text': 'Not Out',
'value': '1'
}
}
}
],
'team': {
'teamDisplayName': 'NEW ZEALAND A',
'innDisplayName': 'INNINGS',
'runs': 308,
'overs': 50,
'wickets': 6,
'description': 'complete',
'inningsRunWicket': '308/6',
'inningStatus': ''
},
'bowlers': [
{
'name': 'S Kaul',
'href': 'http://www.espncricinfo.com/ci/content/player/326017.html',
'stats': {
'overs': {
'name': 'overs',
'text': 'O',
'value': '10'
},
'conceded': {
'name': 'conceded',
'text': 'R',
'value': '74'
},
'wickets': {
'name': 'wickets',
'text': 'E',
'value': '2'
}
}
},
{
'name': 'K Gowtham',
'href': 'http://www.espncricinfo.com/ci/content/player/424377.html',
'stats': {
'overs': {
'name': 'overs',
'text': 'O',
'value': '9'
},
'conceded': {
'name': 'conceded',
'text': 'R',
'value': '46'
},
'wickets': {
'name': 'wickets',
'text': 'E',
'value': '1'
}
}
}
]
},
'2': {
'bowlers': [
{
'name': 'HK Bennett',
'href': 'http://www.espncricinfo.com/ci/content/player/226493.html',
'stats': {
'overs': {
'name': 'overs',
'text': 'O',
'value': '10'
},
'conceded': {
'name': 'conceded',
'text': 'R',
'value': '65'
},
'wickets': {
'name': 'wickets',
'text': 'E',
'value': '2'
}
}
},
{
'name': 'LH Ferguson',
'href': 'http://www.espncricinfo.com/ci/content/player/493773.html',
'stats': {
'overs': {
'name': 'overs',
'text': 'O',
'value': '10'
},
'conceded': {
'name': 'conceded',
'text': 'R',
'value': '75'
},
'wickets': {
'name': 'wickets',
'text': 'E',
'value': '2'
}
}
}
],
'batsmen': [
{
'name': 'V Shankar',
'href': 'http://www.espncricinfo.com/ci/content/player/477021.html',
'stats': {
'runs': {
'name': 'runs',
'text': 'RUNS',
'value': '87'
},
'ballsFaced': {
'name': 'ballsFaced',
'text': 'BF',
'value': '80'
},
'notouts': {
'name': 'notouts',
'text': 'Not Out',
'value': '1'
}
}
},
{
'name': 'SS Iyer',
'href': 'http://www.espncricinfo.com/ci/content/player/642519.html',
'stats': {
'runs': {
'name': 'runs',
'text': 'RUNS',
'value': '54'
},
'ballsFaced': {
'name': 'ballsFaced',
'text': 'BF',
'value': '54'
},
'notouts': {
'name': 'notouts',
'text': 'Not Out',
'value': '0'
}
}
}
],
'team': {
'teamDisplayName': 'INDIA A',
'innDisplayName': 'INNINGS',
'runs': 311,
'overs': 49,
'wickets': 6,
'description': 'target reached',
'inningsRunWicket': '311/6',
'inningStatus': ''
}
}
},
'isAvailable': True
}
}
This is the output of the code I have tried; the image sample above shows that output. I am trying to get the data into a simple, flat format, which I have not been able to do.
This is my Python code, which converts the dictionary of JSON values into a pandas DataFrame:
# pd.DataFrame(v) only expands the top level of each match, which leaves the
# nested 'innings' dicts as opaque cell values. Walk the structure explicitly
# and emit one flat row per player (batsman or bowler) instead.
rows = []
for match_id, match in scorecard_summary.items():
    for innings_no, innings in match.get('innings', {}).items():
        team_name = innings.get('team', {}).get('teamDisplayName')
        for role in ('batsmen', 'bowlers'):
            for player in innings.get(role, []):
                row = {
                    'match_id': match_id,
                    'innings': innings_no,
                    'team': team_name,
                    'role': role,
                    'name': player['name'],
                }
                # Each stat is {'name': ..., 'text': ..., 'value': ...};
                # keep only the value, keyed by the stat's own name.
                row.update(
                    {stat: info['value'] for stat, info in player['stats'].items()}
                )
                rows.append(row)
# Stats that do not apply to a role (e.g. 'runs' for a bowler) become NaN.
df = pd.DataFrame(rows)
df

Categories

Resources