I want to create a multi nested json from my pandas dataframe - python

I have a pandas data frame in the following format :-
EMPCODE|Indicator|CustNAME
1 CASA Raja
1 CASA lala
1 CASA dada
1 TL Nan
1 l Nan
1 p Nan
1 q Nan
2 CASA Crick
2 CASA Flick
2 TL Nan
2 l Nan
2 p Nan
2 q Nan
I want to convert this into a nested json .
I've tried various methods, including groupby() and apply(), but I can't get the output in the required JSON format. With the code below, I am getting duplicate CustNAME values for both employees.
# Map each distinct EMPCODE_y / EMPNAME value to the row labels that carry it.
group = merge_hr_anr.groupby('EMPCODE_y').groups
group1 = merge_hr_anr.groupby("EMPNAME").groups
# NOTE(review): `a` is not defined in this snippet; presumably the number of
# employees to process -- confirm.
for variable in range(a):
# Build the record for the group at position `variable`: its code, a name, and
# one {'IndicatorName': ...} entry per row label in that group.
# NOTE(review): the code and the name are picked by position from two
# independent groupings; they only belong together if both orderings agree.
d = {'EMPCODE_y': list(group.keys())[variable],'EMPNAME':
list(group1.keys())[variable] ,'Indicators': [{'IndicatorName':
merge_hr_anr.loc[i, 'IndicatorName']} for i in list(group.values())
[variable].unique()]}
# Drop duplicate indicator entries while keeping first-seen order.
d['Indicators'] = list(map(dict,sorted(set(map(lambda x:
tuple(x.items()),d['Indicators'])), key=list(map(lambda x:
tuple(x.items()),d['Indicators'])).index)))
# For each indicator, collect the non-null CUSTNAME values.
# NOTE: the mask below matches IndicatorName over the whole frame and is not
# restricted to the current employee's rows, so every employee receives the
# same customer list.
d['Performance'] = [{i['IndicatorName']:
(merge_hr_anr.loc[merge_hr_anr['IndicatorName'].eq(i['IndicatorName']),"CUSTNAME"]).dropna().tolist()} for i in d['Indicators']]
My output is
{
"EMPCODE": "1",
"Indicators": [
{
"IndicatorName": "CASA"
},
{
"IndicatorName": "TL"
},
{
"IndicatorName": "l"
},
{
"IndicatorName": "p"
},
{
"IndicatorName": "q"
}
]
"Performance":[
{
"CASA":[{"Custname":"Raja"},{"Custname":"lala"},{"Custname":"dada"}]
},
{
"TL":[]
}
{
"l":[]
}
{
"p":[]
}
{
"q":[]
}
]
}
{
"EMPCODE": "2",
"Indicators": [
{
"IndicatorName": "CASA"
},
{
"IndicatorName": "TL"
},
{
"IndicatorName": "l"
},
{
"IndicatorName": "p"
},
{
"IndicatorName": "q"
}
]
"Performance":[
{
"CASA":[{"Custname":"Raja"},{"Custname":"lala"},{"CustName":"dada"}]
},
{
"TL":[]
}
{
"l":[]
}
{
"p":[]
}
{
"q":[]
}
]
}
i want the output to be
{
"EMPCODE": "1",
"Indicators": [
{
"IndicatorName": "CASA"
},
{
"IndicatorName": "TL"
},
{
"IndicatorName": "l"
},
{
"IndicatorName": "p"
},
{
"IndicatorName": "q"
}
]
"Performance":[
{
"CASA":[{"Custname":"Raja"},{"Custname":"lala"},{"Custname":"dada"}]
},
{
"TL":[]
}
{
"l":[]
}
{
"p":[]
}
{
"q":[]
}
]
}
{
"EMPCODE": "2",
"Indicators": [
{
"IndicatorName": "CASA"
},
{
"IndicatorName": "TL"
},
{
"IndicatorName": "l"
},
{
"IndicatorName": "p"
},
{
"IndicatorName": "q"
}
]
"Performance":[
{
"CASA":[{"Custname":"Crick"},{"Custname":"Flick"}]
},
{
"TL":[]
}
{
"l":[]
}
{
"p":[]
}
{
"q":[]
}
]
}

Try the below code with constructing a dictionary:
# Build one nested record per employee.
#
# Every lookup is made against the rows of the employee being processed
# (`emp_rows`), never against the whole frame; filtering on the indicator
# alone would hand each employee the customers of all other employees too.
records = []
for emp_code, emp_rows in merge_hr_anr.groupby('EMPCODE'):
    # unique() keeps first-seen order, so indicators appear as in the data.
    indicator_names = emp_rows['Indicator'].unique().tolist()
    records.append({
        'EMPCODE': emp_code,
        'Indicators': [{'IndicatorName': name} for name in indicator_names],
        'Performance': [
            # dropna() turns "no customer" rows into an empty list.
            {name: emp_rows.loc[emp_rows['Indicator'].eq(name), 'CustNAME'].dropna().tolist()}
            for name in indicator_names
        ],
    })

# `records` holds every employee; `d` is the record of the first EMPCODE
# (groupby sorts the keys), which is the one printed here.
d = records[0]
print(d)
Output:
{'EMPCODE': 1, 'Indicators': [{'IndicatorName': 'CASA'}, {'IndicatorName': 'TL'}, {'IndicatorName': 'l'}, {'IndicatorName': 'p'}, {'IndicatorName': 'q'}], 'Performance': [{'CASA': ['Raja', 'lala', 'dada']}, {'TL': []}, {'l': []}, {'p': []}, {'q': []}]}
To write a .json file:
import json


def _numpy_scalar_to_builtin(value):
    """Convert a numpy scalar (e.g. an int64 EMPCODE) to a plain Python value.

    Used as the ``default`` hook of ``json.dump``; anything that is not a
    numpy-style scalar is rejected so unexpected types are not silently
    stringified.
    """
    if hasattr(value, 'item'):
        return value.item()
    raise TypeError(f'Object of type {type(value).__name__} is not JSON serializable')


# str(d) would write the Python repr (single quotes), which is not valid JSON;
# json.dump emits real JSON that any parser can read back.
with open('outvalue7.json', 'w') as f:
    json.dump(d, f, indent=4, default=_numpy_scalar_to_builtin)

Related

Encoding python dictionary into JSON using a schema

I am struggling to encode json from a python dictionary. When using json.dumps on a dictionary there is no way to tell the function what objects and properties to map the dictionary keys and values to.
The result of the blind dump is that I end up with schemaless json with unique keys rather than a coherent json structure
import json
# Sample inventory: device type -> brand -> count.
laptop_counts = {"sony": 1, "apple": 2, "asus": 5}
camera_counts = {"sony": 2, "sumsung": 1, "nikon": 4}
d = {"Laptop": laptop_counts, "Camera": camera_counts}

# Dump the mapping exactly as it is; the keys become the JSON property names.
with open("my.json", "w") as f:
    json.dump(d, f)
Which returns
{"Laptop": {"sony": 1, "apple": 2, "asus": 5}, "Camera": {"sony": 2, "sumsung": 1, "nikon": 4}}
which looks like json, but has no schema at all.
i am looking to produce a json file more like this
{"devices": [
{"device": {
"deviceType": "Laptop",
"deviceBrands": [
{"deviceBrand": {
"deviceBrandName": "sony",
"deviceBrandCount": "1"
},
{"deviceBrand": {
"deviceBrandName": "apple",
"deviceBrandCount": "2"
},
{"deviceBrand": {
"deviceBrandName": "asus",
"deviceBrandCount": "5"
}
},
{"device": {
"deviceType": "Camera",
"deviceBrands": [
{"deviceBrand": {
"deviceBrandName": "sony",
"deviceBrandCount": "2"
},
{"deviceBrand": {
"deviceBrandName": "sumsung",
"deviceBrandCount": "1"
},
{"deviceBrand": {
"deviceBrandName": "nikon",
"deviceBrandCount": "5"
}
}
}
Any recommendations?
Create the structure you want using nested list comprehensions before calling json.dump:
def _brand_entry(brand_name, brand_count):
    """Wrap one brand/count pair in the ``deviceBrand`` envelope."""
    return {"deviceBrand": {"deviceBrandName": brand_name,
                            "deviceBrandCount": brand_count}}


def _device_entry(device_type, brand_counts):
    """Wrap one device type and its brands in the ``device`` envelope."""
    brand_entries = [_brand_entry(name, count)
                     for name, count in brand_counts.items()]
    return {"device": {"deviceType": device_type,
                       "deviceBrands": brand_entries}}


# Reshape {type: {brand: count}} into the explicit layout before dumping.
output = {"devices": [_device_entry(device_type, brand_counts)
                      for device_type, brand_counts in d.items()]}

with open("output.json", "w") as f:
    json.dump(output, f)
output.json:
{
"devices": [
{
"device": {
"deviceType": "Laptop",
"deviceBrands": [
{
"deviceBrand": {
"deviceBrandName": "sony",
"deviceBrandCount": 1
}
},
{
"deviceBrand": {
"deviceBrandName": "apple",
"deviceBrandCount": 2
}
},
{
"deviceBrand": {
"deviceBrandName": "asus",
"deviceBrandCount": 5
}
}
]
}
},
{
"device": {
"deviceType": "Camera",
"deviceBrands": [
{
"deviceBrand": {
"deviceBrandName": "sony",
"deviceBrandCount": 2
}
},
{
"deviceBrand": {
"deviceBrandName": "sumsung",
"deviceBrandCount": 1
}
},
{
"deviceBrand": {
"deviceBrandName": "nikon",
"deviceBrandCount": 4
}
}
]
}
}
]
}

How to remove parent json element in python3 if child object is empty

I'm trying to move data from SQL to Mongo. Here is the challenge I'm facing: if a child object is empty, I want to remove its parent element as well, all the way up to the insurance field.
Here is what I tried:
# Asker's attempt at pruning empty children. It inspects only the direct
# entries of jsonData and never calls itself on nested values, so empty
# containers deeper in the structure are not reached.
def remove_empty_elements(jsonData):
if(isinstance(jsonData, list) or isinstance(jsonData,dict)):
# list(...) takes a snapshot so entries can be deleted while looping.
for elem in list(jsonData):
# List-valued entry: drop its falsy items, then drop the entry itself if
# nothing is left.
if not isinstance(elem, dict) and isinstance(jsonData[elem], list) and elem:
jsonData[elem] = [x for x in jsonData[elem] if x]
if(len(jsonData[elem])==0):
del jsonData[elem]
# Dict-valued entry that is already empty: drop it.
elif not isinstance(elem, dict) and isinstance(jsonData[elem], dict) and not jsonData[elem]:
del jsonData[elem]
else:
pass
return jsonData
sample data
{
"_id": "30546c62-8ea0-4f1a-a239-cc7508041a7b",
"IsActive": "True",
"name": "Pixel 3",
"phone": [
{
"Bill": 145,
"phonetype": "xyz",
"insurance": [
{
"year_one_claims": [
{
"2020": 200
},
{
},
{
},
{
},
{
}
]
},
{
"year_two_claims": [
{
},
{
},
{
},
{
},
{
}
]
},
]
}
],
"Provider": {
"agent": "aaadd",
}
}
Results should look like that
{
"_id": "30546c62-8ea0-4f1a-a239-cc7508041a7b",
"IsActive": "True",
"name": "Pixel 3",
"phone": [
{
"Bill": 145,
"phonetype": "xyz",
"insurance": [
{
"year_one_claims": [
{
"2020": 200
},
]
},
]
}
],
"Provider": {
"agent": "aaadd",
}
}
Your if statements are kind of confusing. I think you are looking for a recursion:
import json
# Values regarded as "empty"; anything equal to one of these is pruned.
to_be_deleted = [[], {}, "", None]


def remove_empty_elements(jsonData):
    """Return ``jsonData`` with empty values pruned at every nesting level.

    Children are cleaned first, so a container that only held empty values
    becomes empty itself and is then dropped by its parent. Values that are
    neither a dict nor a list are returned unchanged.
    """
    if isinstance(jsonData, dict):
        return {
            name: cleaned
            for name, child in jsonData.items()
            if (cleaned := remove_empty_elements(child)) not in to_be_deleted
        }
    if isinstance(jsonData, list):
        return [
            cleaned
            for child in jsonData
            if (cleaned := remove_empty_elements(child)) not in to_be_deleted
        ]
    return jsonData
print(json.dumps(remove_empty_elements(jsonData), indent=4))
Edit/Note: assignment expressions (:=) inside comprehensions require Python 3.8 or later.
Output:
{
"_id": "30546c62-8ea0-4f1a-a239-cc7508041a7b",
"IsActive": "True",
"name": "Pixel 3",
"phone": [
{
"Bill": 145,
"phonetype": "xyz",
"insurance": [
{
"year_one_claims": [
{
"2020": 200
}
]
}
]
}
],
"Provider": {
"agent": "aaadd"
}
}
Try out this:
def prune_empty_claims(record):
    """Remove empty claim entries from ``record`` in place.

    For every phone entry, falsy items are dropped from each claims list of
    each insurance dict. A claims key whose list ends up empty is deleted,
    and an insurance dict left without any key is dropped from the phone's
    ``insurance`` list.

    Args:
        record: dict with a ``'phone'`` list; each item holds an
            ``'insurance'`` list of ``{claims_key: [claim, ...]}`` dicts.

    Returns:
        The same ``record`` object, to allow chaining.

    Raises:
        KeyError: if ``'phone'`` or an ``'insurance'`` key is missing.
    """
    for phone in record['phone']:
        kept = []
        for ins in phone['insurance']:
            # Snapshot the items because keys are deleted inside the loop.
            for key, claims in list(ins.items()):
                claims[:] = [claim for claim in claims if claim]
                if not claims:
                    del ins[key]
            # Keep the insurance dict only if something survived in it.
            if ins:
                kept.append(ins)
        # Rebuild the list instead of calling remove() during iteration:
        # removing from a list that is being looped over skips the element
        # that follows the removed one.
        phone['insurance'][:] = kept
    return record


data = {
    "_id": "30546c62-8ea0-4f1a-a239-cc7508041a7b",
    "IsActive": "True",
    "name": "Pixel 3",
    "phone": [
        {
            "Bill": 145,
            "phonetype": "xyz",
            "insurance": [
                {
                    "year_one_claims": [
                        {
                            "2020": 200
                        },
                        {
                        },
                        {
                        },
                        {
                        },
                        {
                        }
                    ]
                },
                {
                    "year_two_claims": [
                        {
                        },
                        {
                        },
                        {
                        },
                        {
                        },
                        {
                        }
                    ]
                },
            ]
        }
    ],
    "Provider": {
        "agent": "aaadd",
    }
}

prune_empty_claims(data)
print (data)
Output:
{
'_id': '30546c62-8ea0-4f1a-a239-cc7508041a7b',
'IsActive': 'True',
'name': 'Pixel 3',
'phone': [{
'Bill': 145,
'phonetype': 'xyz',
'insurance': [{
'year_one_claims': [{
'2020': 200
}]
}]
}],
'Provider': {
'agent': 'aaadd'
}
}

Parse the complex JSON in Python without storing in File

I'm trying to parse the following JSON data without storing it in a file, using Python.
{
"select": {
"value": "s_name"
},
"from": "student",
"where": {
"in": [
"s_id",
{
"select": {
"value": "s_id"
},
"from": "student_course",
"where": {
"in": [
"c_id",
{
"select": {
"value": "c_id"
},
"from": "course",
"where": {
"or": [
{
"and": [
{
"eq": [
"c_name",
{
"literal": "DSA"
}
]
},
{
"eq": [
"c_name",
{
"literal": "dbms"
}
]
}
]
},
{
"eq": [
"c_name",
{
"literal": "algorithm"
}
]
}
]
}
}
]
}
}
]
}
}
I'm using the following code:
import json
# Placeholder standing in for the JSON text shown above.
x = "JSON Data which is shared above"
# NOTE: dumps() ENCODES its argument; given a str it returns that str wrapped
# as a JSON string literal rather than parsing it.
y = json.dumps(x)
# loads() undoes the encoding above, so this is the original str, not a dict.
jsonDict = json.loads(y)
# Indexing a str with a string key raises TypeError.
print (jsonDict['where'])
I'm not sure how to proceed further; could you please advise how this can be done?
I want to fetch the value of all objects, especially where clause.
json.dumps() takes an object and encodes it into a JSON string. But you are trying to take a JSON string and decode it into an object (a dict in this case). The method you should be applying against x therefore is json.loads(). You can then convert the resulting dict back into a JSON string, y, with json.dumps():
import json
x = """{
"select": {
"value": "s_name"
},
"from": "student",
"where": {
"in": [
"s_id",
{
"select": {
"value": "s_id"
},
"from": "student_course",
"where": {
"in": [
"c_id",
{
"select": {
"value": "c_id"
},
"from": "course",
"where": {
"or": [
{
"and": [
{
"eq": [
"c_name",
{
"literal": "DSA"
}
]
},
{
"eq": [
"c_name",
{
"literal": "dbms"
}
]
}
]
},
{
"eq": [
"c_name",
{
"literal": "algorithm"
}
]
}
]
}
}
]
}
}
]
}
}"""
# json.loads() decodes the JSON text held in x into Python objects.
jsonDict = json.loads(x) # from string to a dict
# Index the decoded dict by key to read the outermost "where" clause.
print(jsonDict['where'])
# json.dumps() goes the other way and encodes the dict as JSON text again.
y = json.dumps(jsonDict) # from dict back to a string
Prints:
{'in': ['s_id', {'select': {'value': 's_id'}, 'from': 'student_course', 'where': {'in': ['c_id', {'select': {'value': 'c_id'}, 'from': 'course', 'where': {'or': [{'and': [{'eq': ['c_name', {'literal': 'DSA'}]}, {'eq': ['c_name', {'literal': 'dbms'}]}]}, {'eq': ['c_name', {'literal': 'algorithm'}]}]}}]}}]}

PySpark - Convert a heterogeneous array JSON array to Spark dataframe and flatten it

I have streaming data coming in as a JSON array, and I want to flatten it out into a single row in a Spark dataframe using Python.
Here is what the JSON data looks like:
{
"event": [
{
"name": "QuizAnswer",
"count": 1
}
],
"custom": {
"dimensions": [
{
"title": "Are you:"
},
{
"question_id": "5965"
},
{
"option_id": "19029"
},
{
"option_title": "Non-binary"
},
{
"item": "Non-binary"
},
{
"tab_index": "3"
},
{
"tab_count": "4"
},
{
"tab_initial_index": "4"
},
{
"page": "home"
},
{
"environment": "testing"
},
{
"page_count": "0"
},
{
"widget_version": "2.2.44"
},
{
"session_count": "1"
},
{
"quiz_settings_id": "1020"
},
{
"quiz_session": "6e5a3b5c-9961-4c1b-a2af-3374bbeccede"
},
{
"shopify_customer_id": "noid"
},
{
"cart_token": ""
},
{
"app_version": "2.2.44"
},
{
"shop_name": "safety-valve.myshopify.com"
}
],
"metrics": []
}
}
}

python join sub-dictionaries

I'm trying to join sub-dicts in Python so that valid JSON is composed.
What I have is:
{
'ctx/language': 'en',
'ctx/territory': 'DE',
'composer_name': 'openEHR2study',
'Allergies': {
'adverse_reaction-allergy': [{
'reaction_event_summary': {
'clinical_impact': [{
'|code': 'at0035'
}
]
}
}, {
'recorded': ['2020-05-14T00:00:00.000Z']
}, {
'reaction_event_summary': {
'certainty': [{
'|code': 'at0024'
}
]
}
}, {
'substance_agent': ['s']
}, {
'reaction_reported': ['true']
}, {
'comment': ['c']
}
]
}
}
What I would like is a join over "reaction_event_summary" like this:
{
'ctx/language': 'en',
'ctx/territory': 'DE',
'composer_name': 'openEHR2study',
'Allergies': {
'adverse_reaction-allergy': [{
'reaction_event_summary': {
'clinical_impact': [{
'|code': 'at0035'
}
]
'certainty': [{
'|code': 'at0024'
}
]
}
}, {
'recorded': ['2020-05-14T00:00:00.000Z']
}, {
'substance_agent': ['s']
}, {
'reaction_reported': ['true']
}, {
'comment': ['c']
}
]
}
I have no idea how I should loop through the JSON lists and dicts to get this done.
I have made a rough attempt; please check whether this works. The idea is to iterate over the original dictionary while building the result in a deep copy.
import copy
import json


def merge_reaction_summaries(entries, key='reaction_event_summary'):
    """Return a new list in which all ``key`` dicts are merged into one.

    The first entry holding a non-empty ``key`` value keeps its position;
    the contents of every later ``key`` value are merged into it (a later
    sub-key overwrites an earlier one of the same name) and the later entry
    is dropped. All other entries are carried over in their original order.

    Everything is deep-copied, so ``entries`` is never modified.

    Args:
        entries: list of dicts to scan.
        key: name of the sub-dict to merge.

    Returns:
        A new list with at most one entry containing ``key``.
    """
    merged = []
    target = None  # the merged summary dict, once the first one is seen
    for entry in entries:
        summary = entry.get(key)
        if not summary:
            merged.append(copy.deepcopy(entry))
        elif target is None:
            target = copy.deepcopy(summary)
            merged.append({key: target})
        else:
            # Merge every sub-key, not only one fixed name, and do so via
            # the remembered dict rather than assuming it sits at index 0.
            target.update(copy.deepcopy(summary))
    return merged


val = {
    'ctx/language': 'en',
    'ctx/territory': 'DE',
    'composer_name': 'openEHR2study',
    'Allergies': {
        'adverse_reaction-allergy': [
            {
                'reaction_event_summary': {
                    'clinical_impact': [{
                            '|code': 'at0035'
                        }
                    ]
                }
            }, {
                'recorded': ['2020-05-14T00:00:00.000Z']
            }, {
                'reaction_event_summary': {
                    'certainty': [{
                            '|code': 'at0024'
                        }
                    ]
                }
            }, {
                'substance_agent': ['s']
            }, {
                'reaction_reported': ['true']
            }, {
                'comment': ['c']
            }
        ]
    }
}

# Work on a copy so the input dictionary stays untouched.
val1 = copy.deepcopy(val)
val1['Allergies']['adverse_reaction-allergy'] = merge_reaction_summaries(
    val['Allergies']['adverse_reaction-allergy'])

print(json.dumps(val1, indent=2))
Example output
{
"ctx/language": "en",
"ctx/territory": "DE",
"composer_name": "openEHR2study",
"Allergies": {
"adverse_reaction-allergy": [
{
"reaction_event_summary": {
"clinical_impact": [
{
"|code": "at0035"
}
],
"certainty": [
{
"|code": "at0024"
}
]
}
},
{
"recorded": [
"2020-05-14T00:00:00.000Z"
]
},
{
"substance_agent": [
"s"
]
},
{
"reaction_reported": [
"true"
]
},
{
"comment": [
"c"
]
}
]
}
}

Categories

Resources