Convert Pandas Dataframe or csv file to Custom Nested JSON - python

I have a CSV file loaded into a DataFrame with the following structure:
my dataframe: (table shown as a screenshot in the original post)
I want to convert the data to the following JSON format using Python. I looked at a couple of links but got lost in the nested part. The links I checked:
How to convert pandas dataframe to uniquely structured nested json
convert dataframe to nested json
"PHI": 2,
"firstname": "john",
"medicalHistory": {
"allergies": "egg",
"event": {
"inPatient":{
"hospitalized": {
"visit" : "7-20-20",
"noofdays": "5",
"test": {
"modality": "xray"
}
"vitalSign": {
"temperature": "32",
"heartRate": "80"
},
"patientcondition": {
"headache": "1",
"cough": "0"
}
},
"icu": {
"visit" : "",
"noofdays": "",
},
},
"outpatient": {
"visit":"5-20-20",
"vitalSign": {
"temperature": "32",
"heartRate": "80"
},
"patientcondition": {
"headache": "1",
"cough": "1"
},
"test": {
"modality": "blood"
}
}
}
}
If anyone can help me with the nested array, that will be really helpful.

You need one or more helper functions to unpack the data in the table. Write the main helper function to accept two arguments: 1. the df and 2. a schema. The schema is used to unpack each row of the df into a nested structure. The schema below is an example of how to achieve this for a subset of the logic you describe. It is not exactly what you specified in your example, but it should be enough of a hint for you to complete the rest of the task on your own.
from operator import itemgetter

# df is the DataFrame loaded from your CSV file.
groupby_idx = ['PHI', 'firstName']
groups = df.groupby(groupby_idx, as_index=False)

schema = {
    "event": {
        "eventType": itemgetter('event'),
        "visit": itemgetter('visit'),
        "noOfDays": itemgetter('noofdays'),
        "test": {
            "modality": itemgetter('test')
        },
        "vitalSign": {
            "temperature": itemgetter('temperature'),
            "heartRate": itemgetter('heartRate')
        },
        "patientCondition": {
            "headache": itemgetter('headache'),
            "cough": itemgetter('cough')
        }
    }
}

def unpack(obj, schema):
    # Walk the schema: nested dicts recurse, callables pull the value from the row.
    tmp = {}
    for k, v in schema.items():
        if isinstance(v, dict):
            tmp[k] = unpack(obj, v)
        if callable(v):
            tmp[k] = v(obj)
    return tmp

def apply_unpack(groups, schema):
    # Build a list of unpacked event dicts for each (PHI, firstName) group.
    results = {}
    for gidx, group_df in groups:
        events = []
        for ridx, obj in group_df.iterrows():
            d = unpack(obj, schema)
            events.append(d)
        results[gidx] = events
    return results

unpacked = apply_unpack(groups, schema)
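To serialize the unpacked result as JSON, the tuple group keys from the groupby need to be folded back into each record first. A minimal sketch, assuming the (PHI, firstName) grouping above:
import json

patients = [
    {"PHI": phi, "firstName": first, "events": events}
    for (phi, first), events in unpacked.items()
]
# default=str guards against numpy scalar types that json cannot serialize directly.
print(json.dumps(patients, indent=2, default=str))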

Related

How to select multiple JSON Objects using python

I have JSON data like the following. The JSON has many such objects with the same NameId:
[{
    "NameId": "name1",
    "exp": {
        "exp1": "test1"
    }
}, {
    "NameId": "name1",
    "exp": {
        "exp2": "test2"
    }
}]
Now, what I am after is to create a new JSON object that has a merged exp and to create a file something like the one below, so that I do not have multiple entries with the same NameId:
[{
    "NameId": "name1",
    "exp": {
        "exp1": "test1",
        "exp2": "test2"
    }
}]
Is there a way I can achieve this using Python?
You can do the manual work, merging the entries while rebuilding the structure. You can keep a dictionary with the exp to merge them.
import json

jsonData = [{
    "NameId": "name1",
    "exp": {
        "exp1": "test1"
    }
}, {
    "NameId": "name1",
    "exp": {
        "exp2": "test2"
    }
}, {
    "NameId": "name2",
    "exp": {
        "exp3": "test3"
    }
}]

result = []
expsDict = {}
for entry in jsonData:
    nameId = entry["NameId"]
    exp = entry["exp"]
    if nameId in expsDict:
        # Merge exp into resultExp.
        # Note that resultExp belongs to both result and expsDict,
        # changes made will be reflected in both containers!
        resultExp = expsDict[nameId]
        for (expName, expValue) in exp.items():
            resultExp[expName] = expValue
    else:
        # Copy copy copy, otherwise merging would modify jsonData too!
        exp = exp.copy()
        entry = entry.copy()
        entry["exp"] = exp
        # Add a new item to the result
        result.append(entry)
        # Store exp to later merge other entries with the same name.
        expsDict[nameId] = exp
print(result)
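If you also want the merged structure written out to a file, as in the question, a minimal follow-up sketch (the file name here is just an example; json is already imported above):
with open("merged.json", "w") as f:
    json.dump(result, f, indent=2)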
You can use itertools.groupby and functools.reduce
from functools import reduce
from itertools import groupby

d = [{
    "NameId": "name1",
    "exp": {
        "exp1": "test1"
    }
}, {
    "NameId": "name1",
    "exp": {
        "exp2": "test2"
    }
}]

# Start the fold from an empty dict so every element is merged via its "exp" key,
# which also works when more than two entries share the same NameId.
merged = [
    {'NameId': k, 'exp': reduce(lambda acc, y: {**acc, **y["exp"]}, v, {})}
    for k, v in groupby(sorted(d, key=lambda x: x["NameId"]), lambda x: x["NameId"])
]
print(merged)
# output
# [{'NameId': 'name1', 'exp': {'exp1': 'test1', 'exp2': 'test2'}}]

Pandas to JSON not respecting DataFrame format

I have a Pandas DataFrame which I need to transform into a JSON object. I thought grouping it would achieve this, but it does not seem to yield the correct results. Further, I wouldn't know how to name the sub-group.
My data frame is as follows:
parent  name  age
nick    stef  10
nick    rob   12
And I do a groupby as I would like all children together under one parent in json:
df = df.groupby(['parent', 'name'])['age'].min()
And I would like it to yield the following:
{
    "parent": "Nick",
    "children": [
        {
            "name": "Rob",
            "age": 10
        },
        {
            "name": "Stef",
            "age": 15
        },
        ...
    ]
}
When I do .to_json() it seems to regroup everything on age etc.
df.groupby(['parent'])[['name', 'age']].apply(list).to_json()
Given I wanted to add some styling, I ended up solving it as follows:
import json

df_grouped = df.groupby('parent')
new = []
for group_name, df_group in df_grouped:
    base = {}
    base['parent'] = group_name
    children = []
    for row_index, row in df_group.iterrows():
        temp = {}
        temp['name'] = row['name']
        temp['age'] = row['age']
        children.append(temp)
    base['children'] = children
    new.append(base)
json_format = json.dumps(new)
print(new)
Which yielded the following results:
[
    {
        "parent": "fee",
        "children": [
            {
                "name": "bob",
                "age": 9
            },
            {
                "name": "stef",
                "age": 10
            }
        ]
    },
    {
        "parent": "nick",
        "children": [
            {
                "name": "stef",
                "age": 10
            },
            {
                "name": "tobi",
                "age": 2
            },
            {
                "name": "ralf",
                "age": 12
            }
        ]
    },
    {
        "parent": "patrick",
        "children": [
            {
                "name": "marion",
                "age": 10
            }
        ]
    }
]
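For comparison, a more compact sketch of the same grouping, assuming the DataFrame has the parent, name and age columns shown above; to_dict('records') builds the child dictionaries in one step:
import json

nested = [
    {"parent": parent, "children": group[["name", "age"]].to_dict("records")}
    for parent, group in df.groupby("parent")
]
# default=str guards against numpy scalar types that json cannot serialize directly.
print(json.dumps(nested, indent=2, default=str))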

Pandas json normalize an object property containing an array of objects

If I have json data formatted like this:
{
    "result": [
        {
            "id": 878787,
            "name": "Testing",
            "schema": {
                "id": 3463463,
                "smartElements": [
                    {
                        "svKey": "Model",
                        "value": {
                            "type": "type1",
                            "value": "ThisValue"
                        }
                    },
                    {
                        "svKey": "SecondKey",
                        "value": {
                            "type": "example",
                            "value": "ThisValue2"
                        }
                    }
                ]
            }
        },
        {
            "id": 333,
            "name": "NameName",
            "schema": {
                "id": 1111,
                "smartElements": [
                    {
                        "svKey": "Model",
                        "value": {
                            "type": "type1",
                            "value": "NewValue"
                        }
                    },
                    {
                        "svKey": "SecondKey",
                        "value": {
                            "type": "example",
                            "value": "ValueIs"
                        }
                    }
                ]
            }
        }
    ]
}
is there a way to normalize it so I end up with records:
name Model SecondKey
Testing ThisValue ThisValue2
NameName NewValue ValueIs
I can get the smartElements to a pandas series but I can't figure out a way to break out smartElements[x].svKey to a column header and smartElements[x].value.value to the value for that column and/or merge it.
I'd skip trying to use a pre-baked solution and just navigate the json yourself.
import json
import pandas as pd

data = json.load(open('my.json'))
records = []
for d in data['result']:
    record = {}
    record['name'] = d['name']
    for ele in d['schema']['smartElements']:
        record[ele['svKey']] = ele['value']['value']
    records.append(record)
pd.DataFrame(records)
       name      Model   SecondKey
0   Testing  ThisValue  ThisValue2
1  NameName   NewValue     ValueIs
My solution
import pandas as pd
import json

with open('test.json') as f:
    a = json.load(f)

d = pd.json_normalize(data=a['result'], errors='ignore', record_path=['schema', 'smartElements'], meta=['name'])
print(d)
produces
svKey value.type value.value name
0 Model type1 ThisValue Testing
1 SecondKey example ThisValue2 Testing
2 Model type1 NewValue NameName
3 SecondKey example ValueIs NameName
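If you want the wide layout asked for in the question (one row per name, with each svKey as a column), one possible follow-up is to pivot the normalized frame; this sketch assumes the column names produced above (name, svKey, value.value):
wide = d.pivot(index='name', columns='svKey', values='value.value').reset_index()
print(wide)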

Transforming JSON keys and values

I have a simple JSON in Python that looks like:
{
    "list": [
        {
            "key1": "value1"
        },
        {
            "key1": "value1"
        }
    ]
}
I want to transform this into the following JSON. Any suggestions on how I can do it with Python without installing additional libraries?
{
    "list": [{
        "keys": {
            "name": "key1",
            "value": "value1"
        }
    }, {
        "keys": {
            "name": "key1",
            "value": "value1"
        }
    }]
}
Not sure from your question if you already have the json read into a variable, or if it is in a file. This is assuming you have it in a variable already:
in_json = {
    "list": [{
        "key1": "value1"
    },
    {
        "key2": "value2"
    }]
}

out_json = {"list": []}
for kd in in_json["list"]:
    sub_kd = {"keys": {}}
    for k, v in kd.items():
        sub_kd["keys"]["name"] = k
        sub_kd["keys"]["value"] = v
    out_json["list"].append(sub_kd)
print(out_json)
It just loops through the JSON, making dictionaries to append to the out_json dictionary. You could pretty-print the result with the json library and also save it to a file, for example as shown below.
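A minimal sketch of that (the output file name is just a placeholder):
import json

# Pretty-print the transformed structure.
print(json.dumps(out_json, indent=4))

# Save it to a file (example path).
with open("out.json", "w") as f:
    json.dump(out_json, f, indent=4)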
You didn't indicate exactly what container the JSON data is in, so I've put it all in a string in the example code below and used the json.loads() function to turn it into a Python dictionary. If it's in a file, you can use the module's json.load() function instead.
It also assumes that each sub-JSON object in the "list" list consists of only one key/value pair, as shown in your question.
The code below changes the deserialized dictionary in place and pretty-prints the result by using the json.dumps() function to reserialize it.
Note that I changed the keys and values in the sample input JSON slightly to make it easier to see the correspondence between it and the printed results.
import json

json_in = '''
{
    "list": [
        {
            "key1": "value1"
        },
        {
            "key2": "value2"
        }
    ]
}
'''

json_data = json.loads(json_in)  # Deserialize.

for i, obj in enumerate(json_data['list']):
    # Assumes each object in list contains only one key, value pair.
    newobj = {'name': next(iter(obj.keys())),
              'value': next(iter(obj.values()))}
    json_data['list'][i] = {'keys': newobj}

print(json.dumps(json_data, indent=4))  # Reserialize and print.
Printed result:
{
    "list": [
        {
            "keys": {
                "name": "key1",
                "value": "value1"
            }
        },
        {
            "keys": {
                "name": "key2",
                "value": "value2"
            }
        }
    ]
}

decoding json string in python

I have the following JSON string (from wikipedia http://en.wikipedia.org/wiki/JSON)
{
    "name": "Product",
    "properties": {
        "id": {
            "type": "number",
            "description": "Product identifier",
            "required": true
        },
        "name": {
            "type": "string",
            "description": "Name of the product",
            "required": true
        },
        "price": {
            "type": "number",
            "minimum": 0,
            "required": true
        },
        "tags": {
            "type": "array",
            "items": {
                "type": "string"
            }
        },
        "stock": {
            "type": "object",
            "properties": {
                "warehouse": {
                    "type": "number"
                },
                "retail": {
                    "type": "number"
                }
            }
        }
    }
}
I am trying to decode this string using the Python json library. I would like to access the node
properties -> stock -> properties -> warehouse.
I understand that the json.loads() function stores the JSON string as a dictionary. But in this case properties is my key and everything under it is the value. How do I access the node above?
import json

jsonText = ""
file = open("c:/dir/jsondec.json")
for line in file.xreadlines():
    jsonText += line
data = json.loads(jsonText)
for k, v in data.items():
    print k  # shows name and properties
file.close()
Thanks
You can load json straight from the file like this:
f = open("c:/dir/jsondec.json")
data = json.load(f)
Based on your input string, data is now a dictionary that contains other dictionaries. You can just navigate up the dictionaries like so:
node = data['properties']['stock']['properties']['warehouse']
print str(node)
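If you are on Python 3, the same approach works with print as a function; a minimal sketch using the file path from the question:
import json

with open("c:/dir/jsondec.json") as f:
    data = json.load(f)

print(data["properties"]["stock"]["properties"]["warehouse"])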
