I am struggling to encode JSON from a Python dictionary. When using json.dumps on a dictionary, there is no way to tell the function which objects and properties the dictionary keys and values should map to.
The result of the blind dump is that I end up with schemaless JSON keyed directly by my data, rather than a coherent JSON structure.
import json
d = {
"Laptop": {
"sony": 1,
"apple": 2,
"asus": 5,
},
"Camera": {
"sony": 2,
"sumsung": 1,
"nikon" : 4,
},
}
with open("my.json","w") as f:
json.dump(d,f)
which writes
{"Laptop": {"sony": 1, "apple": 2, "asus": 5}, "Camera": {"sony": 2, "sumsung": 1, "nikon": 4}}
which looks like JSON, but has no schema at all.
I am looking to produce a JSON file more like this:
{"devices": [
{"device": {
"deviceType": "Laptop",
"deviceBrands": [
{"deviceBrand": {
"deviceBrandName": "sony",
"deviceBrandCount": "1"
},
{"deviceBrand": {
"deviceBrandName": "apple",
"deviceBrandCount": "2"
},
{"deviceBrand": {
"deviceBrandName": "asus",
"deviceBrandCount": "5"
}
},
{"device": {
"deviceType": "Camera",
"deviceBrands": [
{"deviceBrand": {
"deviceBrandName": "sony",
"deviceBrandCount": "2"
},
{"deviceBrand": {
"deviceBrandName": "sumsung",
"deviceBrandCount": "1"
},
{"deviceBrand": {
"deviceBrandName": "nikon",
"deviceBrandCount": "5"
}
}
}
Any recommendations?
Create the structure you want with a comprehension before calling json.dump:
output = {"devices": [
{"device": {"deviceType": k,
"deviceBrands": [{"deviceBrand": {"deviceBrandName": k1,
"deviceBrandCount": v1}
} for k1, v1 in v.items()
]
}
}
for k,v in d.items()]}
with open("output.json","w") as f:
json.dump(output,f)
output.json:
{
"devices": [
{
"device": {
"deviceType": "Laptop",
"deviceBrands": [
{
"deviceBrand": {
"deviceBrandName": "sony",
"deviceBrandCount": 1
}
},
{
"deviceBrand": {
"deviceBrandName": "apple",
"deviceBrandCount": 2
}
},
{
"deviceBrand": {
"deviceBrandName": "asus",
"deviceBrandCount": 5
}
}
]
}
},
{
"device": {
"deviceType": "Camera",
"deviceBrands": [
{
"deviceBrand": {
"deviceBrandName": "sony",
"deviceBrandCount": 2
}
},
{
"deviceBrand": {
"deviceBrandName": "sumsung",
"deviceBrandCount": 1
}
},
{
"deviceBrand": {
"deviceBrandName": "nikon",
"deviceBrandCount": 4
}
}
]
}
}
]
}
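If you ever need to go the other way, reading output.json back and rebuilding the original {type: {brand: count}} dictionary is just the reverse walk. A minimal sketch, assuming the file has exactly the structure produced above:
import json
with open("output.json") as f:
    data = json.load(f)
# invert the devices/device/deviceBrands nesting back into a plain dict
rebuilt = {}
for dev in data["devices"]:
    device = dev["device"]
    rebuilt[device["deviceType"]] = {
        b["deviceBrand"]["deviceBrandName"]: b["deviceBrand"]["deviceBrandCount"]
        for b in device["deviceBrands"]
    }
print(rebuilt)  # {'Laptop': {'sony': 1, 'apple': 2, 'asus': 5}, 'Camera': {'sony': 2, 'sumsung': 1, 'nikon': 4}}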
Related
I have a Python dictionary and I don't know exactly how deeply nested it is, but here is an example:
{
"name":"a_struct",
"type":"int",
"data":{
"type":"struct",
"elements":[
{
"data":[
{
"name":"test1",
"data_id":0,
"type":"uint8",
"wire_type":0,
"data":0
},
{
"name":"test2",
"data_id":2,
"type":"uint32",
"wire_type":2,
"data":0
},
{
"name":"test3",
"data_id":3,
"type":"int",
"wire_type":4,
"data":{
"type":"uint32",
"elements":[
]
}
},
{
"name":"test4",
"data_id":4,
"type":"uint32",
"wire_type":2,
"data":0
},
{
"name":"test5",
"data_id":5,
"type":"int",
"wire_type":4,
"data":{
"type":"uint32",
"elements":[
]
}
}
]
}
]
}
}
My goal is to filter out each dictionary whose name key is not one of ["test1", "test3", "test5"]. This should work for arbitrarily deeply nested dictionaries.
So in this case, the result should be the following filtered dictionary:
{
"name":"a_struct",
"type":"int",
"data":{
"type":"struct",
"elements":[
{
"data":[
{
"name":"test1",
"data_id":0,
"type":"uint8",
"wire_type":0,
"data":0
},
{
"name":"test3",
"data_id":3,
"type":"int",
"wire_type":4,
"data":{
"type":"uint32",
"elements":[
]
}
},
{
"name":"test5",
"data_id":5,
"type":"int",
"wire_type":4,
"data":{
"type":"uint32",
"elements":[
]
}
}
]
}
]
}
}
I tried to use the dpath lib (https://pypi.org/project/dpath/) by providing a filter function like so:
import dpath.util

def afilter(x):
if isinstance(x, dict):
if "name" in x:
if x["name"] in ["test1", "test3", "test5"]:
return True
else:
return False
else:
return False
result = dpath.util.search(my_dict, "**", afilter=afilter)
But I get the wrong result: every other key has been filtered out as well, which is not what I want:
{
"data":{
"elements":[
{
"data":[
{
"name":"test1",
"data_id":0,
"type":"uint8",
"wire_type":0,
"data":0
},
null,
{
"name":"test3",
"data_id":3,
"type":"int",
"wire_type":4,
"data":{
"type":"uint32",
"elements":[
]
}
},
null,
{
"name":"test5",
"data_id":5,
"type":"int",
"wire_type":4,
"data":{
"type":"uint32",
"elements":[
]
}
}
]
}
]
}
}
How do I get this right?
PS: I'm not forced to use the dpath lib, so the solution can be written in pure Python.
You can recursively process your dictionary while filtering unneeded records:
def delete_keys(data, keys_to_keep):
    res = {}
    for k, v in data.items():
        if isinstance(v, dict):
            # recurse into nested dictionaries
            res[k] = delete_keys(v, keys_to_keep)
        elif isinstance(v, list):
            if k == "data":
                # keep only the records whose "name" is one we want
                res[k] = [delete_keys(obj, keys_to_keep) for obj in v if obj.get('name') in keys_to_keep]
            else:
                res[k] = [delete_keys(obj, keys_to_keep) for obj in v]
        else:
            # plain value, keep as is
            res[k] = v
    return res
keys_to_keep = {'test1', 'test3', 'test5'}
print(delete_keys(data, keys_to_keep))
For your input, it gives:
{
"name": "a_struct",
"type": "int",
"data": {
"type": "struct",
"elements": [
{
"data": [
{
"name": "test1",
"data_id": 0,
"type": "uint8",
"wire_type": 0,
"data": 0,
},
{
"name": "test3",
"data_id": 3,
"type": "int",
"wire_type": 4,
"data": {"type": "uint32", "elements": []},
},
{
"name": "test5",
"data_id": 5,
"type": "int",
"wire_type": 4,
"data": {"type": "uint32", "elements": []},
},
]
}
],
},
}
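Note that the k == "data" check ties this to lists stored under a "data" key. If the records can live under other keys, a slightly more general variant (a sketch, assuming every record you want to filter is a dict that carries a "name" key) can decide per list item instead:
def filter_named(data, keys_to_keep):
    # keep the overall shape; drop list items whose "name" is outside keys_to_keep
    if isinstance(data, dict):
        return {k: filter_named(v, keys_to_keep) for k, v in data.items()}
    if isinstance(data, list):
        return [filter_named(item, keys_to_keep) for item in data
                if not (isinstance(item, dict) and "name" in item
                        and item["name"] not in keys_to_keep)]
    return data
print(filter_named(data, {'test1', 'test3', 'test5'}))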
I'm trying to move data from SQL to Mongo. Here is a challenge I'm facing: if a child object is empty, I want the parent element removed as well, all the way up to the entries of the insurance array.
Here is what I tried:
def remove_empty_elements(jsonData):
if(isinstance(jsonData, list) or isinstance(jsonData,dict)):
for elem in list(jsonData):
if not isinstance(elem, dict) and isinstance(jsonData[elem], list) and elem:
jsonData[elem] = [x for x in jsonData[elem] if x]
if(len(jsonData[elem])==0):
del jsonData[elem]
elif not isinstance(elem, dict) and isinstance(jsonData[elem], dict) and not jsonData[elem]:
del jsonData[elem]
else:
pass
return jsonData
sample data
{
"_id": "30546c62-8ea0-4f1a-a239-cc7508041a7b",
"IsActive": "True",
"name": "Pixel 3",
"phone": [
{
"Bill": 145,
"phonetype": "xyz",
"insurance": [
{
"year_one_claims": [
{
"2020": 200
},
{
},
{
},
{
},
{
}
]
},
{
"year_two_claims": [
{
},
{
},
{
},
{
},
{
}
]
},
]
}
],
"Provider": {
"agent": "aaadd",
}
}
Results should look like that
{
"_id": "30546c62-8ea0-4f1a-a239-cc7508041a7b",
"IsActive": "True",
"name": "Pixel 3",
"phone": [
{
"Bill": 145,
"phonetype": "xyz",
"insurance": [
{
"year_one_claims": [
{
"2020": 200
},
]
},
]
}
],
"Provider": {
"agent": "aaadd",
}
}
Your if statements are kind of confusing. I think what you are looking for is recursion:
import json
# define which elements you want to remove:
to_be_deleted = [[], {}, "", None]
def remove_empty_elements(jsonData):
if isinstance(jsonData, list):
jsonData = [new_elem for elem in jsonData
if (new_elem := remove_empty_elements(elem)) not in to_be_deleted]
elif isinstance(jsonData,dict):
jsonData = {key: new_value for key, value in jsonData.items()
if (new_value := remove_empty_elements(value)) not in to_be_deleted}
return jsonData
print(json.dumps(remove_empty_elements(jsonData), indent=4))
Edit/Note: from Python 3.8 on you can use assignment expressions (:=) in comprehensions
Output:
{
"_id": "30546c62-8ea0-4f1a-a239-cc7508041a7b",
"IsActive": "True",
"name": "Pixel 3",
"phone": [
{
"Bill": 145,
"phonetype": "xyz",
"insurance": [
{
"year_one_claims": [
{
"2020": 200
}
]
}
]
}
],
"Provider": {
"agent": "aaadd"
}
}
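If you are on a Python version older than 3.8 and cannot use := in comprehensions, the same idea works with plain loops. A rough equivalent sketch:
to_be_deleted = [[], {}, "", None]
def remove_empty_elements(jsonData):
    # clean the children first, then keep only the non-empty ones
    if isinstance(jsonData, list):
        cleaned = []
        for elem in jsonData:
            new_elem = remove_empty_elements(elem)
            if new_elem not in to_be_deleted:
                cleaned.append(new_elem)
        return cleaned
    if isinstance(jsonData, dict):
        cleaned = {}
        for key, value in jsonData.items():
            new_value = remove_empty_elements(value)
            if new_value not in to_be_deleted:
                cleaned[key] = new_value
        return cleaned
    return jsonData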
Try this out:
data = {
"_id": "30546c62-8ea0-4f1a-a239-cc7508041a7b",
"IsActive": "True",
"name": "Pixel 3",
"phone": [
{
"Bill": 145,
"phonetype": "xyz",
"insurance": [
{
"year_one_claims": [
{
"2020": 200
},
{
},
{
},
{
},
{
}
]
},
{
"year_two_claims": [
{
},
{
},
{
},
{
},
{
}
]
},
]
}
],
"Provider": {
"agent": "aaadd",
}
}
for phn_data in data['phone']:
    # iterate over copies so items can be removed safely while looping
    for ins in list(phn_data['insurance']):
        for key, val in list(ins.items()):
            for ins_data in list(val):
                if not ins_data:
                    val.remove(ins_data)  # drop empty claim dicts
            if not val:
                # the claims list is now empty: remove it and its parent entry
                del ins[key]
                phn_data['insurance'].remove(ins)
print(data)
Output:
{
'_id': '30546c62-8ea0-4f1a-a239-cc7508041a7b',
'IsActive': 'True',
'name': 'Pixel 3',
'phone': [{
'Bill': 145,
'phonetype': 'xyz',
'insurance': [{
'year_one_claims': [{
'2020': 200
}]
}]
}],
'Provider': {
'agent': 'aaadd'
}
}
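Note that this version is hard-wired to the phone/insurance/claims layout of the sample, while the recursive answer above prunes empty elements at any depth; which one to prefer mostly depends on whether your documents always have this fixed shape.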
I'm trying to parse the following JSON data, without storing it in a file, using Python.
{
"select": {
"value": "s_name"
},
"from": "student",
"where": {
"in": [
"s_id",
{
"select": {
"value": "s_id"
},
"from": "student_course",
"where": {
"in": [
"c_id",
{
"select": {
"value": "c_id"
},
"from": "course",
"where": {
"or": [
{
"and": [
{
"eq": [
"c_name",
{
"literal": "DSA"
}
]
},
{
"eq": [
"c_name",
{
"literal": "dbms"
}
]
}
]
},
{
"eq": [
"c_name",
{
"literal": "algorithm"
}
]
}
]
}
}
]
}
}
]
}
}
I'm using the following code:
import json
x = "JSON Data which is shared above"
y = json.dumps(x)
jsonDict = json.loads(y)
print (jsonDict['where'])
I am not sure how to proceed further; could you please advise how it can be done?
I want to fetch the values of all the objects, especially the where clause.
json.dumps() takes an object and encodes it into a JSON string. But you are trying to take a JSON string and decode it into an object (a dict in this case). The method you should therefore be applying to x is json.loads(). You can then convert the resulting dict back into a JSON string, y, with json.dumps():
import json
x = """{
"select": {
"value": "s_name"
},
"from": "student",
"where": {
"in": [
"s_id",
{
"select": {
"value": "s_id"
},
"from": "student_course",
"where": {
"in": [
"c_id",
{
"select": {
"value": "c_id"
},
"from": "course",
"where": {
"or": [
{
"and": [
{
"eq": [
"c_name",
{
"literal": "DSA"
}
]
},
{
"eq": [
"c_name",
{
"literal": "dbms"
}
]
}
]
},
{
"eq": [
"c_name",
{
"literal": "algorithm"
}
]
}
]
}
}
]
}
}
]
}
}"""
jsonDict = json.loads(x) # from string to a dict
print(jsonDict['where'])
y = json.dumps(jsonDict) # from dict back to a string
Prints:
{'in': ['s_id', {'select': {'value': 's_id'}, 'from': 'student_course', 'where': {'in': ['c_id', {'select': {'value': 'c_id'}, 'from': 'course', 'where': {'or': [{'and': [{'eq': ['c_name', {'literal': 'DSA'}]}, {'eq': ['c_name', {'literal': 'dbms'}]}]}, {'eq': ['c_name', {'literal': 'algorithm'}]}]}}]}}]}
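If you also want the nested where clauses (or any other key) rather than just the top-level one, a small recursive walk over the parsed dict is enough. A sketch, assuming the structure shown above:
def find_key(obj, target):
    # yield every value stored under `target`, however deeply nested
    if isinstance(obj, dict):
        for key, value in obj.items():
            if key == target:
                yield value
            yield from find_key(value, target)
    elif isinstance(obj, list):
        for item in obj:
            yield from find_key(item, target)
for clause in find_key(jsonDict, "where"):
    print(clause)
print(list(find_key(jsonDict, "literal")))  # ['DSA', 'dbms', 'algorithm']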
I have streaming data coming in as a JSON array and I want to flatten it out into a single row in a Spark DataFrame using Python.
Here is what the JSON data looks like:
{
"event": [
{
"name": "QuizAnswer",
"count": 1
}
],
"custom": {
"dimensions": [
{
"title": "Are you:"
},
{
"question_id": "5965"
},
{
"option_id": "19029"
},
{
"option_title": "Non-binary"
},
{
"item": "Non-binary"
},
{
"tab_index": "3"
},
{
"tab_count": "4"
},
{
"tab_initial_index": "4"
},
{
"page": "home"
},
{
"environment": "testing"
},
{
"page_count": "0"
},
{
"widget_version": "2.2.44"
},
{
"session_count": "1"
},
{
"quiz_settings_id": "1020"
},
{
"quiz_session": "6e5a3b5c-9961-4c1b-a2af-3374bbeccede"
},
{
"shopify_customer_id": "noid"
},
{
"cart_token": ""
},
{
"app_version": "2.2.44"
},
{
"shop_name": "safety-valve.myshopify.com"
}
],
"metrics": []
}
}
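One way to approach the flattening (a sketch only, not a full streaming solution) is to merge the single-key dicts under custom.dimensions into one flat Python dict and hand that single row to Spark. The trimmed payload string and the SparkSession setup below are assumptions:
import json
from pyspark.sql import Row, SparkSession
spark = SparkSession.builder.getOrCreate()
# trimmed version of the payload shown above, standing in for the incoming message
raw = '{"event": [{"name": "QuizAnswer", "count": 1}], "custom": {"dimensions": [{"title": "Are you:"}, {"question_id": "5965"}], "metrics": []}}'
record = json.loads(raw)
# custom.dimensions is a list of one-key dicts; merge them into a single dict
flat = {}
for d in record["custom"]["dimensions"]:
    flat.update(d)
flat["event_name"] = record["event"][0]["name"]
flat["event_count"] = record["event"][0]["count"]
df = spark.createDataFrame([Row(**flat)])  # one row, one column per dimension
df.show()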
I have a following collection in MongoDB:
{
"_id" : ObjectId("5bbc86e5c16a27f1e1bd39f8"),
"name" : "swetha",
"nameId" : 123,
"source" : "Blore",
"sourceId" : 10,
"LastUpdate" : "10-Oct-2018"
}
{
"_id" : ObjectId("5bbc86e5c16a27f1e1bd39f9"),
"name" : "swetha",
"nameId" : 123,
"source" : "Mlore",
"sourceId" : "11",
"LastUpdate" : "11-Oct-2018"
}
{
"_id" : ObjectId("5bbc86e5c16a27f1e1bd39fa"),
"name" : "swathi",
"nameId" : 124,
"source" : "Mlore",
"sourceId" : "11",
"LastUpdate" : "9-Oct-2018"
}
I am a beginner with Python and want to compare 'LastUpdate' between the above records based on a matching 'name' or 'nameId', and push the record with the latest date to another collection. E.g. name 'swetha' is the same in the first two records, so compare their 'LastUpdate' values and output the record with the later date.
I have written the following code to read the records from MongoDB and print them. I didn't understand how to compare records that share the same key and compare their timestamps, though I looked at a few resources on Google.
import json
import pandas as pd
from pymongo import MongoClient
try:
client = MongoClient()
print("Connected successfully!!!")
except:
print("Could not connect to MongoDB")
# database
db = client.conn
collection = db.contactReg
df = collection.find()
for row in df:
print(row)
Links that I referred to:
Is there a better way to compare dictionary values
https://gis.stackexchange.com/questions/87276/how-to-compare-values-from-a-column-in-attribute-table-with-values-in-dictionary
Comparing two dictionaries and printing key value pair in python, and a few more.
I think what you need is an aggregation. This might look big, but once you get the hang of Mongo aggregations you'll get comfortable.
df = collection.aggregate([
{
"$project": {
"_id": 0,
"name": 1,
"nameId": 1,
"source": 1,
"sourceId": 1,
"LastUpdate": 1,
"LastUpdateArray": {
"$split": [
"$LastUpdate",
"-"
]
}
}
},
{
"$project": {
"name": 1,
"nameId": 1,
"source": 1,
"sourceId": 1,
"LastUpdate": 1,
"LastUpdateArray": 1,
"LastUpdateMonth": {
"$arrayElemAt": [
"$LastUpdateArray",
1
]
}
}
},
{
"$project": {
"name": 1,
"nameId": 1,
"source": 1,
"sourceId": 1,
"LastUpdate": 1,
"Year": {
"$arrayElemAt": [
"$LastUpdateArray",
2
]
},
"Date": {
"$arrayElemAt": [
"$LastUpdateArray",
0
]
},
"Month": {
"$switch": {
"branches": [
{
"case": {
"$eq": [
"$LastUpdateMonth",
"Jan"
]
},
"then": "01"
},
{
"case": {
"$eq": [
"$LastUpdateMonth",
"Feb"
]
},
"then": "02"
},
{
"case": {
"$eq": [
"$LastUpdateMonth",
"Mar"
]
},
"then": "03"
},
{
"case": {
"$eq": [
"$LastUpdateMonth",
"Apr"
]
},
"then": "04"
},
{
"case": {
"$eq": [
"$LastUpdateMonth",
"May"
]
},
"then": "05"
},
{
"case": {
"$eq": [
"$LastUpdateMonth",
"Jun"
]
},
"then": "06"
},
{
"case": {
"$eq": [
"$LastUpdateMonth",
"Jul"
]
},
"then": "07"
},
{
"case": {
"$eq": [
"$LastUpdateMonth",
"Aug"
]
},
"then": "08"
},
{
"case": {
"$eq": [
"$LastUpdateMonth",
"Sep"
]
},
"then": "09"
},
{
"case": {
"$eq": [
"$LastUpdateMonth",
"Oct"
]
},
"then": "10"
},
{
"case": {
"$eq": [
"$LastUpdateMonth",
"Nov"
]
},
"then": "11"
},
{
"case": {
"$eq": [
"$LastUpdateMonth",
"Dec"
]
},
"then": "12"
}
],
"default": "01"
}
}
}
},
{
"$project": {
"name": 1,
"nameId": 1,
"source": 1,
"sourceId": 1,
"LastUpdate": 1,
"Year": 1,
"Date": 1,
"Month": 1,
"DateString": {
"$concat": [
"$Year",
"-",
"$Month",
"-",
"$Date"
]
}
}
},
{
"$project": {
"name": 1,
"nameId": 1,
"source": 1,
"sourceId": 1,
"LastUpdate": 1,
"Date": {
"$dateFromString": {
"dateString": "$DateString"
}
}
}
},
{
"$sort": {
"Date": -1
}
},
{
"$group": {
"_id": "$name",
"name": {
"$first": "$name"
},
"nameId": {
"$first": "$nameId"
},
"source": {
"$first": "$source"
},
"sourceId": {
"$first": "$sourceId"
},
"LastUpdate": {
"$first": "$LastUpdate"
},
"Date": {
"$first": "$Date"
}
}
},
{
"$project": {
"name": 1,
"nameId": 1,
"source": 1,
"sourceId": 1,
"LastUpdate": 1
}
}
])
In the first five stages of the aggregation I convert 'LastUpdate' into a date, then sort descending by that date. In the $group stage I group by name and take the first document that comes for each name.
Hope this helps.
I'm assuming what you need is the duplicate records, and I'm taking the first one that comes. Reference: https://stackoverflow.com/a/26985011/7630071
df = collection.aggregate([
{
"$group": {
"_id": "$name",
"count": {
"$sum": 1
},
"data": {
"$push": {
"nameId": "$nameId",
"source": "$source",
"sourceId": "$sourceId",
"LastUpdate": "$LastUpdate"
}
}
}
},
{
"$match": {
"_id": {
"$ne": null
},
"count": {
"$gt": 1
}
}
}
])
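If the pipeline above feels heavy, the comparison can also be done on the Python side with pymongo: parse 'LastUpdate' with datetime.strptime, keep the newest document per name (or nameId), and insert those into the other collection. A sketch; the database and source collection names come from the question, and the target collection name is made up:
from datetime import datetime
from pymongo import MongoClient
client = MongoClient()
db = client.conn
source = db.contactReg
target = db.contactLatest  # hypothetical destination collection
latest = {}
for doc in source.find():
    when = datetime.strptime(doc["LastUpdate"], "%d-%b-%Y")  # "10-Oct-2018" -> datetime(2018, 10, 10)
    key = doc["nameId"]  # or doc["name"]
    if key not in latest or when > latest[key][0]:
        latest[key] = (when, doc)
for when, doc in latest.values():
    doc.pop("_id", None)  # let MongoDB assign a fresh _id in the target collection
    target.insert_one(doc)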