I'm trying to parse the following JSON data in Python, without storing it in a file.
{
"select": {
"value": "s_name"
},
"from": "student",
"where": {
"in": [
"s_id",
{
"select": {
"value": "s_id"
},
"from": "student_course",
"where": {
"in": [
"c_id",
{
"select": {
"value": "c_id"
},
"from": "course",
"where": {
"or": [
{
"and": [
{
"eq": [
"c_name",
{
"literal": "DSA"
}
]
},
{
"eq": [
"c_name",
{
"literal": "dbms"
}
]
}
]
},
{
"eq": [
"c_name",
{
"literal": "algorithm"
}
]
}
]
}
}
]
}
}
]
}
}
I'm using the following code:
import json
# Placeholder standing in for the JSON text shown above; note that x is a str, not a dict.
x = "JSON Data which is shared above"
# json.dumps() serialises a Python object to JSON text; given a str it returns that str wrapped as a quoted JSON string.
y = json.dumps(x)
# json.loads() undoes the step above, so jsonDict is the original str again rather than a dict.
jsonDict = json.loads(y)
# Subscripting a str with 'where' raises TypeError; the JSON text has to be passed to json.loads() directly.
print (jsonDict['where'])
I'm not sure how to proceed further. Could you please advise how this can be done?
I want to fetch the values of all the objects, especially those in the where clause.
json.dumps() takes an object and encodes it into a JSON string. But you are trying to take a JSON string and decode it into an object (a dict in this case). The method you should be applying against x therefore is json.loads(). You can then convert the resulting dict back into a JSON string, y, with json.dumps():
import json
x = """{
"select": {
"value": "s_name"
},
"from": "student",
"where": {
"in": [
"s_id",
{
"select": {
"value": "s_id"
},
"from": "student_course",
"where": {
"in": [
"c_id",
{
"select": {
"value": "c_id"
},
"from": "course",
"where": {
"or": [
{
"and": [
{
"eq": [
"c_name",
{
"literal": "DSA"
}
]
},
{
"eq": [
"c_name",
{
"literal": "dbms"
}
]
}
]
},
{
"eq": [
"c_name",
{
"literal": "algorithm"
}
]
}
]
}
}
]
}
}
]
}
}"""
# Decode the JSON text held in x; the top-level JSON object becomes a Python dict.
jsonDict = json.loads(x) # from string to a dict
# Show the nested structure stored under the top-level "where" key.
print(jsonDict['where'])
# Encode the dict as JSON text again, demonstrating the opposite direction.
y = json.dumps(jsonDict) # from dict back to a string
Prints:
{'in': ['s_id', {'select': {'value': 's_id'}, 'from': 'student_course', 'where': {'in': ['c_id', {'select': {'value': 'c_id'}, 'from': 'course', 'where': {'or': [{'and': [{'eq': ['c_name', {'literal': 'DSA'}]}, {'eq': ['c_name', {'literal': 'dbms'}]}]}, {'eq': ['c_name', {'literal': 'algorithm'}]}]}}]}}]}
Related
I am trying to remove elements from a json doc. Here's the input json.
{
"start": {
"docId": "3723",
"data": {
"dId": null,
"innerprops": {
"pId": "4573",
"stat": {
"statId": "7578",
"portaldata": [
{
"portalid": "67383",
"pairs": [
{
"id": "1111",
"loc": "denver"
},
{
"id": "2222",
"loc": "houston"
},
{
"id": "1111",
"loc": "austin"
},
{
"id": "3333",
"value": "miami"
}
],
"name": "popq"
}
]
},
"url": ""
}
}
}
}
I want to return json after removing the pairs from the 'pairs' property where the id is not equal to "1111". E.g. return value
{
"start": {
"docId": "3723",
"data": {
"dId": null,
"innerprops": {
"pId": "4573",
"stat": {
"statId": "7578",
"portaldata": [
{
"portalid": "67383",
"pairs": [
{
"id": "1111",
"loc": "denver"
},
{
"id": "1111",
"loc": "austin"
}
],
"name": "popq"
}
]
},
"url": ""
}
}
}
}
Here's my code
# HTTP handler: reads the JSON body of the request, tries to drop every pair whose "id"
# is not '1111', and returns the document as text with status 200 (or the error text with 500).
# NOTE(review): the func.HttpRequest / func.HttpResponse types suggest Azure Functions - confirm.
def main(req: func.HttpRequest) -> func.HttpResponse:
try:
data = req.get_json()
for entry in data["start"]["data"]["innerprops"]["stat"]["portaldata"]:
for pair in entry["pairs"]:
if pair["id"] != '1111':
# `del pair` only unbinds the loop variable; entry["pairs"] still references the object,
# so nothing is removed from the document (the behaviour described in the question).
del pair
except Exception as e:
# Any failure (bad JSON, missing key, ...) is logged and reported to the caller as a 500.
logging.error(str(e))
return func.HttpResponse(str(e), status_code = 500)
# str(data) produces the Python repr of the dict, not JSON text.
return func.HttpResponse(str(data), status_code=200)
However, the value is only deleted locally and the full document is still returned. What am I missing? Thanks.
You can do this with a list comprehension:
# Rebind each portal's "pairs" to a new list that holds only the id '1111' entries;
# assigning through the dict key is what makes the change visible in `data`.
portals = data["start"]["data"]["innerprops"]["stat"]["portaldata"]
for portal in portals:
    matching_pairs = [candidate for candidate in portal["pairs"] if candidate["id"] == '1111']
    portal["pairs"] = matching_pairs
Result:
{'start': {'data': {'dId': None,
'innerprops': {'pId': '4573',
'stat': {'portaldata': [{'name': 'popq',
'pairs': [{'id': '1111',
'loc': 'denver'},
{'id': '1111',
'loc': 'austin'}],
'portalid': '67383'}],
'statId': '7578'},
'url': ''}},
'docId': '3723'}}
As title suggests, I have this document structure:
{
"_id":ObjectId("61e53553ac31665894ebf6bc"),
"questionID":"8",
"questionContent":"find it",
"questoinAnswer":"it's here",
"questionStatus":"active",
"questionImage":"some image",
"hints":[
{
"hintID":"1",
"hintSubject":"in you pucket",
"hintContent":"bla bla bla",
"hintType":"private",
"hintStatus":"Active",
"time":"2022-01-23 11:02:41.976391"
},
{
"hintID":"2",
"hintSubject":"red sea",
"hintContent":"bla bla bla",
"hintMedia":"some media",
"hintType":"puplic",
"hintStatus":"Active",
"time":"2022-01-23 11:05:47.567226"
}
]
}
I want to retrieve only the values of hintSubject if the hintType is free and hintStatus is active and put it into a list
Use the aggregation query below, which stores the list of matching hintSubject values under a hintSubject key at the root of each document.
from pymongo import MongoClient
# Connect with pymongo's default client settings and select the collection.
c = MongoClient()
db = c["db_name"]
col = db["sample_collection"]

# True for a hint whose type is "free" and whose status is "Active".
hint_matches = {
    "$and": [
        {"$eq": ["$$this.hintType", "free"]},
        {"$eq": ["$$this.hintStatus", "Active"]},
    ]
}

# Fold over the hints array, appending a hint's subject only when it matches.
collect_subjects = {
    "$reduce": {
        "input": "$hints",
        "initialValue": [],
        "in": {
            "$concatArrays": [
                "$$value",
                {
                    "$cond": {
                        "if": hint_matches,
                        "then": ["$$this.hintSubject"],
                        "else": [],
                    },
                },
            ],
        },
    }
}

# Single-stage pipeline: add the collected list as a root-level hintSubject field.
pipeline = [{"$addFields": {"hintSubject": collect_subjects}}]

for doc in col.aggregate(pipeline):
    print(doc["hintSubject"])
Mongo Playground Sample Execution
I am having problems getting some values using jmespath.search().
Just to put it in context, I am downloading all the information from my request in a CSV file. I then upload this as a JSON and using JMESPath, I wish to get the values.
I want to get the #value where '_instrumentIdScheme': 'mhi:MHILIST'
json fixed:
[
{
"_fpmlVersion": "5-6",
"header": {
"messageType": "PrevDayCloseBond",
"sendTo": [
{
"#value": "Anvil"
}
],
"creationTimestamp": "2021-09-28T06:00:00.000Z"
},
"m:asOfDate": {
"#value": "2021-09-28T00:00:00.000Z"
},
"_xmlns": "http://www.fpml.org/FpML-5/reporting",
"_xmlns:m": "urn:com.mizuho.bdm",
"_xmlns:mhi": "urn:com.mizuho.bdm.mhi",
"_xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance",
"_xsi:schemaLocation": "http://www.fpml.org/FpML-5/reporting http://svc-bdmentity01p:8080/schema/7.2.0/com/mizuho/bdm/fpml/fpml-5-6-reporting.xsd urn:com.mizuho.bdm http://svc-bdmentity01p:8080/schema/7.2.0/com/mizuho/bdm/fpml/mizuho-fpml.xsd urn:com.mizuho.bdm.mhi http://svc-bdmentity01p:8080/schema/7.2.0/com/mizuho/bdm/mhi/fpml/mhi-fpml.xsd",
"m:assetPricing": [
{
"m:pricingSource": [
{
"#value": "LON-XEN-BBG"
},
{
"#value": "BGN",
"_pricingSourceScheme": "mizuho:bloomberg-source"
}
],
"m:instrumentId": [
{
"#value": "100001380992",
"_instrumentIdScheme": "mhi:MHILIST"
},
{
"#value": "100001380992",
"_instrumentIdScheme": "mhsa:instrument-id"
}
],
"m:currency": {
"#value": "USD"
},
"m:price": [
{
"value": 140.78125,
"measureType": {
"#value": "Bid Price",
"_assetMeasureScheme": "mizuho:price-type"
}
},
{
"value": 140.875,
"measureType": {
"#value": "Mid Price",
"_assetMeasureScheme": "mizuho:price-type"
}
},
{
"value": 140.96875,
"measureType": {
"#value": "Offer Price",
"_assetMeasureScheme": "mizuho:price-type"
}
}
]
}
],
"m:pricingDate": "2021-09-28T00:00:00.000Z"
}
]
Replace all single quotes with double quotes so that the document is valid JSON.
To select every #value that satisfies the condition:
def flatten(container):
    """Yield the leaf items of arbitrarily nested lists and tuples.

    Items are produced depth first, in their original order. Only ``list``
    and ``tuple`` instances are descended into; every other value (including
    strings and dicts) is yielded unchanged.

    Args:
        container: An iterable whose items may themselves be lists or tuples.

    Yields:
        Each non-list, non-tuple item found at any nesting depth.
    """
    for item in container:
        if isinstance(item, (list, tuple)):
            # Delegate to the recursive generator instead of re-yielding by hand.
            yield from flatten(item)
        else:
            yield item
str = """
[
{
"_fpmlVersion": "5-6",
"header": {
"messageType": "PrevDayCloseBond",
"sendTo": [
{
"#value": "Anvil"
}
],
"creationTimestamp": "2021-09-28T06:00:00.000Z"
},
"m:asOfDate": {
"#value": "2021-09-28T00:00:00.000Z"
},
"_xmlns": "http://www.fpml.org/FpML-5/reporting",
"_xmlns:m": "urn:com.mizuho.bdm",
"_xmlns:mhi": "urn:com.mizuho.bdm.mhi",
"_xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance",
"_xsi:schemaLocation": "http://www.fpml.org/FpML-5/reporting http://svc-bdmentity01p:8080/schema/7.2.0/com/mizuho/bdm/fpml/fpml-5-6-reporting.xsd urn:com.mizuho.bdm http://svc-bdmentity01p:8080/schema/7.2.0/com/mizuho/bdm/fpml/mizuho-fpml.xsd urn:com.mizuho.bdm.mhi http://svc-bdmentity01p:8080/schema/7.2.0/com/mizuho/bdm/mhi/fpml/mhi-fpml.xsd",
"m:assetPricing": [
{
"m:pricingSource": [
{
"#value": "LON-XEN-BBG"
},
{
"#value": "BGN",
"_pricingSourceScheme": "mizuho:bloomberg-source"
}
],
"m:instrumentId": [
{
"#value": "100001380992",
"_instrumentIdScheme": "mhi:MHILIST"
},
{
"#value": "100001380992",
"_instrumentIdScheme": "mhsa:instrument-id"
}
],
"m:currency": {
"#value": "USD"
},
"m:price": [
{
"value": 140.78125,
"measureType": {
"#value": "Bid Price",
"_assetMeasureScheme": "mizuho:price-type"
}
},
{
"value": 140.875,
"measureType": {
"#value": "Mid Price",
"_assetMeasureScheme": "mizuho:price-type"
}
},
{
"value": 140.96875,
"measureType": {
"#value": "Offer Price",
"_assetMeasureScheme": "mizuho:price-type"
}
}
]
}
],
"m:pricingDate": "2021-09-28T00:00:00.000Z"
}
]
"""
# `str` is the JSON text assigned above. json.loads() skips whitespace between
# JSON tokens, so the text is parsed as-is; the parsed document gets its own
# name rather than rebinding `str` to a different type.
document = json.loads(str)
# Select the "#value" of every m:instrumentId entry whose _instrumentIdScheme is mhi:MHILIST.
valueslist = jmespath.search('[]["m:assetPricing"][][]."m:instrumentId"[?"_instrumentIdScheme" == `mhi:MHILIST`].["#value"]', document)
# The search result may be nested lists; flatten() reduces it to a single flat list.
values = list(flatten(valueslist))
print(values)
result:
['100001380992']
I'm trying to move data from SQL to Mongo. Here is the challenge I'm facing: if a child object is empty, I want to remove its parent element as well, and I want this removal to propagate all the way up to the insurance field.
Here is what I tried:
# Attempts to strip empty values from jsonData in place and returns jsonData.
# NOTE(review): the function never calls itself, so only the entries reachable
# directly from jsonData are examined; nested containers are not cleaned.
def remove_empty_elements(jsonData):
if(isinstance(jsonData, list) or isinstance(jsonData,dict)):
# Iterate over a snapshot so entries can be deleted from jsonData inside the loop.
# For a dict, elem is a key; for a list, elem is an item, yet it is still used as an index below.
for elem in list(jsonData):
# List-valued entry: keep only the truthy items of that list.
if not isinstance(elem, dict) and isinstance(jsonData[elem], list) and elem:
jsonData[elem] = [x for x in jsonData[elem] if x]
# Drop the entry when its list has no items.
if(len(jsonData[elem])==0):
del jsonData[elem]
# Dict-valued entry that is empty: drop it.
elif not isinstance(elem, dict) and isinstance(jsonData[elem], dict) and not jsonData[elem]:
del jsonData[elem]
else:
pass
return jsonData
sample data
{
"_id": "30546c62-8ea0-4f1a-a239-cc7508041a7b",
"IsActive": "True",
"name": "Pixel 3",
"phone": [
{
"Bill": 145,
"phonetype": "xyz",
"insurance": [
{
"year_one_claims": [
{
"2020": 200
},
{
},
{
},
{
},
{
}
]
},
{
"year_two_claims": [
{
},
{
},
{
},
{
},
{
}
]
},
]
}
],
"Provider": {
"agent": "aaadd",
}
}
Results should look like that
{
"_id": "30546c62-8ea0-4f1a-a239-cc7508041a7b",
"IsActive": "True",
"name": "Pixel 3",
"phone": [
{
"Bill": 145,
"phonetype": "xyz",
"insurance": [
{
"year_one_claims": [
{
"2020": 200
},
]
},
]
}
],
"Provider": {
"agent": "aaadd",
}
}
Your if statements are kind of confusing. I think you are looking for a recursion:
import json
# define which elements you want to remove:
to_be_deleted = [[], {}, "", None]


def remove_empty_elements(jsonData):
    """Return a copy of ``jsonData`` with empty values pruned recursively.

    Children are cleaned before they are tested, so a container that becomes
    empty once its own children are pruned is removed as well. Lists and
    dicts are rebuilt rather than modified; any other value is returned
    unchanged.

    Args:
        jsonData: A JSON-like value (dict, list or scalar).

    Returns:
        The cleaned structure, without any value equal to an entry of
        ``to_be_deleted``.
    """
    if isinstance(jsonData, list):
        kept_items = []
        for item in jsonData:
            cleaned = remove_empty_elements(item)
            if cleaned not in to_be_deleted:
                kept_items.append(cleaned)
        return kept_items

    if isinstance(jsonData, dict):
        kept_entries = {}
        for key, value in jsonData.items():
            cleaned = remove_empty_elements(value)
            if cleaned not in to_be_deleted:
                kept_entries[key] = cleaned
        return kept_entries

    # Scalars have nothing to prune.
    return jsonData
print(json.dumps(remove_empty_elements(jsonData), indent=4))
Edit/Note: from Python 3.8 onward you can use assignment expressions (:=) in comprehensions.
Output:
{
"_id": "30546c62-8ea0-4f1a-a239-cc7508041a7b",
"IsActive": "True",
"name": "Pixel 3",
"phone": [
{
"Bill": 145,
"phonetype": "xyz",
"insurance": [
{
"year_one_claims": [
{
"2020": 200
}
]
}
]
}
],
"Provider": {
"agent": "aaadd"
}
}
Try out this:
# Sample document: each insurance entry maps a claims key to a list of claim
# dicts, several of which are empty.
data = {
    "_id": "30546c62-8ea0-4f1a-a239-cc7508041a7b",
    "IsActive": "True",
    "name": "Pixel 3",
    "phone": [
        {
            "Bill": 145,
            "phonetype": "xyz",
            "insurance": [
                {
                    "year_one_claims": [
                        {
                            "2020": 200
                        },
                        {
                        },
                        {
                        },
                        {
                        },
                        {
                        }
                    ]
                },
                {
                    "year_two_claims": [
                        {
                        },
                        {
                        },
                        {
                        },
                        {
                        },
                        {
                        }
                    ]
                },
            ]
        }
    ],
    "Provider": {
        "agent": "aaadd",
    }
}

for phn_data in data['phone']:
    for ins in phn_data['insurance']:
        # Snapshot the items so keys can be deleted from `ins` inside the loop.
        for key, val in list(ins.items()):
            # Drop the empty claim dicts, keeping the same list object.
            val[:] = [ins_data for ins_data in val if ins_data]
            if not val:
                del ins[key]
    # Remove emptied insurance entries only after the loop above has finished:
    # calling remove() on the list while iterating it would skip the entry that
    # follows each removal. Entries that still hold other keys are kept.
    phn_data['insurance'][:] = [ins for ins in phn_data['insurance'] if ins]
print(data)
Output:
{
'_id': '30546c62-8ea0-4f1a-a239-cc7508041a7b',
'IsActive': 'True',
'name': 'Pixel 3',
'phone': [{
'Bill': 145,
'phonetype': 'xyz',
'insurance': [{
'year_one_claims': [{
'2020': 200
}]
}]
}],
'Provider': {
'agent': 'aaadd'
}
}
# Runs the aggregation through the generic db.command() call, passing the whole
# "aggregate" command document (target collection, pipeline and options) by hand.
test_cursor = db.command({
"aggregate": "New_layout",
"pipeline": [
# Keep documents whose FIRST_DATE is >= new_date and whose CHAIN_ID is not the empty string.
{ "$match": { "$and": [
{ "FIRST_DATE": { "$gte": new_date } },
{ "CHAIN_ID": { "$ne": "" } }
] } },
# One output document per ENTERS array element; the element's index is stored in "Date".
{ "$unwind": { "path": "$ENTERS", "includeArrayIndex": "Date" } },
# Pass the listed fields through, drop _id, and expose the array index as DATE.
{ "$project": {
"_id": 0,
"SITE_ID": "$SITE_ID",
"CHAIN_ID": "$CHAIN_ID",
"SEGMENT_ID": "$SEGMENT_ID",
"ZIP": "$ZIP",
"ZIP3": "$ZIP3",
"MARKET_ID": "$MARKET_ID",
"REGION": "$REGION",
"MALL_CODE": "$MALL_CODE",
"MALL_AREA": "$MALL_AREA",
"MALL_NAME": "$MALL_NAME",
"FIRST_DATE": "$FIRST_DATE",
"MARKET_AREA": "$MARKET_AREA",
"REGION_AREA": "$REGION_AREA",
"ZIP_AREA": "$ZIP_AREA",
"ZIP3_AREA": "$ZIP3_AREA",
"DATE": "$Date",
"ENTERS": "$ENTERS"
} }
],
# bool(1) evaluates to True.
"allowDiskUse": bool(1),
"cursor": {}
})
# NOTE(review): list() here yields only the reply's top-level keys (cursor, ok, waitedMS, as
# quoted in the question), which suggests command() returns the raw reply document rather
# than an iterable cursor over the results - confirm against the pymongo documentation.
asd=list(test_cursor)
The contents of the cursor are as follows:
[u'cursor', u'ok', u'waitedMS'] .
However with an $out statement, the output collection has the expected contents.
I am running pymongo v3.2.2 and MongoDB 3.2. I was told that this problem occurs with v3.0 or earlier, but I have not been able to figure out why it happens here.
You should use aggregate() instead of command().
# Stage 1: keep documents dated on/after new_date that have a non-empty CHAIN_ID.
match_stage = {
    "$match": {
        "$and": [
            {"FIRST_DATE": {"$gte": new_date}},
            {"CHAIN_ID": {"$ne": ""}},
        ]
    }
}

# Stage 2: emit one document per ENTERS element, recording its array index as "Date".
unwind_stage = {"$unwind": {"path": "$ENTERS", "includeArrayIndex": "Date"}}

# Stage 3: drop _id, pass the site fields through and expose the index as DATE.
project_stage = {
    "$project": {
        "_id": 0,
        "SITE_ID": "$SITE_ID",
        "CHAIN_ID": "$CHAIN_ID",
        "SEGMENT_ID": "$SEGMENT_ID",
        "ZIP": "$ZIP",
        "ZIP3": "$ZIP3",
        "MARKET_ID": "$MARKET_ID",
        "REGION": "$REGION",
        "MALL_CODE": "$MALL_CODE",
        "MALL_AREA": "$MALL_AREA",
        "MALL_NAME": "$MALL_NAME",
        "FIRST_DATE": "$FIRST_DATE",
        "MARKET_AREA": "$MARKET_AREA",
        "REGION_AREA": "$REGION_AREA",
        "ZIP_AREA": "$ZIP_AREA",
        "ZIP3_AREA": "$ZIP3_AREA",
        "DATE": "$Date",
        "ENTERS": "$ENTERS",
    }
}

# Collection.aggregate() is called directly on the New_layout collection,
# with the disk-use option passed as a keyword argument.
test_cursor = db.New_layout.aggregate(
    [match_stage, unwind_stage, project_stage],
    allowDiskUse=True,
)