I'm trying to join sub-dicts in Python so that valid JSON is composed.
What I have is:
{
  'ctx/language': 'en',
  'ctx/territory': 'DE',
  'composer_name': 'openEHR2study',
  'Allergies': {
    'adverse_reaction-allergy': [{
      'reaction_event_summary': {
        'clinical_impact': [{
          '|code': 'at0035'
        }]
      }
    }, {
      'recorded': ['2020-05-14T00:00:00.000Z']
    }, {
      'reaction_event_summary': {
        'certainty': [{
          '|code': 'at0024'
        }]
      }
    }, {
      'substance_agent': ['s']
    }, {
      'reaction_reported': ['true']
    }, {
      'comment': ['c']
    }]
  }
}
What I would like is a join over "reaction_event_summary" like this:
{
  'ctx/language': 'en',
  'ctx/territory': 'DE',
  'composer_name': 'openEHR2study',
  'Allergies': {
    'adverse_reaction-allergy': [{
      'reaction_event_summary': {
        'clinical_impact': [{
          '|code': 'at0035'
        }],
        'certainty': [{
          '|code': 'at0024'
        }]
      }
    }, {
      'recorded': ['2020-05-14T00:00:00.000Z']
    }, {
      'substance_agent': ['s']
    }, {
      'reaction_reported': ['true']
    }, {
      'comment': ['c']
    }]
  }
}
I have no idea how I should loop through the JSON lists and dicts to get this done.
I have made a rough attempt; please check whether it works. The idea is to iterate over the original dictionary while building the result in a deep copy.
val = {
    'ctx/language': 'en',
    'ctx/territory': 'DE',
    'composer_name': 'openEHR2study',
    'Allergies': {
        'adverse_reaction-allergy': [
            {
                'reaction_event_summary': {
                    'clinical_impact': [{'|code': 'at0035'}]
                }
            },
            {'recorded': ['2020-05-14T00:00:00.000Z']},
            {
                'reaction_event_summary': {
                    'certainty': [{'|code': 'at0024'}]
                }
            },
            {'substance_agent': ['s']},
            {'reaction_reported': ['true']},
            {'comment': ['c']}
        ]
    }
}
import copy
import json

val1 = copy.deepcopy(val)
# start from an empty list and re-append entries, merging as we go
val1['Allergies']['adverse_reaction-allergy'] = []

seen_reaction = False
for _d in val['Allergies']['adverse_reaction-allergy']:
    if _d.get('reaction_event_summary'):
        if not seen_reaction:
            # first occurrence becomes the merge target
            seen_reaction = True
            val1['Allergies']['adverse_reaction-allergy'].append(
                {'reaction_event_summary': _d['reaction_event_summary']})
        else:
            # merge the keys of later occurrences into the first one
            target = val1['Allergies']['adverse_reaction-allergy'][0]['reaction_event_summary']
            target.update(_d['reaction_event_summary'])
    else:
        val1['Allergies']['adverse_reaction-allergy'].append(_d)

print(json.dumps(val1, indent=2))
Example output
{
  "ctx/language": "en",
  "ctx/territory": "DE",
  "composer_name": "openEHR2study",
  "Allergies": {
    "adverse_reaction-allergy": [
      {
        "reaction_event_summary": {
          "clinical_impact": [
            {
              "|code": "at0035"
            }
          ],
          "certainty": [
            {
              "|code": "at0024"
            }
          ]
        }
      },
      {
        "recorded": [
          "2020-05-14T00:00:00.000Z"
        ]
      },
      {
        "substance_agent": [
          "s"
        ]
      },
      {
        "reaction_reported": [
          "true"
        ]
      },
      {
        "comment": [
          "c"
        ]
      }
    ]
  }
}
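For reference, here is a sketch of a more general merge that joins every single-key dict in the list sharing the same key. The helper name merge_by_key is my own, not from the post above, and it assumes each list entry has exactly one top-level key, as in the data shown:

import copy

def merge_by_key(items):
    # assumes each entry is a single-key dict, as in the question's data
    merged = []
    index = {}  # key -> already-appended entry holding a sub-dict
    for item in items:
        (key, value), = item.items()
        if isinstance(value, dict) and key in index:
            index[key][key].update(value)  # join into the earlier sub-dict
        else:
            entry = copy.deepcopy(item)
            merged.append(entry)
            if isinstance(value, dict):
                index[key] = entry
    return merged

val['Allergies']['adverse_reaction-allergy'] = merge_by_key(
    val['Allergies']['adverse_reaction-allergy'])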
I am having problems getting some values using jmespath.search().
Just to put it in context: I download all the information from my request into a CSV file, then load it as JSON, and with JMESPath I want to get the values.
I want to get the #value where '_instrumentIdScheme': 'mhi:MHILIST'.
The JSON, fixed:
[
  {
    "_fpmlVersion": "5-6",
    "header": {
      "messageType": "PrevDayCloseBond",
      "sendTo": [
        {
          "#value": "Anvil"
        }
      ],
      "creationTimestamp": "2021-09-28T06:00:00.000Z"
    },
    "m:asOfDate": {
      "#value": "2021-09-28T00:00:00.000Z"
    },
    "_xmlns": "http://www.fpml.org/FpML-5/reporting",
    "_xmlns:m": "urn:com.mizuho.bdm",
    "_xmlns:mhi": "urn:com.mizuho.bdm.mhi",
    "_xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance",
    "_xsi:schemaLocation": "http://www.fpml.org/FpML-5/reporting http://svc-bdmentity01p:8080/schema/7.2.0/com/mizuho/bdm/fpml/fpml-5-6-reporting.xsd urn:com.mizuho.bdm http://svc-bdmentity01p:8080/schema/7.2.0/com/mizuho/bdm/fpml/mizuho-fpml.xsd urn:com.mizuho.bdm.mhi http://svc-bdmentity01p:8080/schema/7.2.0/com/mizuho/bdm/mhi/fpml/mhi-fpml.xsd",
    "m:assetPricing": [
      {
        "m:pricingSource": [
          {
            "#value": "LON-XEN-BBG"
          },
          {
            "#value": "BGN",
            "_pricingSourceScheme": "mizuho:bloomberg-source"
          }
        ],
        "m:instrumentId": [
          {
            "#value": "100001380992",
            "_instrumentIdScheme": "mhi:MHILIST"
          },
          {
            "#value": "100001380992",
            "_instrumentIdScheme": "mhsa:instrument-id"
          }
        ],
        "m:currency": {
          "#value": "USD"
        },
        "m:price": [
          {
            "value": 140.78125,
            "measureType": {
              "#value": "Bid Price",
              "_assetMeasureScheme": "mizuho:price-type"
            }
          },
          {
            "value": 140.875,
            "measureType": {
              "#value": "Mid Price",
              "_assetMeasureScheme": "mizuho:price-type"
            }
          },
          {
            "value": 140.96875,
            "measureType": {
              "#value": "Offer Price",
              "_assetMeasureScheme": "mizuho:price-type"
            }
          }
        ]
      }
    ],
    "m:pricingDate": "2021-09-28T00:00:00.000Z"
  }
]
Replace all single quotes with double quotes. Then, to select all #value entries matching the condition:
import json
import jmespath

def flatten(container):
    for i in container:
        if isinstance(i, (list, tuple)):
            for j in flatten(i):
                yield j
        else:
            yield i
str = """
[
{
"_fpmlVersion": "5-6",
"header": {
"messageType": "PrevDayCloseBond",
"sendTo": [
{
"#value": "Anvil"
}
],
"creationTimestamp": "2021-09-28T06:00:00.000Z"
},
"m:asOfDate": {
"#value": "2021-09-28T00:00:00.000Z"
},
"_xmlns": "http://www.fpml.org/FpML-5/reporting",
"_xmlns:m": "urn:com.mizuho.bdm",
"_xmlns:mhi": "urn:com.mizuho.bdm.mhi",
"_xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance",
"_xsi:schemaLocation": "http://www.fpml.org/FpML-5/reporting http://svc-bdmentity01p:8080/schema/7.2.0/com/mizuho/bdm/fpml/fpml-5-6-reporting.xsd urn:com.mizuho.bdm http://svc-bdmentity01p:8080/schema/7.2.0/com/mizuho/bdm/fpml/mizuho-fpml.xsd urn:com.mizuho.bdm.mhi http://svc-bdmentity01p:8080/schema/7.2.0/com/mizuho/bdm/mhi/fpml/mhi-fpml.xsd",
"m:assetPricing": [
{
"m:pricingSource": [
{
"#value": "LON-XEN-BBG"
},
{
"#value": "BGN",
"_pricingSourceScheme": "mizuho:bloomberg-source"
}
],
"m:instrumentId": [
{
"#value": "100001380992",
"_instrumentIdScheme": "mhi:MHILIST"
},
{
"#value": "100001380992",
"_instrumentIdScheme": "mhsa:instrument-id"
}
],
"m:currency": {
"#value": "USD"
},
"m:price": [
{
"value": 140.78125,
"measureType": {
"#value": "Bid Price",
"_assetMeasureScheme": "mizuho:price-type"
}
},
{
"value": 140.875,
"measureType": {
"#value": "Mid Price",
"_assetMeasureScheme": "mizuho:price-type"
}
},
{
"value": 140.96875,
"measureType": {
"#value": "Offer Price",
"_assetMeasureScheme": "mizuho:price-type"
}
}
]
}
],
"m:pricingDate": "2021-09-28T00:00:00.000Z"
}
]
"""
data = json.loads(raw)  # json.loads handles newlines and tabs itself, no stripping needed
#print(data)
valueslist = jmespath.search(
    '[]["m:assetPricing"][][]."m:instrumentId"[?"_instrumentIdScheme" == `mhi:MHILIST`].["#value"]',
    data)
#print(valueslist)
values = list(flatten(valueslist))
print(values)
result:
['100001380992']
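For comparison, the same selection can be done without JMESPath using plain loops over the parsed structure; a minimal sketch, assuming the shape shown above (data is the parsed list):

values = [
    inst["#value"]
    for doc in data
    for pricing in doc.get("m:assetPricing", [])
    for inst in pricing.get("m:instrumentId", [])
    if inst.get("_instrumentIdScheme") == "mhi:MHILIST"
]
print(values)  # ['100001380992']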
I'm trying to move data from SQL to Mongo. Here is a challenge I'm facing: if a child object is empty, I want the parent element removed, cascading up to the insurance entry in the sample below.
Here is what I tried:
def remove_empty_elements(jsonData):
    if isinstance(jsonData, list) or isinstance(jsonData, dict):
        for elem in list(jsonData):
            if not isinstance(elem, dict) and isinstance(jsonData[elem], list) and elem:
                jsonData[elem] = [x for x in jsonData[elem] if x]
                if len(jsonData[elem]) == 0:
                    del jsonData[elem]
            elif not isinstance(elem, dict) and isinstance(jsonData[elem], dict) and not jsonData[elem]:
                del jsonData[elem]
            else:
                pass
    return jsonData
sample data
{
  "_id": "30546c62-8ea0-4f1a-a239-cc7508041a7b",
  "IsActive": "True",
  "name": "Pixel 3",
  "phone": [
    {
      "Bill": 145,
      "phonetype": "xyz",
      "insurance": [
        {
          "year_one_claims": [
            { "2020": 200 },
            {},
            {},
            {},
            {}
          ]
        },
        {
          "year_two_claims": [
            {},
            {},
            {},
            {},
            {}
          ]
        }
      ]
    }
  ],
  "Provider": {
    "agent": "aaadd"
  }
}
The result should look like this:
{
  "_id": "30546c62-8ea0-4f1a-a239-cc7508041a7b",
  "IsActive": "True",
  "name": "Pixel 3",
  "phone": [
    {
      "Bill": 145,
      "phonetype": "xyz",
      "insurance": [
        {
          "year_one_claims": [
            { "2020": 200 }
          ]
        }
      ]
    }
  ],
  "Provider": {
    "agent": "aaadd"
  }
}
Your if statements are kind of confusing. I think you are looking for recursion:
import json

# define which elements you want to remove:
to_be_deleted = [[], {}, "", None]

def remove_empty_elements(jsonData):
    if isinstance(jsonData, list):
        jsonData = [new_elem for elem in jsonData
                    if (new_elem := remove_empty_elements(elem)) not in to_be_deleted]
    elif isinstance(jsonData, dict):
        jsonData = {key: new_value for key, value in jsonData.items()
                    if (new_value := remove_empty_elements(value)) not in to_be_deleted}
    return jsonData

# jsonData here is the sample dict from the question
print(json.dumps(remove_empty_elements(jsonData), indent=4))
Edit/Note: from Python 3.8 on you can use assignment expressions (:=) in comprehensions; an equivalent without them is sketched after the output below.
Output:
{
    "_id": "30546c62-8ea0-4f1a-a239-cc7508041a7b",
    "IsActive": "True",
    "name": "Pixel 3",
    "phone": [
        {
            "Bill": 145,
            "phonetype": "xyz",
            "insurance": [
                {
                    "year_one_claims": [
                        {
                            "2020": 200
                        }
                    ]
                }
            ]
        }
    ],
    "Provider": {
        "agent": "aaadd"
    }
}
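On versions before Python 3.8, the same filtering can be written with explicit loops instead of the walrus operator; a minimal equivalent sketch, using the same to_be_deleted list:

def remove_empty_elements(jsonData):
    if isinstance(jsonData, list):
        cleaned = []
        for elem in jsonData:
            new_elem = remove_empty_elements(elem)
            if new_elem not in to_be_deleted:
                cleaned.append(new_elem)
        return cleaned
    elif isinstance(jsonData, dict):
        cleaned = {}
        for key, value in jsonData.items():
            new_value = remove_empty_elements(value)
            if new_value not in to_be_deleted:
                cleaned[key] = new_value
        return cleaned
    return jsonData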
Try out this:
data = {
    "_id": "30546c62-8ea0-4f1a-a239-cc7508041a7b",
    "IsActive": "True",
    "name": "Pixel 3",
    "phone": [
        {
            "Bill": 145,
            "phonetype": "xyz",
            "insurance": [
                {
                    "year_one_claims": [
                        {"2020": 200},
                        {},
                        {},
                        {},
                        {},
                    ]
                },
                {
                    "year_two_claims": [
                        {},
                        {},
                        {},
                        {},
                        {},
                    ]
                },
            ]
        }
    ],
    "Provider": {
        "agent": "aaadd",
    }
}
for phn_data in data['phone']:
    # iterate over copies, since entries may be removed while looping
    for ins in list(phn_data['insurance']):
        for key, val in list(ins.items()):
            for ins_data in list(val):
                if not ins_data:
                    val.remove(ins_data)
            if not val:
                del ins[key]
                phn_data['insurance'].remove(ins)

print(data)
Output:
{
    '_id': '30546c62-8ea0-4f1a-a239-cc7508041a7b',
    'IsActive': 'True',
    'name': 'Pixel 3',
    'phone': [{
        'Bill': 145,
        'phonetype': 'xyz',
        'insurance': [{
            'year_one_claims': [{
                '2020': 200
            }]
        }]
    }],
    'Provider': {
        'agent': 'aaadd'
    }
}
I'm trying to parse the following JSON data, without storing it in a file, using Python.
{
  "select": {
    "value": "s_name"
  },
  "from": "student",
  "where": {
    "in": [
      "s_id",
      {
        "select": {
          "value": "s_id"
        },
        "from": "student_course",
        "where": {
          "in": [
            "c_id",
            {
              "select": {
                "value": "c_id"
              },
              "from": "course",
              "where": {
                "or": [
                  {
                    "and": [
                      {
                        "eq": [
                          "c_name",
                          {
                            "literal": "DSA"
                          }
                        ]
                      },
                      {
                        "eq": [
                          "c_name",
                          {
                            "literal": "dbms"
                          }
                        ]
                      }
                    ]
                  },
                  {
                    "eq": [
                      "c_name",
                      {
                        "literal": "algorithm"
                      }
                    ]
                  }
                ]
              }
            }
          ]
        }
      }
    ]
  }
}
I'm using the following code:
import json
x = "JSON Data which is shared above"
y = json.dumps(x)
jsonDict = json.loads(y)
print (jsonDict['where'])
I'm not sure how to proceed further; could you please advise how it can be done?
I want to fetch the values of all objects, especially the where clause.
json.dumps() takes an object and encodes it into a JSON string. But you are trying to take a JSON string and decode it into an object (a dict in this case). The method you should be applying against x therefore is json.loads(). You can then convert the resulting dict back into a JSON string, y, with json.dumps():
import json
x = """{
"select": {
"value": "s_name"
},
"from": "student",
"where": {
"in": [
"s_id",
{
"select": {
"value": "s_id"
},
"from": "student_course",
"where": {
"in": [
"c_id",
{
"select": {
"value": "c_id"
},
"from": "course",
"where": {
"or": [
{
"and": [
{
"eq": [
"c_name",
{
"literal": "DSA"
}
]
},
{
"eq": [
"c_name",
{
"literal": "dbms"
}
]
}
]
},
{
"eq": [
"c_name",
{
"literal": "algorithm"
}
]
}
]
}
}
]
}
}
]
}
}"""
jsonDict = json.loads(x) # from string to a dict
print(jsonDict['where'])
y = json.dumps(jsonDict) # from dict back to a string
Prints:
{'in': ['s_id', {'select': {'value': 's_id'}, 'from': 'student_course', 'where': {'in': ['c_id', {'select': {'value': 'c_id'}, 'from': 'course', 'where': {'or': [{'and': [{'eq': ['c_name', {'literal': 'DSA'}]}, {'eq': ['c_name', {'literal': 'dbms'}]}]}, {'eq': ['c_name', {'literal': 'algorithm'}]}]}}]}}]}
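To go further and pull values out of the nested structure (e.g. every where clause, or every literal), a small recursive walk works; a sketch, where the helper name find_key is mine, not part of any library:

def find_key(obj, wanted):
    # yield every value stored under `wanted` anywhere in nested dicts/lists
    if isinstance(obj, dict):
        for key, value in obj.items():
            if key == wanted:
                yield value
            yield from find_key(value, wanted)
    elif isinstance(obj, list):
        for item in obj:
            yield from find_key(item, wanted)

print(list(find_key(jsonDict, 'literal')))  # ['DSA', 'dbms', 'algorithm']
print(len(list(find_key(jsonDict, 'where'))))  # 3 where clauses, outermost first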
The dictionaries are below: sample lists of length 2 and 3. By checking the length, the query has to be generated dynamically.
a = [{'data': 'abc'}, {'prod': 'def'}]
if len(a) == 2:
    # the query below has to be generated
"query": {
"bool": {
"should": [
{
"query_string": {
"query": "*abc*",
"fields": [
"data"
]
}
},
{
"query_string": {
"query": "*def*",
"fields": [
"prod"
]
}
}
]
}
}
}
a = [{'data': 'abc'}, {'prod': 'def'}, {'email': '#gmail'}]
if len(a) == 3:
    # the query below has to be generated
"query": {
"bool": {
"should": [
{
"query_string": {
"query": "*abc*",
"fields": [
"data"
]
}
},
{
"query_string": {
"query": "*def*",
"fields": [
"prod"
]
}
},
{
"query_string": {
"query": "*#gmail.com*",
"fields": [
"email"
]
}
}
]
}
}
}```
Basically, as dictionaries keep being added, the query should keep growing with entries like {"query_string": {"query": "*#gmail.com*", "fields": ["email"]}}.
Using a simple iteration.
Ex:
from pprint import pprint

a = [{'data': 'abc'}, {'prod': 'def'}]
result = {
    "query": {
        "bool": {
            "should": []
        }
    }
}

for item in a:
    for k, v in item.items():
        result['query']['bool']['should'].append({
            "query_string": {
                "query": f"*{v}*",
                "fields": [
                    k
                ]
            }
        })

pprint(result)  # pprint produces the output shown below
Output:
{'query': {'bool': {'should': [{'query_string': {'fields': ['data'],
                                                 'query': '*abc*'}},
                               {'query_string': {'fields': ['prod'],
                                                 'query': '*def*'}}]}}}
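This looks like an Elasticsearch bool query; if it then needs to go out as a request body, the built dict can be serialized with json.dumps or handed to a client directly. A brief sketch, where the es client and index name are assumptions, not from the original:

import json

body = json.dumps(result)  # JSON string form of the query
# with the official client (assumed configured):
# es.search(index="my-index", body=result)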
I have a large collection that can be modeled more or less as the one created by the following code:
import string
from random import randint, random, choice

# `collection` is assumed to be a pymongo collection
documents = []
for i in range(100):
    letters = choice(string.ascii_letters[0:15])  # string.letters on Python 2
    documents.append({'hgvs_id': "".join([str(randint(0, 9)), letters]),
                      'sample_id': "CDE",
                      'number': i * random() * 50 - 30})
    documents.append({'hgvs_id': "".join([str(randint(0, 9)), letters]),
                      'sample_id': 'ABC',
                      'number': i * random() * 50 - 30})
    documents.append({'hgvs_id': "".join([str(randint(0, 9)), letters]),
                      'sample_id': 'GEF',
                      'number': i * random() * 50 - 30})

for i in range(10):  # add some unique values for sample_id 'ABC'
    letters = choice(string.ascii_letters[0:15])
    documents.append({'hgvs_id': "55" + letters,
                      'sample_id': 'ABC',
                      'number': i * random() * 50 - 30})

collection.insert_many(documents)
I am trying to retrieve the unique hgvs_ids that occur within documents that have a specific sample_id (ABC here) but not in documents containing the other two. Usually there will be many more sample_ids than just three.
It sounds pretty simple, but so far I have been unsuccessful. Given the size of the collection I'm working with (~30GB), I've been trying to use the aggregation framework, as follows:
sample_1 = collection.aggregate(
    [
        {'$group': {
            '_id': '$hgvs_id',
            # 'sample_id': {"$addToSet": '$hgvs_id'},
            'matchedDocuments': {'$push': {
                'id': '$_id',
                'sample_name': '$sample_id',
                'hgvs_ids': '$hgvs_id'
            }},
        }},
        {'$match': {
            '$and': [
                {'matchedDocuments': {'$elemMatch': {'sample_name': 'ABC'}}},
                # Some other operation????
            ]
        }}
    ])  # allowDiskUse=True may be needed
This returns (understandably) all the hgvs_ids having sample_id equal to ABC. Any leads would be much appreciated.
If it's the only sample_id in the "set" of grouped values then the $size will be one:
With MongoDB 3.4 you can use $in in combination:
[
    { "$group": {
        "_id": "$hgvs_id",
        "samples": { "$addToSet": "$sample_id" }
    }},
    { "$redact": {
        "$cond": {
            "if": {
                "$and": [
                    { "$in": [ "ABC", "$samples" ] },
                    { "$eq": [ { "$size": "$samples" }, 1 ] }
                ]
            },
            "then": "$$KEEP",
            "else": "$$PRUNE"
        }
    }}
]
Otherwise use $setIntersection which is just a little longer in syntax:
[
    { "$group": {
        "_id": "$hgvs_id",
        "samples": { "$addToSet": "$sample_id" }
    }},
    { "$redact": {
        "$cond": {
            "if": {
                "$and": [
                    { "$eq": [ { "$size": { "$setIntersection": [ "$samples", ["ABC"] ] } }, 1 ] },
                    { "$eq": [ { "$size": "$samples" }, 1 ] }
                ]
            },
            "then": "$$KEEP",
            "else": "$$PRUNE"
        }
    }}
]
Or probably in the simplest form for all versions supporting aggregation anyway:
{ "$group": {
"_id": "$hgvs_id",
"samples": { "$addToSet": "$sample_id" }
}},
{ "$match": {
"$and": [{ "samples": "ABC" },{ "samples": { "$size": 1 } }]
}}
]
The same principle applies to any number of arguments: the "set" produced must match the size of the argument list given, as well as contain the specific value.
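A minimal PyMongo sketch of that simplest form, assuming collection is the same as in the question:

pipeline = [
    {"$group": {
        "_id": "$hgvs_id",
        "samples": {"$addToSet": "$sample_id"}
    }},
    {"$match": {
        "$and": [{"samples": "ABC"}, {"samples": {"$size": 1}}]
    }}
]

# allowDiskUse helps with large (~30GB) collections
only_abc = [doc["_id"] for doc in collection.aggregate(pipeline, allowDiskUse=True)]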