I have the JSON data below, which I want to parse, storing the results in a dict under the following condition:
Condition --> Walk through the JSON and, for each entry under data, if groupProperty is equal to Tests, return that groupValue and the entry's value in a dict.
{
"dateFrom": "2020-03-26 07:35:00",
"dateTo": "2020-03-26 07:40:00",
"groupLabels": [
{
"groupProperty": "Tests",
"groupLabels": [
{
"groupId": "1053777",
"groupLabel": "testappzxco"
},
{
"groupId": "570009",
"groupLabel": "testappzkbo"
}
]
}
],
"binSize": 300,
"data": {
"points": [
{
"timestamp": 1585208100,
"numberOfDataPoints": 24,
"value": 0,
"groups": [
{
"groupProperty": "Tests",
"groupValue": "1053777"
},
{
"groupProperty": "Test Labels",
"groupValue": "61776"
}
]
},
{
"timestamp": 1585208100,
"numberOfDataPoints": 5,
"value": 4.888970,
"groups": [
{
"groupProperty": "Tests",
"groupValue": "1241460"
},
{
"groupProperty": "Test Labels",
"groupValue": "61710"
}
]
},
{
"timestamp": 1585208100,
"numberOfDataPoints": 96,
"value": 0,
"groups": [
{
"groupProperty": "Test Labels",
"groupValue": "61770"
}
]
},
{
"timestamp": 1585208100,
"numberOfDataPoints": 101,
"value": 0.01980198019801982,
"groups": [
{
"groupProperty": "Test Labels",
"groupValue": "61773"
}
]
},
{
"timestamp": 1585208100,
"numberOfDataPoints": 104,
"value": 0,
"groups": [
{
"groupProperty": "Test Labels",
"groupValue": "61776"
}
]
}
]
}
}
Here is what I have tried, but it doesn't even get the right details:
# Asker's non-working attempt, kept exactly as posted.
# NOTE(review): assuming original_data is the JSON text shown above, `dat` is a dict, so
# iterating it yields its string keys and key['data'] cannot work. In the sample JSON,
# "groups" is a list, so it cannot be indexed with 'groupProperty' either.
dat = json.loads(original_data)
testl=[]
for key in dat:
temp=key['data']['points']
for key1 in temp:
if key1['groups']['groupProperty'] == "Tests":
testl.append({key1['groupValue'], key['value']
})
Since the JSON is very complex, I am not sure how to get the desired output.
Below is the desired output:
[{"tname":1241460, "tvalue":4.888970},{"tname":1053777, "tvalue":0}]
Any help would be great !
There is no special problem here: you just have to be very cautious in writing the comprehension:
# NOTE(review): `d` is presumably the parsed JSON (called `dat` in the question) — confirm.
# Produces one dict per group entry, across all points, whose groupProperty is 'Tests':
# tname comes from the group's groupValue, tvalue from the enclosing point's value.
[{"tname": g['groupValue'], 'tvalue': da['value']}
for da in d['data']['points'] for g in da['groups'] if g['groupProperty'] == 'Tests']
with the provided sample, it gives as expected:
[{'tname': '1053777', 'tvalue': 0}, {'tname': '1241460', 'tvalue': 4.88897}]
You have not noticed that groups is also an array. This way it should work:
# Collect a {'tname', 'tvalue'} pair for every group tagged "Tests".
# `dat` is the parsed JSON document from the question.
testl = []  # start from a fresh list so the snippet does not rely on earlier code
points = dat['data']['points']
for point in points:
    value = point['value']
    # "groups" is a list of dicts, so each entry has to be inspected in turn.
    for group in point['groups']:
        if group['groupProperty'] == 'Tests':
            # The key is 'tvalue' (not 'value') to match the requested output.
            testl.append({'tname': group['groupValue'], 'tvalue': value})
print(testl)
A recursive search making fewer assumptions on the structure of dat:
testl = []


def search(obj, seen_data, value):
    """Walk *obj* recursively and append matching "Tests" entries to ``testl``.

    Nothing is collected until a dict containing a 'data' key has been
    visited on the way down. Below that point, the first 'value' met on the
    path is remembered and paired with every dict underneath whose
    'groupProperty' is 'Tests' and which has a 'groupValue'.

    Args:
        obj: A parsed-JSON node; only lists and dicts are descended into.
        seen_data: Truthy once a dict with a 'data' key was visited on the
            current path.
        value: The 'value' picked up higher on the path, or None if none yet.
    """
    if isinstance(obj, list):
        for item in obj:
            search(item, seen_data, value)
        return
    if not isinstance(obj, dict):
        return

    if not seen_data:
        # Still above the 'data' section: only look for the marker key.
        seen_data = 'data' in obj
    elif value is None:
        if 'value' in obj:
            value = obj['value']
    elif obj.get('groupProperty') == 'Tests' and 'groupValue' in obj:
        testl.append({'tname': obj['groupValue'], 'tvalue': value})
        # Consumed for this subtree; siblings still get the caller's value.
        value = None

    for child in obj.values():
        search(child, seen_data, value)
search(dat, False, None)
print(testl)
Prints:
[{'tname': '1053777', 'tvalue': 0}, {'tname': '1241460', 'tvalue': 4.88897}]
See Python Demo
Related
This is the JSON file that I want to convert to Python:
{
"UniqueId": "PO3589472",
"FareType": 2,
"BookedBy": "Api ",
"OrderBy": "Api ",
"ClientBalance": 0,
"Error": null,
"Success": true,
"TktTimeLimit": "2022-08-10T14:11:45",
"Category": 21,
"Status": 21,
"RefundMethod": 1,
"TravelItinerary": {
"ItineraryInfo": {
"ItineraryPricing": {
"BaseFare": 8469250,
"ServiceTax": 0,
"TotalTax": 993000,
"TotalFare": 9462250,
"TotalCommission": 0,
"Currency": "IRR"
},
"CustomerInfoes": [
{
"Customer": {
"Gender": 0,
"PassengerType": 1,
"PassportNumber": "",
"NationalId": "1829961233",
"Nationality": "IR",
"DateOfBirth": "1996-07-08T00:00:00",
"PassportExpireDate": "0001-01-01T00:00:00",
"PassportIssueCountry": "IR",
"PassportIssueDate": "2022-08-10T00:00:00",
"PaxName": {
"PassengerFirstName": "MAJID",
"PassengerMiddleName": null,
"PassengerLastName": "MAJIDIFAR",
"PassengerTitle": 0
}
},
"ETickets": "8151405444745",
"ETicketNumbers": [
{
"ETicketNumber": "8151405444745",
"EticketStatus": 1,
"IsRefunded": false,
"DateOfIssue": "2022-08-10T13:58:47",
"AirlinePnr": "TXNXM",
"TotalRefund": 0
}
]
}
],
"ReservationItems": [
{
"AirEquipmentType": "737",
"AirlinePnr": "TXNXM",
"ArrivalAirportLocationCode": "ABD",
"ArrivalDateTime": "2022-08-17T23:25:00",
"ArrivalTerminal": "",
"Baggage": "20KG",
"DepartureAirportLocationCode": "THR",
"DepartureDateTime": "2022-08-17T22:05:00",
"DepartureTerminal": "Terminal 4",
"FlightNumber": "3750",
"JourneyDuration": "01:20",
"JourneyDurationPerMinute": 0,
"MarketingAirlineCode": "EP",
"OperatingAirlineCode": "EP",
"ResBookDesigCode": "Y",
"StopQuantity": 0,
"IsCharter": false,
"TechnicalStops": [],
"IsReturn": false,
"CabinClassCode": 1
}
],
"TripDetailPtcFareBreakdowns": [
{
"PassengerTypeQuantity": {
"PassengerType": 1,
"Quantity": 1
},
"TripDetailPassengerFare": {
"BaseFare": 8469250,
"ServiceTax": 0,
"Tax": 993000,
"TotalFare": 9462250,
"Commission": 0,
"Currency": "IRR"
}
}
],
"PhoneNumber": "09359276735",
"Email": "info#iran-tech.com",
"ItineraryFareFamily": null
},
"BookingNotes": [],
"Services": []
},
"ValidatingAirlineCode": "EP",
"DirectionInd": 1,
"OnlineCheckIn": false,
"AirRemark": [],
"curl_error": false
}
As mentioned already, this begs for recursion. Here is an example:
import json
from collections.abc import Callable, Hashable
from typing import Any
def filter_dict(
    dictionary: dict[Hashable, Any],
    exclude_func: Callable[[Any], bool],
) -> None:
    """Remove, in place, every entry whose value *exclude_func* rejects.

    Nested dictionaries are filtered recursively and are themselves never
    passed to *exclude_func*. Values stored inside lists are not descended
    into.

    Args:
        dictionary: The mapping to clean up; it is modified in place.
        exclude_func: Predicate returning True for values to be removed.
    """
    doomed = []
    for name, item in dictionary.items():
        if isinstance(item, dict):
            filter_dict(item, exclude_func)
            continue
        if exclude_func(item):
            doomed.append(name)
    # Deleting after the scan avoids resizing the dict while iterating it.
    for name in doomed:
        dictionary.pop(name)
def is_nothing(value: Any) -> bool:
    """Return True for None, the empty string, and anything equal to 0.

    The comparison uses ``==``, so False and 0.0 also count as "nothing".
    """
    if value is None:
        return True
    return value == 0 or value == ""
def main() -> None:
    """Parse the JSON text, strip the "nothing" values, and print the result."""
    raw = "{}"  # Your JSON string here
    parsed = json.loads(raw)
    filter_dict(parsed, is_nothing)
    pretty = json.dumps(parsed, indent=4)
    print(pretty)


if __name__ == '__main__':
    main()
It does not handle JSON objects nested inside arrays (i.e. dictionaries nested inside lists), but I think you can build on that yourself.
If data contains the parsed JSON from the question, you can do:
def change(o):
    """Strip None, empty-string and 0-equal values from *o*, in place.

    Dict entries with such values are deleted; every other value is
    processed recursively. Lists are only walked: their own items are never
    removed, but dicts inside them are cleaned.
    """
    if isinstance(o, dict):
        # Snapshot the items first, since entries are deleted during the walk.
        for name, item in list(o.items()):
            if item is None or item == 0 or item == "":
                o.pop(name)
            else:
                change(item)
        return
    if isinstance(o, list):
        for element in o:
            change(element)
change(data)
print(data)
Prints:
{
"UniqueId": "PO3589472",
"FareType": 2,
"BookedBy": "Api ",
"OrderBy": "Api ",
"Success": True,
"TktTimeLimit": "2022-08-10T14:11:45",
"Category": 21,
"Status": 21,
"RefundMethod": 1,
"TravelItinerary": {
"ItineraryInfo": {
"ItineraryPricing": {
"BaseFare": 8469250,
"TotalTax": 993000,
"TotalFare": 9462250,
"Currency": "IRR",
},
"CustomerInfoes": [
{
"Customer": {
"PassengerType": 1,
"NationalId": "1829961233",
"Nationality": "IR",
"DateOfBirth": "1996-07-08T00:00:00",
"PassportExpireDate": "0001-01-01T00:00:00",
"PassportIssueCountry": "IR",
"PassportIssueDate": "2022-08-10T00:00:00",
"PaxName": {
"PassengerFirstName": "MAJID",
"PassengerLastName": "MAJIDIFAR",
},
},
"ETickets": "8151405444745",
"ETicketNumbers": [
{
"ETicketNumber": "8151405444745",
"EticketStatus": 1,
"DateOfIssue": "2022-08-10T13:58:47",
"AirlinePnr": "TXNXM",
}
],
}
],
"ReservationItems": [
{
"AirEquipmentType": "737",
"AirlinePnr": "TXNXM",
"ArrivalAirportLocationCode": "ABD",
"ArrivalDateTime": "2022-08-17T23:25:00",
"Baggage": "20KG",
"DepartureAirportLocationCode": "THR",
"DepartureDateTime": "2022-08-17T22:05:00",
"DepartureTerminal": "Terminal 4",
"FlightNumber": "3750",
"JourneyDuration": "01:20",
"MarketingAirlineCode": "EP",
"OperatingAirlineCode": "EP",
"ResBookDesigCode": "Y",
"TechnicalStops": [],
"CabinClassCode": 1,
}
],
"TripDetailPtcFareBreakdowns": [
{
"PassengerTypeQuantity": {
"PassengerType": 1,
"Quantity": 1,
},
"TripDetailPassengerFare": {
"BaseFare": 8469250,
"Tax": 993000,
"TotalFare": 9462250,
"Currency": "IRR",
},
}
],
"PhoneNumber": "09359276735",
"Email": "info#iran-tech.com",
},
"BookingNotes": [],
"Services": [],
},
"ValidatingAirlineCode": "EP",
"DirectionInd": 1,
"AirRemark": [],
}
This function will recursively iterate over the dictionary and remove keys with empty values. However, there may be some issues with nested lists; I will try to finish it later.
def remove_keys_in_one_level(d):
    """Drop every falsy-valued key from *d*, in place, and return *d*.

    Truthy values are processed recursively, and so are the items of lists.
    A container that only becomes empty after its own clean-up is kept,
    because the emptiness test runs before the recursion.
    """
    if isinstance(d, dict):
        for name, child in tuple(d.items()):
            if child:
                remove_keys_in_one_level(child)
            else:
                # configure this condition if you don't want to remove
                # empty lists, dictionaries...
                del d[name]
    elif isinstance(d, list):
        for child in d:
            remove_keys_in_one_level(child)
    return d
remove_keys_in_one_level(jsn)
I have the following dictionary:
{
"Land": {
"2018": {
"VALUE:Avg": 49.0,
"VALUE:Sum": 49.0
},
"2008": {
"VALUE:Avg": 27.24,
"VALUE:Sum": 27.24
}
},
"Air": {
"2010": {
"VALUE:Avg": 57.4,
"VALUE:Sum": 57.4
},
"2017": {
"VALUE:Avg": 30.72,
"VALUE:Sum": 61.44
}
}
}
I have to change it to the following format, with parent keys as labels and the values as children:
[
{
"label": "Land",
"children": [
{
"label": "2018",
"children": [
{
"label": "VALUE:Avg"
},
{
"label": "VALUE:Sum"
}
]
},
{
"label": "2008",
"children": [
{
"label": "VALUE:Avg"
},
{
"label": "VALUE:Sum"
}
]
}
]
},
]
I tried to achieve this with recursion, but it is not working.
Recursion should work:
def transfer(mydict):
    """Convert a nested dict into a list of label/children nodes.

    Each key becomes ``{"label": key}``. When the key's value is itself a
    dict, the node also gets a "children" list built the same way; any other
    value is dropped and the node stays a leaf.
    """
    def as_node(label, payload):
        if isinstance(payload, dict):
            return {"label": label, "children": transfer(payload)}
        return {"label": label}

    return [as_node(label, payload) for label, payload in mydict.items()]
I have a Python dictionary whose nesting depth I don't know in advance; here is an example:
{
"name":"a_struct",
"type":"int",
"data":{
"type":"struct",
"elements":[
{
"data":[
{
"name":"test1",
"data_id":0,
"type":"uint8",
"wire_type":0,
"data":0
},
{
"name":"test2",
"data_id":2,
"type":"uint32",
"wire_type":2,
"data":0
},
{
"name":"test3",
"data_id":3,
"type":"int",
"wire_type":4,
"data":{
"type":"uint32",
"elements":[
]
}
},
{
"name":"test4",
"data_id":4,
"type":"uint32",
"wire_type":2,
"data":0
},
{
"name":"test5",
"data_id":5,
"type":"int",
"wire_type":4,
"data":{
"type":"uint32",
"elements":[
]
}
}
]
}
]
}
}
My goal is to filter out each dictionary whose name key is not one of ["test1", "test3", "test5"]. This should be applicable to arbitrarily deeply nested dictionaries.
So in this case, the result should be the filtered dictionary:
{
"name":"a_struct",
"type":"int",
"data":{
"type":"struct",
"elements":[
{
"data":[
{
"name":"test1",
"data_id":0,
"type":"uint8",
"wire_type":0,
"data":0
},
{
"name":"test3",
"data_id":3,
"type":"int",
"wire_type":4,
"data":{
"type":"uint32",
"elements":[
]
}
},
{
"name":"test5",
"data_id":5,
"type":"int",
"wire_type":4,
"data":{
"type":"uint32",
"elements":[
]
}
}
]
}
]
}
}
I tried to use the dpath lib (https://pypi.org/project/dpath/), by providing a filter criteria like so:
# Predicate: returns True only for a dict whose "name" is test1, test3 or test5.
# NOTE(review): the indentation of this snippet was lost, so it is unclear which `if`
# each `else` pairs with; some inputs may fall through and return None instead of False.
def afilter(x):
if isinstance(x, dict):
if "name" in x:
if x["name"] in ["test1", "test3", "test5"]:
return True
else:
return False
else:
return False
result = dpath.util.search(my_dict, "**", afilter=afilter)
But I get the wrong result: all the other keys have been filtered out (and null placeholders are left in the list), which is not what I want:
{
"data":{
"elements":[
{
"data":[
{
"name":"test1",
"data_id":0,
"type":"uint8",
"wire_type":0,
"data":0
},
null,
{
"name":"test3",
"data_id":3,
"type":"int",
"wire_type":4,
"data":{
"type":"uint32",
"elements":[
]
}
},
null,
{
"name":"test5",
"data_id":5,
"type":"int",
"wire_type":4,
"data":{
"type":"uint32",
"elements":[
]
}
}
]
}
]
}
}
How to get this right?
PS: I'm not forced to use the dpath lib. So, the solution might be written in pure python.
You can recursively process your dictionary while filtering unneeded records:
def delete_keys(data, keys_to_keep):
    """Return a filtered copy of *data*, keeping only the wanted named records.

    The nested structure is walked recursively. In every list stored under
    a "data" key, a dict is kept only if its 'name' is in *keys_to_keep*;
    everything else is copied through unchanged. Lists may hold plain values
    or further lists, which are preserved as they are.

    Args:
        data: The (possibly deeply nested) dictionary to filter. It is not
            mutated.
        keys_to_keep: Collection of 'name' values whose records survive.

    Returns:
        A new dictionary with the same layout minus the rejected records.
    """
    def _copy(node):
        # Dicts get filtered, lists get walked, and scalars pass through
        # untouched, so lists of plain values do not raise.
        if isinstance(node, dict):
            return delete_keys(node, keys_to_keep)
        if isinstance(node, list):
            return [_copy(child) for child in node]
        return node

    res = {}
    for k, v in data.items():
        if isinstance(v, list) and k == "data":
            # Only dicts can carry a 'name'; non-dict entries are left alone.
            v = [
                obj for obj in v
                if not isinstance(obj, dict) or obj.get('name') in keys_to_keep
            ]
        res[k] = _copy(v)
    return res
keys_to_keep = {'test1', 'test3', 'test5'}
print(delete_keys(data, keys_to_keep))
For your input, it gives:
{
"name": "a_struct",
"type": "int",
"data": {
"type": "struct",
"elements": [
{
"data": [
{
"name": "test1",
"data_id": 0,
"type": "uint8",
"wire_type": 0,
"data": 0,
},
{
"name": "test3",
"data_id": 3,
"type": "int",
"wire_type": 4,
"data": {"type": "uint32", "elements": []},
},
{
"name": "test5",
"data_id": 5,
"type": "int",
"wire_type": 4,
"data": {"type": "uint32", "elements": []},
},
]
}
],
},
}
I'm trying to move data from SQL to Mongo. Here is the challenge I'm facing: if a child object is empty, I want to remove its parent element. The removal should propagate upwards as far as the insurance field.
Here is what I tried:
# Asker's attempt, kept exactly as posted.
# NOTE(review): the body never calls remove_empty_elements again, so only the direct
# children of jsonData are inspected; deeper levels of the structure are left untouched.
# NOTE(review): indentation was lost in this snippet, so the exact nesting of the
# branches below cannot be determined from the text alone.
def remove_empty_elements(jsonData):
if(isinstance(jsonData, list) or isinstance(jsonData,dict)):
for elem in list(jsonData):
if not isinstance(elem, dict) and isinstance(jsonData[elem], list) and elem:
jsonData[elem] = [x for x in jsonData[elem] if x]
if(len(jsonData[elem])==0):
del jsonData[elem]
elif not isinstance(elem, dict) and isinstance(jsonData[elem], dict) and not jsonData[elem]:
del jsonData[elem]
else:
pass
return jsonData
sample data
{
"_id": "30546c62-8ea0-4f1a-a239-cc7508041a7b",
"IsActive": "True",
"name": "Pixel 3",
"phone": [
{
"Bill": 145,
"phonetype": "xyz",
"insurance": [
{
"year_one_claims": [
{
"2020": 200
},
{
},
{
},
{
},
{
}
]
},
{
"year_two_claims": [
{
},
{
},
{
},
{
},
{
}
]
},
]
}
],
"Provider": {
"agent": "aaadd",
}
}
Results should look like that
{
"_id": "30546c62-8ea0-4f1a-a239-cc7508041a7b",
"IsActive": "True",
"name": "Pixel 3",
"phone": [
{
"Bill": 145,
"phonetype": "xyz",
"insurance": [
{
"year_one_claims": [
{
"2020": 200
},
]
},
]
}
],
"Provider": {
"agent": "aaadd",
}
}
Your if statements are kind of confusing. I think you are looking for recursion:
import json
# define which elements you want to remove:
to_be_deleted = [[], {}, "", None]


def remove_empty_elements(jsonData):
    """Return a copy of *jsonData* with "empty" members pruned bottom-up.

    Children are cleaned first, so a container that becomes empty once its
    own children are removed is dropped as well. A member is dropped when
    its cleaned form equals one of the entries of ``to_be_deleted``. Values
    that are neither lists nor dicts are returned unchanged.
    """
    if isinstance(jsonData, dict):
        return {
            key: cleaned
            for key, raw in jsonData.items()
            if (cleaned := remove_empty_elements(raw)) not in to_be_deleted
        }
    if isinstance(jsonData, list):
        return [
            cleaned
            for raw in jsonData
            if (cleaned := remove_empty_elements(raw)) not in to_be_deleted
        ]
    return jsonData
print(json.dumps(remove_empty_elements(jsonData), indent=4))
Edit/Note: from Python 3.8 onwards you can use assignment expressions (:=) in comprehensions.
Output:
{
"_id": "30546c62-8ea0-4f1a-a239-cc7508041a7b",
"IsActive": "True",
"name": "Pixel 3",
"phone": [
{
"Bill": 145,
"phonetype": "xyz",
"insurance": [
{
"year_one_claims": [
{
"2020": 200
}
]
}
]
}
],
"Provider": {
"agent": "aaadd"
}
}
Try out this:
data = {
"_id": "30546c62-8ea0-4f1a-a239-cc7508041a7b",
"IsActive": "True",
"name": "Pixel 3",
"phone": [
{
"Bill": 145,
"phonetype": "xyz",
"insurance": [
{
"year_one_claims": [
{
"2020": 200
},
{
},
{
},
{
},
{
}
]
},
{
"year_two_claims": [
{
},
{
},
{
},
{
},
{
}
]
},
]
}
],
"Provider": {
"agent": "aaadd",
}
}
# Strip empty claim entries, then empty claim lists, then empty insurance
# records from every phone entry of `data` (modified in place).
for phn_data in data['phone']:
    for ins in phn_data['insurance']:
        # Snapshot the items: keys are deleted from `ins` inside the loop.
        for key, val in list(ins.items()):
            # Keep only the non-empty entries, updating the list in place.
            val[:] = [ins_data for ins_data in val if ins_data]
            if not val:
                del ins[key]
    # Rebuild the list after the walk instead of removing items while
    # iterating over it, which would skip the element after each removal.
    # A record is dropped only once nothing is left in it.
    phn_data['insurance'][:] = [ins for ins in phn_data['insurance'] if ins]
print(data)
Output:
{
'_id': '30546c62-8ea0-4f1a-a239-cc7508041a7b',
'IsActive': 'True',
'name': 'Pixel 3',
'phone': [{
'Bill': 145,
'phonetype': 'xyz',
'insurance': [{
'year_one_claims': [{
'2020': 200
}]
}]
}],
'Provider': {
'agent': 'aaadd'
}
}
I have this JSON :
{
"duration": 1942,
"frame_id": 0,
"detect1": [
{
"type": {
"s_type1": [
{
"confidence": 98.70016,
"klass": -1,
"name": "c*****"
},
{
"confidence": 1.042385,
"klass": -1,
"name": "c*****"
},
{
"confidence": 0.1587732,
"klass": -1,
"name": "s*****"
}
],
"s_type2": [
{
"confidence": 92.82484,
"klass": -1,
"name": "b*****"
},
{
"confidence": 7.098834,
"klass": -1,
"name": "b*****"
},
{
"confidence": 0.02423214,
"klass": -1,
"name": "p*****"
},
],
"Box": [
80.80994,
170.0965,
1091.778
]
},
"confidences": [
90.08681,
99.91595,
90.12489
]
}
]
}
And I would like to save some of the keys and values from this JSON to another JSON. The new JSON will keep:
duration (k,v),
frame_id (k,v),
detect1 :
type : for s_type1 and s_type2, only the first dict will be kept and the klass (k,v) will be removed,
Box (k,v)
confidences (k,v)
The final result :
{
"duration": 1942,
"frame_id": 0,
"detect1": [
{
"type": {
"s_type1": [
{
"confidence": 98.70016,
"name": "c*****"
},
],
"s_type2": [
{
"confidence": 92.82484,
"name": "b*****"
}
],
"Box": [
80.80994,
170.0965,
1091.778
]
},
"confidences": [
90.08681,
99.91595,
90.12489
]
}
]
}
I was trying to do it with the JMESPath library, but I can't get a good result.
Does anyone have an idea how to do this?
Thanks
Using jmespath to get your desired output:
import jmespath
# JMESPath multiselect that rebuilds the document: duration and frame_id are copied
# through; inside detect1, each s_type list is reduced to its first entry ([0]) holding
# only confidence and name, while Box and confidences are carried over.
expression = """{duration: duration,
frame_id: frame_id,
detect1: [{type:{s_type1: [detect1[].type.s_type1[].merge({confidence: confidence, name: name})|[0]],
s_type2: [detect1[].type.s_type2[].merge({confidence: confidence, name: name})|[0]],
Box: detect1[].type.Box[]},
confidences: detect1[].confidences[]
}
]}
"""
# Compile the expression once, then evaluate it against the document.
# NOTE(review): `json` here is presumably the already-parsed data (the name shadows
# the stdlib module) — confirm.
expression = jmespath.compile(expression)
expression.search(json)
{'duration': 1942,
'frame_id': 0,
'detect1': [{'type': {'s_type1': [{'confidence': 98.70016, 'name': 'c*****'}],
's_type2': [{'confidence': 92.82484, 'name': 'b*****'}],
'Box': [80.80994, 170.0965, 1091.778]},
'confidences': [90.08681, 99.91595, 90.12489]}]}