Recursively search and modify complex object (dict of lists of dicts) - python

I have a complex object.
Currently it is Dict of Lists of Dicts. But could change in the future while still only using lists and dictionaries.
I would like to find all list elements or dictionary values with type "datetime" and change them to string.
The recursive search seems to be working, but I can't figure out how to write the modified value back to the corresponding container (dict value or list item).
def mod_complex(c_obj):
    """Walk nested lists/dicts looking for datetime values (non-working attempt).

    Every ``datetime.datetime`` that is reached gets printed, but the string
    conversion only rebinds the local name ``c_obj``. The list or dict that
    holds the value is never assigned to, so the caller's object is left
    unchanged.

    Args:
        c_obj: A datetime, a list, a dict, or any other value (ignored).

    Returns:
        None. Nothing is returned and nothing is written back.
    """
    if isinstance(c_obj, datetime.datetime):
        print(c_obj)
        # ??? -- this rebinds the local only; the parent container still
        # holds the original datetime object.
        c_obj = str(c_obj)
    elif isinstance(c_obj, list):
        for v in c_obj:
            mod_complex(v)
    elif isinstance(c_obj, dict):
        for v in c_obj.values():
            mod_complex(v)

A simple way is to return a new object, in which every sub-object is also newly built.
It looks like this:
def mod_complex(c_obj):
    """Return a transformed copy of a nested list/dict structure.

    Ints are doubled, lists and dicts are rebuilt recursively, and every dict
    key is prefixed with ``"new "``. The input is never mutated.

    Args:
        c_obj: An int, a list, a dict, or any other leaf value.

    Returns:
        The transformed copy. Leaves that are not int, list or dict are
        returned unchanged instead of being replaced by ``None``.
    """
    # NOTE: bool is a subclass of int, so True/False are doubled as well.
    if isinstance(c_obj, int):
        return c_obj * 2
    if isinstance(c_obj, list):
        return [mod_complex(v) for v in c_obj]
    if isinstance(c_obj, dict):
        return {f"new {k}": mod_complex(v) for k, v in c_obj.items()}
    # Any other leaf (str, float, None, ...) passes through untouched.
    return c_obj
# Sample input: a dict of lists of dicts whose leaf values are all ints.
ob = {
"1st list": [
{ "A": 1, "B": 2 },
{ "C": 3, "D": 4 },
],
"2nd list": [
{ "E": 5, "F": 6 },
{ "G": 7, "H": 8 },
]
}
# Print the transformed copy that mod_complex returns.
print(mod_complex(ob))
Which prints:
{
"new 1st list": [
{ "new A": 2, "new B": 4 },
{ "new C": 6, "new D": 8 }
],
"new 2nd list": [
{ "new E": 10, "new F": 12 },
{ "new G": 14, "new H": 16 }
]
}
The advantage of this is that you are sure that nothing will be mutated, and therefore print(ob) outputs:
{
"1st list": [
{ "A": 1, "B": 2 },
{ "C": 3, "D": 4 },
],
"2nd list": [
{ "E": 5, "F": 6 },
{ "G": 7, "H": 8 },
]
}

def mod_complex(c_obj):
    """Convert every datetime inside a nested list/dict structure to ``str``.

    Lists and dicts are modified in place: each element or value is replaced
    by the result of the recursive call, which is how the converted string is
    written back into its container.

    Args:
        c_obj: A datetime, a list, a dict, or any other leaf value.

    Returns:
        ``str(c_obj)`` for a datetime; otherwise ``c_obj`` itself (the same
        list/dict object, now updated, or the untouched leaf).
    """
    if isinstance(c_obj, datetime.datetime):
        print(c_obj)
        return str(c_obj)
    elif isinstance(c_obj, list):
        for i, v in enumerate(c_obj):
            c_obj[i] = mod_complex(v)
    elif isinstance(c_obj, dict):
        # Re-assigning existing keys does not resize the dict, so it is safe
        # to do while iterating over items().
        for k, v in c_obj.items():
            c_obj[k] = mod_complex(v)
    return c_obj

Related

is there a way to check nested dictionary values and if they have 0 or null or empty string then delete them in python

this is json file that i want to convert to python
{
"UniqueId": "PO3589472",
"FareType": 2,
"BookedBy": "Api ",
"OrderBy": "Api ",
"ClientBalance": 0,
"Error": null,
"Success": true,
"TktTimeLimit": "2022-08-10T14:11:45",
"Category": 21,
"Status": 21,
"RefundMethod": 1,
"TravelItinerary": {
"ItineraryInfo": {
"ItineraryPricing": {
"BaseFare": 8469250,
"ServiceTax": 0,
"TotalTax": 993000,
"TotalFare": 9462250,
"TotalCommission": 0,
"Currency": "IRR"
},
"CustomerInfoes": [
{
"Customer": {
"Gender": 0,
"PassengerType": 1,
"PassportNumber": "",
"NationalId": "1829961233",
"Nationality": "IR",
"DateOfBirth": "1996-07-08T00:00:00",
"PassportExpireDate": "0001-01-01T00:00:00",
"PassportIssueCountry": "IR",
"PassportIssueDate": "2022-08-10T00:00:00",
"PaxName": {
"PassengerFirstName": "MAJID",
"PassengerMiddleName": null,
"PassengerLastName": "MAJIDIFAR",
"PassengerTitle": 0
}
},
"ETickets": "8151405444745",
"ETicketNumbers": [
{
"ETicketNumber": "8151405444745",
"EticketStatus": 1,
"IsRefunded": false,
"DateOfIssue": "2022-08-10T13:58:47",
"AirlinePnr": "TXNXM",
"TotalRefund": 0
}
]
}
],
"ReservationItems": [
{
"AirEquipmentType": "737",
"AirlinePnr": "TXNXM",
"ArrivalAirportLocationCode": "ABD",
"ArrivalDateTime": "2022-08-17T23:25:00",
"ArrivalTerminal": "",
"Baggage": "20KG",
"DepartureAirportLocationCode": "THR",
"DepartureDateTime": "2022-08-17T22:05:00",
"DepartureTerminal": "Terminal 4",
"FlightNumber": "3750",
"JourneyDuration": "01:20",
"JourneyDurationPerMinute": 0,
"MarketingAirlineCode": "EP",
"OperatingAirlineCode": "EP",
"ResBookDesigCode": "Y",
"StopQuantity": 0,
"IsCharter": false,
"TechnicalStops": [],
"IsReturn": false,
"CabinClassCode": 1
}
],
"TripDetailPtcFareBreakdowns": [
{
"PassengerTypeQuantity": {
"PassengerType": 1,
"Quantity": 1
},
"TripDetailPassengerFare": {
"BaseFare": 8469250,
"ServiceTax": 0,
"Tax": 993000,
"TotalFare": 9462250,
"Commission": 0,
"Currency": "IRR"
}
}
],
"PhoneNumber": "09359276735",
"Email": "info#iran-tech.com",
"ItineraryFareFamily": null
},
"BookingNotes": [],
"Services": []
},
"ValidatingAirlineCode": "EP",
"DirectionInd": 1,
"OnlineCheckIn": false,
"AirRemark": [],
"curl_error": false
}
As mentioned already, this begs for recursion. Here is an example:
import json
from collections.abc import Callable, Hashable
from typing import Any
def filter_dict(
    dictionary: dict[Hashable, Any],
    exclude_func: Callable[[Any], bool],
) -> None:
    """Delete, in place, every key whose value satisfies ``exclude_func``.

    Nested dictionaries are filtered recursively. A nested dictionary is
    never passed to ``exclude_func`` itself, so it is kept even if filtering
    leaves it empty. Lists are treated as plain values: dictionaries inside
    them are not visited.

    Args:
        dictionary: The mapping to filter; it is mutated.
        exclude_func: Predicate called on each non-dict value; a true result
            means the key is removed.
    """
    # Collect keys first: deleting while iterating items() would raise.
    discard = set()
    for key, value in dictionary.items():
        if isinstance(value, dict):
            filter_dict(value, exclude_func)
        elif exclude_func(value):
            discard.add(key)
    for key in discard:
        del dictionary[key]
def is_nothing(value: Any) -> bool:
    """Return True when ``value`` is None, equal to 0, or the empty string.

    Because ``False == 0`` in Python, the boolean ``False`` (and ``0.0``) is
    also reported as nothing.
    """
    return value is None or value == 0 or value == ""
def main() -> None:
    """Parse a JSON string, strip the "nothing" values, and print the result."""
    j = "{}"  # Your JSON string here
    d = json.loads(j)
    filter_dict(d, is_nothing)
    print(json.dumps(d, indent=4))


if __name__ == '__main__':
    main()
It does not handle JSON objects nested inside arrays (i.e. dictionaries nested inside lists), but I think you can build on that yourself.
If data contains your parsed Json string from the question you can do:
def change(o):
    """Remove, in place, dict entries whose value is None, 0 or ``""``.

    Dicts are cleaned and their surviving values visited recursively. Lists
    are only traversed: their elements are visited but never removed from the
    list. Since ``False == 0``, entries holding ``False`` are removed too.

    Args:
        o: A dict, a list, or any other value (ignored).
    """
    if isinstance(o, dict):
        # Iterate over a snapshot so entries can be deleted during the loop.
        for k, v in tuple(o.items()):
            if v is None or v == 0 or v == "":
                del o[k]
            else:
                change(v)
    elif isinstance(o, list):
        for v in o:
            change(v)
# `data` holds the parsed JSON from the question; change() edits it in place.
change(data)
print(data)
Prints:
{
"UniqueId": "PO3589472",
"FareType": 2,
"BookedBy": "Api ",
"OrderBy": "Api ",
"Success": True,
"TktTimeLimit": "2022-08-10T14:11:45",
"Category": 21,
"Status": 21,
"RefundMethod": 1,
"TravelItinerary": {
"ItineraryInfo": {
"ItineraryPricing": {
"BaseFare": 8469250,
"TotalTax": 993000,
"TotalFare": 9462250,
"Currency": "IRR",
},
"CustomerInfoes": [
{
"Customer": {
"PassengerType": 1,
"NationalId": "1829961233",
"Nationality": "IR",
"DateOfBirth": "1996-07-08T00:00:00",
"PassportExpireDate": "0001-01-01T00:00:00",
"PassportIssueCountry": "IR",
"PassportIssueDate": "2022-08-10T00:00:00",
"PaxName": {
"PassengerFirstName": "MAJID",
"PassengerLastName": "MAJIDIFAR",
},
},
"ETickets": "8151405444745",
"ETicketNumbers": [
{
"ETicketNumber": "8151405444745",
"EticketStatus": 1,
"DateOfIssue": "2022-08-10T13:58:47",
"AirlinePnr": "TXNXM",
}
],
}
],
"ReservationItems": [
{
"AirEquipmentType": "737",
"AirlinePnr": "TXNXM",
"ArrivalAirportLocationCode": "ABD",
"ArrivalDateTime": "2022-08-17T23:25:00",
"Baggage": "20KG",
"DepartureAirportLocationCode": "THR",
"DepartureDateTime": "2022-08-17T22:05:00",
"DepartureTerminal": "Terminal 4",
"FlightNumber": "3750",
"JourneyDuration": "01:20",
"MarketingAirlineCode": "EP",
"OperatingAirlineCode": "EP",
"ResBookDesigCode": "Y",
"TechnicalStops": [],
"CabinClassCode": 1,
}
],
"TripDetailPtcFareBreakdowns": [
{
"PassengerTypeQuantity": {
"PassengerType": 1,
"Quantity": 1,
},
"TripDetailPassengerFare": {
"BaseFare": 8469250,
"Tax": 993000,
"TotalFare": 9462250,
"Currency": "IRR",
},
}
],
"PhoneNumber": "09359276735",
"Email": "info#iran-tech.com",
},
"BookingNotes": [],
"Services": [],
},
"ValidatingAirlineCode": "EP",
"DirectionInd": 1,
"AirRemark": [],
}
This function will recursively iterate over dictionary and remove keys with empty values. However, there may be some issues with nested lists, I will try to finish it later.
def remove_keys_in_one_level(d):
    """Remove, in place, every dict key whose value is falsy.

    Falsy covers None, 0, False, ``""`` and empty lists/dicts. Surviving
    values are visited recursively, and list elements are visited as well
    (elements themselves are never removed from a list). A dict that only
    becomes empty after its own keys were removed is kept.

    Args:
        d: A dict, a list, or any other value (left untouched).

    Returns:
        The same object ``d`` that was passed in.
    """
    if isinstance(d, dict):
        # Iterate over a snapshot so keys can be popped during the loop.
        for key, value in list(d.items()):
            if not value:  # configure this condition if you don't want to remove empty lists, dictionaries...
                d.pop(key)
            else:
                remove_keys_in_one_level(value)
    elif isinstance(d, list):
        for lst_item in d:
            remove_keys_in_one_level(lst_item)
    return d
# NOTE(review): `jsn` is not defined in this snippet -- presumably the parsed JSON object from the question.
remove_keys_in_one_level(jsn)

Replace all the keys in nested dictionary and merge duplicate keys in python

I have a nested dictionary that represents parent-child relationships. For example:
{
"45273425f5abc05b->s":
{
"12864f455e7c86bb->s": {
"12864f455e7c86bbexternal_call->c": {}
}
},
"c69aead72fcd6ec1->d":
{
"8ade76728bdddf27->d": {
"8ade76728bdddf27external_call->i": {}
},
"b29f07de47c5841f->d": {
"107bec1baede1bff->l": {
"e14ebabea4785c3f->l": {
"e14ebabea4785c3fexternal_call->r": {}
},
"e36b35daa794bd50->l": {
"e36b35daa794bd50external_call->a": {}
}
},
"b29f07de47c5841fexternal_call->l": {}
},
"1906ef2c2897ac01->d": {
"1906ef2c2897ac01external_call->e": {}
}
}
}
I want to do two things with this dictionary. Firstly I want to remove everything before and including "->" i.e I want to update the keys. Secondly, after renaming there will be duplicate values in the nested dictionary. for example the second element in the dictionary. If there are two keys with the same name I want to merge them into one. So, the result will look like the following:
{
"s":
{
"s": {
"c"
}
},
"d":
{
"d": {
"i",
"l": {
"l": {
"r",
"a"
}
},
"e"
}
}
}
How can I achieve this? I have written this code so far.
def alter_dict(nested_dict):
    """Return a copy of ``nested_dict`` with each key cut down to the text after "->".

    Nested dict values are rewritten recursively. When two sibling keys
    shorten to the same name, the later one overwrites the earlier one, so
    the earlier subtree is lost; no merging is performed.

    Args:
        nested_dict: Mapping whose keys all contain "->".

    Returns:
        A new dict with the shortened keys.

    Raises:
        IndexError: If a key does not contain "->".
    """
    new_dict = {}
    for k, v in nested_dict.items():
        if isinstance(v, dict):
            v = alter_dict(v)
        new_key = k.split("->")[1]
        # Plain assignment: an equal new_key seen earlier is replaced here.
        new_dict[new_key] = v
    return new_dict
It works for a simple one like the first element but doesn't work for the second one. It loses some information. The purpose of this is to create a graph with the dictionary.
You can use recursion:
import json
from collections import defaultdict
def merge(d):
    """Merge a list of nested dicts, shortening keys and combining duplicates.

    Each key is reduced to the text after its last "->" (a key without "->"
    is kept whole). All child dicts that end up under the same shortened key
    are collected and merged recursively into a single dict.

    Args:
        d: An iterable of dicts whose values are themselves dicts.

    Returns:
        A plain dict mapping each shortened key to its merged subtree.
    """
    grouped = defaultdict(list)
    for mapping in d:
        for key, child in mapping.items():
            grouped[key.split('->')[-1]].append(child)
    # Every bucket is a list of sibling subtrees, i.e. a valid merge() input.
    return {key: merge(children) for key, children in grouped.items()}
# The parent/child dictionary from the question, written on a single line.
data = {'45273425f5abc05b->s': {'12864f455e7c86bb->s': {'12864f455e7c86bbexternal_call->c': {}}}, 'c69aead72fcd6ec1->d': {'8ade76728bdddf27->d': {'8ade76728bdddf27external_call->i': {}}, 'b29f07de47c5841f->d': {'107bec1baede1bff->l': {'e14ebabea4785c3f->l': {'e14ebabea4785c3fexternal_call->r': {}}, 'e36b35daa794bd50->l': {'e36b35daa794bd50external_call->a': {}}}, 'b29f07de47c5841fexternal_call->l': {}}, '1906ef2c2897ac01->d': {'1906ef2c2897ac01external_call->e': {}}}}
# merge() takes a list of dicts, so the single top-level dict is wrapped in one.
print(json.dumps(merge([data]), indent=4))
Output:
{
"s": {
"s": {
"c": {}
}
},
"d": {
"d": {
"i": {},
"l": {
"l": {
"r": {},
"a": {}
}
},
"e": {}
}
}
}

How to parse ['A:B:C', 'A:B:D', 'A:C'] into {'A': ['C', {'B': ['D', 'C']} ] }

Essentially I need to write a parser of some product of a markup. It's a list of strings formatted like such:
x = [
'A:B:C:D:E',
'A:B:D',
'A:C:E:F',
'B:D:E',
'B:C',
'A:C:F',
]
I need to turn it into a python object like so:
{
"B": [
"C",
{
"D": "E"
}
],
"A": [
{
"B": [
"D",
{
"C": {
"D": "E"
}
}
]
},
{
"C": [
"F",
{
"E": "F"
}
]
}
]
}
You can copy above and paste into this inspector to look at the object hierarchy, and understand what I'm going after. In any regards, it's a nested dictionary combining common keys, and putting items in lists sometimes.
TL;DR -
I have written a function below
# Split every colon-separated string in x into its list of segments.
splits = [l.split(':') for l in x]
def DictDrill(o):
    """Turn a list of path lists into a nested dict keyed by path segment.

    A list of lists is grouped by first element, each group keeping the
    remaining tails, and the grouping is repeated on every tail list. The
    result therefore consists of dicts only: the last segment of each path
    becomes a key whose value is an empty dict.

    Args:
        o: Either a list whose items are all lists, or a dict whose values
            are such lists.

    Returns:
        The nested dict, or None when ``o`` is neither of the accepted shapes.
    """
    # list of lists
    if isinstance(o, list) and all(isinstance(item, list) for item in o):
        d = dict()
        for group in o:
            if len(group) > 1:
                d[group[0]] = d.get(group[0], []) + [group[1:]]
            if len(group) == 1:
                # Registers the key without adding a tail.
                d[group[0]] = d.get(group[0], []) + []
        return DictDrill(d)
    # a dictionary
    elif isinstance(o, dict):
        return {k: DictDrill(groups) for k, groups in o.items()}
    return None
But you'll see that this script is only returning dictionaries and the last item is placed on as a key again with an empty dict() as value. If you run my script like DictDrill(splits) on the example you will see this:
{
"B": {
"C": {},
"D": {
"E": {}
}
},
"A": {
"C": {
"E": {
"F": {}
},
"F": {}
},
"B": {
"C": {
"D": {
"E": {}
}
},
"D": {}
}
}
}
Notice the useless {} as values
Preferably I need to solve this in python. I know a little C# but it seems very cumbersome to move data around between lists and dictionaries...
You can use itertools.groupby with recursion:
from itertools import groupby as gb
# Sample input: colon-separated paths, same values as the list x in the question.
data = ['A:B:C:D:E', 'A:B:D', 'A:C:E:F', 'B:D:E', 'B:C', 'A:C:F']
def to_dict(d):
    """Fold a flat list of segments into a right-nested dict.

    ``['A', 'B', 'C']`` becomes ``{'A': {'B': 'C'}}`` and a single-element
    list becomes that element. The argument is returned unchanged when it is
    a dict, is empty, or already contains a dict or list.

    Args:
        d: A flat sequence of segments, or an already structured value.

    Returns:
        The nested dict, the lone element, or ``d`` itself.
    """
    if isinstance(d, dict) or not d or any(isinstance(i, (dict, list)) for i in d):
        return d
    return d[0] if len(d) == 1 else {d[0]: to_dict(d[1:])}
def group(d):
    """Group path lists by their first segment, recursing on the remainders.

    Args:
        d: A list of non-empty lists of segments.

    Returns:
        A list with one entry per distinct first segment: a single-key dict
        when that segment has something beneath it, otherwise the bare
        segment itself.
    """
    # groupby only merges adjacent items, so sort by the same key first.
    by_head = gb(sorted(d, key=lambda x: x[0]), key=lambda x: x[0])
    # Pair each head with the tails (everything after the head) of its paths.
    _d = [(a, [c for _, *c in b]) for a, b in by_head]
    new_d = [
        {a: to_dict(b[0] if len(b) == 1 else group(b))}
        for a, b in _d
    ]
    # Iterating a dict yields its keys: an entry whose value is falsy is
    # flattened to its key, every other entry is kept as a dict.
    return [i for b in new_d for i in (b if not all(b.values()) else [b])]
import json
# Split each path on ':' and pretty-print the grouped structure.
print(json.dumps(group([i.split(':') for i in data]), indent=4))
Output:
[
{
"A": [
{
"B": [
{
"C": {
"D": "E"
}
},
"D"
]
},
{
"C": [
{
"E": "F"
},
"F"
]
}
]
},
{
"B": [
"C",
{
"D": "E"
}
]
}
]

How to convert json to csv when there are multiple nested structures in json? [duplicate]

I'm struggling with this problem. I have a JSON file and need to output it to CSV; that's fine if the structure is fairly flat with no deeply nested items.
But in this case the nested RACES is messing me up.
How would I go about getting the data in a format like this:
VENUE, COUNTRY, ITW, RACES__NO, RACES__TIME
for each object and each race in the object?
{
"1": {
"VENUE": "JOEBURG",
"COUNTRY": "HAE",
"ITW": "XAD",
"RACES": {
"1": {
"NO": 1,
"TIME": "12:35"
},
"2": {
"NO": 2,
"TIME": "13:10"
},
"3": {
"NO": 3,
"TIME": "13:40"
},
"4": {
"NO": 4,
"TIME": "14:10"
},
"5": {
"NO": 5,
"TIME": "14:55"
},
"6": {
"NO": 6,
"TIME": "15:30"
},
"7": {
"NO": 7,
"TIME": "16:05"
},
"8": {
"NO": 8,
"TIME": "16:40"
}
}
},
"2": {
"VENUE": "FOOBURG",
"COUNTRY": "ABA",
"ITW": "XAD",
"RACES": {
"1": {
"NO": 1,
"TIME": "12:35"
},
"2": {
"NO": 2,
"TIME": "13:10"
},
"3": {
"NO": 3,
"TIME": "13:40"
},
"4": {
"NO": 4,
"TIME": "14:10"
},
"5": {
"NO": 5,
"TIME": "14:55"
},
"6": {
"NO": 6,
"TIME": "15:30"
},
"7": {
"NO": 7,
"TIME": "16:05"
},
"8": {
"NO": 8,
"TIME": "16:40"
}
}
}, ...
}
I would like to output this to CSV like this:
VENUE, COUNTRY, ITW, RACES__NO, RACES__TIME
JOEBERG, HAE, XAD, 1, 12:35
JOEBERG, HAE, XAD, 2, 13:10
JOEBERG, HAE, XAD, 3, 13:40
...
...
FOOBURG, ABA, XAD, 1, 12:35
FOOBURG, ABA, XAD, 2, 13:10
So first I get the correct keys:
self.keys = self.data.keys()
keys = ["DATA_KEY"]
for key in self.keys:
if type(self.data[key]) == dict:
for k in self.data[key].keys():
if k not in keys:
if type(self.data[key][k]) == unicode:
keys.append(k)
elif type(self.data[key][k]) == dict:
self.subkey = k
for sk in self.data[key][k].values():
for subkey in sk.keys():
subkey = "%s__%s" % (self.subkey, subkey)
if subkey not in keys:
keys.append(subkey)
Then add the data:
But how?
This should be a fun one for you skilled forloopers. ;-)
I'd collect keys only for the first object, then assume that the rest of the format is consistent.
The following code also limits the nested object to just one; you did not specify what should happen when there is more than one. Having two or more nested structures of equal length could work (you'd 'zip' those together), but if you have structures of differing length you need to make an explicit choice how to handle those; zip with empty columns to pad, or to write out the product of those entries (A x B rows, repeating information from A each time you find a B entry).
import csv
from operator import itemgetter
# Flatten each top-level object into CSV rows: one row per nested item,
# repeating the object's flat fields on every row.
# Python 3: the csv module needs a text-mode file opened with newline=''
# (binary mode 'wb' only worked on Python 2).
with open(outputfile, 'w', newline='') as outf:
    writer = None  # will be set to a csv.DictWriter later
    for _, item in sorted(data.items(), key=itemgetter(0)):
        row = {}
        nested_name, nested_items = '', {}
        for k, v in item.items():
            if not isinstance(v, dict):
                row[k] = v
            else:
                assert not nested_items, 'Only one nested structure is supported'
                nested_name, nested_items = k, v

        if writer is None:
            # The header is built once, from the first object only:
            # its flat keys first, in sorted order ...
            fields = sorted(row)
            # ... then the sorted keys of its first nested item (by key order),
            # each prefixed with the nested structure's name.
            nested_keys = sorted(sorted(nested_items.items(), key=itemgetter(0))[0][1])
            fields.extend('__'.join((nested_name, k)) for k in nested_keys)
            writer = csv.DictWriter(outf, fields)
            writer.writeheader()

        for _, nitem in sorted(nested_items.items(), key=itemgetter(0)):
            # Overwrite the nested columns in the shared row, then emit it.
            row.update(('__'.join((nested_name, k)), v) for k, v in nitem.items())
            writer.writerow(row)
For your sample input, this produces:
COUNTRY,ITW,VENUE,RACES__NO,RACES__TIME
HAE,XAD,JOEBURG,1,12:35
HAE,XAD,JOEBURG,2,13:10
HAE,XAD,JOEBURG,3,13:40
HAE,XAD,JOEBURG,4,14:10
HAE,XAD,JOEBURG,5,14:55
HAE,XAD,JOEBURG,6,15:30
HAE,XAD,JOEBURG,7,16:05
HAE,XAD,JOEBURG,8,16:40
ABA,XAD,FOOBURG,1,12:35
ABA,XAD,FOOBURG,2,13:10
ABA,XAD,FOOBURG,3,13:40
ABA,XAD,FOOBURG,4,14:10
ABA,XAD,FOOBURG,5,14:55
ABA,XAD,FOOBURG,6,15:30
ABA,XAD,FOOBURG,7,16:05
ABA,XAD,FOOBURG,8,16:40

Parsing nested JSON and writing it to CSV

I'm struggling with this problem. I have a JSON file and need to output it to CSV; that's fine if the structure is fairly flat with no deeply nested items.
But in this case the nested RACES is messing me up.
How would I go about getting the data in a format like this:
VENUE, COUNTRY, ITW, RACES__NO, RACES__TIME
for each object and each race in the object?
{
"1": {
"VENUE": "JOEBURG",
"COUNTRY": "HAE",
"ITW": "XAD",
"RACES": {
"1": {
"NO": 1,
"TIME": "12:35"
},
"2": {
"NO": 2,
"TIME": "13:10"
},
"3": {
"NO": 3,
"TIME": "13:40"
},
"4": {
"NO": 4,
"TIME": "14:10"
},
"5": {
"NO": 5,
"TIME": "14:55"
},
"6": {
"NO": 6,
"TIME": "15:30"
},
"7": {
"NO": 7,
"TIME": "16:05"
},
"8": {
"NO": 8,
"TIME": "16:40"
}
}
},
"2": {
"VENUE": "FOOBURG",
"COUNTRY": "ABA",
"ITW": "XAD",
"RACES": {
"1": {
"NO": 1,
"TIME": "12:35"
},
"2": {
"NO": 2,
"TIME": "13:10"
},
"3": {
"NO": 3,
"TIME": "13:40"
},
"4": {
"NO": 4,
"TIME": "14:10"
},
"5": {
"NO": 5,
"TIME": "14:55"
},
"6": {
"NO": 6,
"TIME": "15:30"
},
"7": {
"NO": 7,
"TIME": "16:05"
},
"8": {
"NO": 8,
"TIME": "16:40"
}
}
}, ...
}
I would like to output this to CSV like this:
VENUE, COUNTRY, ITW, RACES__NO, RACES__TIME
JOEBERG, HAE, XAD, 1, 12:35
JOEBERG, HAE, XAD, 2, 13:10
JOEBERG, HAE, XAD, 3, 13:40
...
...
FOOBURG, ABA, XAD, 1, 12:35
FOOBURG, ABA, XAD, 2, 13:10
So first I get the correct keys:
self.keys = self.data.keys()
keys = ["DATA_KEY"]
for key in self.keys:
if type(self.data[key]) == dict:
for k in self.data[key].keys():
if k not in keys:
if type(self.data[key][k]) == unicode:
keys.append(k)
elif type(self.data[key][k]) == dict:
self.subkey = k
for sk in self.data[key][k].values():
for subkey in sk.keys():
subkey = "%s__%s" % (self.subkey, subkey)
if subkey not in keys:
keys.append(subkey)
Then add the data:
But how?
This should be a fun one for you skilled forloopers. ;-)
I'd collect keys only for the first object, then assume that the rest of the format is consistent.
The following code also limits the nested object to just one; you did not specify what should happen when there is more than one. Having two or more nested structures of equal length could work (you'd 'zip' those together), but if you have structures of differing length you need to make an explicit choice how to handle those; zip with empty columns to pad, or to write out the product of those entries (A x B rows, repeating information from A each time you find a B entry).
import csv
from operator import itemgetter
# Flatten each top-level object into CSV rows: one row per nested item,
# repeating the object's flat fields on every row.
# Python 3: the csv module needs a text-mode file opened with newline=''
# (binary mode 'wb' only worked on Python 2).
with open(outputfile, 'w', newline='') as outf:
    writer = None  # will be set to a csv.DictWriter later
    for _, item in sorted(data.items(), key=itemgetter(0)):
        row = {}
        nested_name, nested_items = '', {}
        for k, v in item.items():
            if not isinstance(v, dict):
                row[k] = v
            else:
                assert not nested_items, 'Only one nested structure is supported'
                nested_name, nested_items = k, v

        if writer is None:
            # The header is built once, from the first object only:
            # its flat keys first, in sorted order ...
            fields = sorted(row)
            # ... then the sorted keys of its first nested item (by key order),
            # each prefixed with the nested structure's name.
            nested_keys = sorted(sorted(nested_items.items(), key=itemgetter(0))[0][1])
            fields.extend('__'.join((nested_name, k)) for k in nested_keys)
            writer = csv.DictWriter(outf, fields)
            writer.writeheader()

        for _, nitem in sorted(nested_items.items(), key=itemgetter(0)):
            # Overwrite the nested columns in the shared row, then emit it.
            row.update(('__'.join((nested_name, k)), v) for k, v in nitem.items())
            writer.writerow(row)
For your sample input, this produces:
COUNTRY,ITW,VENUE,RACES__NO,RACES__TIME
HAE,XAD,JOEBURG,1,12:35
HAE,XAD,JOEBURG,2,13:10
HAE,XAD,JOEBURG,3,13:40
HAE,XAD,JOEBURG,4,14:10
HAE,XAD,JOEBURG,5,14:55
HAE,XAD,JOEBURG,6,15:30
HAE,XAD,JOEBURG,7,16:05
HAE,XAD,JOEBURG,8,16:40
ABA,XAD,FOOBURG,1,12:35
ABA,XAD,FOOBURG,2,13:10
ABA,XAD,FOOBURG,3,13:40
ABA,XAD,FOOBURG,4,14:10
ABA,XAD,FOOBURG,5,14:55
ABA,XAD,FOOBURG,6,15:30
ABA,XAD,FOOBURG,7,16:05
ABA,XAD,FOOBURG,8,16:40

Categories

Resources