Flattening a dict - python

I have the following array of dicts (there's only one dict):
[{
'RuntimeInMinutes': '21',
'EpisodeNumber': '21',
'Genres': ['Animation'],
'ReleaseDate': '2005-02-05',
'LanguageOfMetadata': 'EN',
'Languages': [{
'_Key': 'CC',
'Value': ['en']
}, {
'_Key': 'Primary',
'Value': ['EN']
}],
'Products': [{
'URL': 'http://www.hulu.com/watch/217566',
'Rating': 'TV-Y',
'Currency': 'USD',
'SUBSCRIPTION': '0.00',
'_Key': 'US'
}, {
'URL': 'http://www.hulu.com/d/217566',
'Rating': 'TV-Y',
'Currency': 'USD',
'SUBSCRIPTION': '0.00',
'_Key': 'DE'
}],
'ReleaseYear': '2005',
'TVSeriesID': '5638#TVSeries',
'Type': 'TVEpisode',
'Studio': '4K Media'
}]
I would like to flatten the dict as follows:
[{
'RuntimeInMinutes': '21',
'EpisodeNumber': '21',
'Genres': ['Animation'],
'ReleaseDate': '2005-02-05',
'LanguageOfMetadata': 'EN',
'Languages._Key': ['CC', 'Primary'],
'Languages.Value': ['en', 'EN'],
'Products.URL': ['http://www.hulu.com/watch/217566', 'http://www.hulu.com/d/217566'],
'Products.Rating': ['TV-Y', 'TV-Y'],
'Products.Currency': ['USD', 'USD'],
'Products.SUBSCRIPTION': ['0.00', '0.00'],
'Products._Key': ['US', 'DE'],
'ReleaseYear': '2005',
'TVSeriesID': '5638#TVSeries',
'Type': 'TVEpisode',
'Studio': '4K Media'
}]
In other words, anytime a dict is encountered, it need to convert to either a string, number, or list.
What I currently have is something along the lines of the following, which uses a while loop to iterate through all the subpaths of the json.
while True:
for key in copy(keys):
val = get_sub_object_from_path(obj, key)
if isinstance(val, dict):
FLAT_OBJ[key.replace('/', '.')] = val
else:
keys.extend(os.path.join(key, _nextkey) for _nextkey in val.keys())
keys.remove(key)
if (not keys) or (n > 5):
break
else:
n += 1
continue

You can use recursion with a generator:
from collections import defaultdict
_d = [{'RuntimeInMinutes': '21', 'EpisodeNumber': '21', 'Genres': ['Animation'], 'ReleaseDate': '2005-02-05', 'LanguageOfMetadata': 'EN', 'Languages': [{'_Key': 'CC', 'Value': ['en']}, {'_Key': 'Primary', 'Value': ['EN']}], 'Products': [{'URL': 'http://www.hulu.com/watch/217566', 'Rating': 'TV-Y', 'Currency': 'USD', 'SUBSCRIPTION': '0.00', '_Key': 'US'}, {'URL': 'http://www.hulu.com/d/217566', 'Rating': 'TV-Y', 'Currency': 'USD', 'SUBSCRIPTION': '0.00', '_Key': 'DE'}], 'ReleaseYear': '2005', 'TVSeriesID': '5638#TVSeries', 'Type': 'TVEpisode', 'Studio': '4K Media'}]
def get_vals(d, _path = []):
for a, b in getattr(d, 'items', lambda :{})():
if isinstance(b, list) and all(isinstance(i, dict) or isinstance(i, list) for i in b):
for c in b:
yield from get_vals(c, _path+[a])
elif isinstance(b, dict):
yield from get_vals(b, _path+[a])
else:
yield ['.'.join(_path+[a]), b]
results = [i for b in _d for i in get_vals(b)]
_c = defaultdict(list)
for a, b in results:
_c[a].append(b)
result = [{a:list(b) if len(b) > 1 else b[0] for a, b in _c.items()}]
import json
print(json.dumps(result, indent=4))
Output:
[
{
"RuntimeInMinutes": "21",
"EpisodeNumber": "21",
"Genres": [
"Animation"
],
"ReleaseDate": "2005-02-05",
"LanguageOfMetadata": "EN",
"Languages._Key": [
"CC",
"Primary"
],
"Languages.Value": [
[
"en"
],
[
"EN"
]
],
"Products.URL": [
"http://www.hulu.com/watch/217566",
"http://www.hulu.com/d/217566"
],
"Products.Rating": [
"TV-Y",
"TV-Y"
],
"Products.Currency": [
"USD",
"USD"
],
"Products.SUBSCRIPTION": [
"0.00",
"0.00"
],
"Products._Key": [
"US",
"DE"
],
"ReleaseYear": "2005",
"TVSeriesID": "5638#TVSeries",
"Type": "TVEpisode",
"Studio": "4K Media"
}
]
Edit: wrapping solution in outer function:
def flatten_obj(data):
def get_vals(d, _path = []):
for a, b in getattr(d, 'items', lambda :{})():
if isinstance(b, list) and all(isinstance(i, dict) or isinstance(i, list) for i in b):
for c in b:
yield from get_vals(c, _path+[a])
elif isinstance(b, dict):
yield from get_vals(b, _path+[a])
else:
yield ['.'.join(_path+[a]), b]
results = [i for b in data for i in get_vals(b)]
_c = defaultdict(list)
for a, b in results:
_c[a].append(b)
return [{a:list(b) if len(b) > 1 else b[0] for a, b in _c.items()}]

EDIT
This now appears to be fixed:
As #panda-34 correctly points out (+1), the currently accepted
solution loses data, specifically Genres and Languages.Value when
you run the posted code.
Unfortunately, #panda-34's code modifies Genres:
'Genres': 'Animation',
rather than leaving it alone as in the OP's example:
'Genres': ['Animation'],
Below's my solution which attacks the problem a different way. None of the keys in the original data contains a dictionary as a value, only non-containers or lists (e.g. lists of dictionaries). So a primary a list of dictionaries will becomes a dictionary of lists (or just a plain dictionary if there's only one dictionary in the list.) Once we've done that, then any value that's now a dictionary is expanded back into the original data structure:
def flatten(container):
# A list of dictionaries becomes a dictionary of lists (unless only one dictionary in list)
if isinstance(container, list) and all(isinstance(element, dict) for element in container):
new_dictionary = {}
first, *rest = container
for key, value in first.items():
new_dictionary[key] = [flatten(value)] if rest else flatten(value)
for dictionary in rest:
for key, value in dictionary.items():
new_dictionary[key].append(value)
container = new_dictionary
# Any dictionary value that's a dictionary is expanded into original dictionary
if isinstance(container, dict):
new_dictionary = {}
for key, value in container.items():
if isinstance(value, dict):
for sub_key, sub_value in value.items():
new_dictionary[key + "." + sub_key] = sub_value
else:
new_dictionary[key] = value
container = new_dictionary
return container
OUTPUT
{
"RuntimeInMinutes": "21",
"EpisodeNumber": "21",
"Genres": [
"Animation"
],
"ReleaseDate": "2005-02-05",
"LanguageOfMetadata": "EN",
"Languages._Key": [
"CC",
"Primary"
],
"Languages.Value": [
[
"en"
],
[
"EN"
]
],
"Products.URL": [
"http://www.hulu.com/watch/217566",
"http://www.hulu.com/d/217566"
],
"Products.Rating": [
"TV-Y",
"TV-Y"
],
"Products.Currency": [
"USD",
"USD"
],
"Products.SUBSCRIPTION": [
"0.00",
"0.00"
],
"Products._Key": [
"US",
"DE"
],
"ReleaseYear": "2005",
"TVSeriesID": "5638#TVSeries",
"Type": "TVEpisode",
"Studio": "4K Media"
}
But this solution introduces a new apparent inconsistency:
'Languages.Value': ['en', 'EN'],
vs.
"Languages.Value": [["en"], ["EN"]],
However, I believe this is tied up with the Genres inconsistency mentioned earlier and the OP needs to define a consistent resolution.

Ajax1234's answer loses values of 'Genres' and 'Languages.Value'
Here's a bit more generic version:
def flatten_obj(data):
def flatten_item(item, keys):
if isinstance(item, list):
for v in item:
yield from flatten_item(v, keys)
elif isinstance(item, dict):
for k, v in item.items():
yield from flatten_item(v, keys+[k])
else:
yield '.'.join(keys), item
res = []
for item in data:
res_item = defaultdict(list)
for k, v in flatten_item(item, []):
res_item[k].append(v)
res.append({k: (v if len(v) > 1 else v[0]) for k, v in res_item.items()})
return res
P.S. "Genres" value is also flattened. It is either an inconsistency in the OP requirements or a separate problem which is not addressed in this answer.

Related

Remove nan value from dictionary and list using python

I want to remove nan value from dictonaires and list. If nan is the only value in case of phone_type work then remove full dictionary itself.
Input Data
dic = {'Customer_Number': 12345, 'Email': [{'Email_Type': 'Primary', 'Email': ['sa#ru.edu', nan]}]
,'Phone_Number': [{'Phone_Type': 'Mobile', 'Phone': [1217]}, {'Phone_Type': 'work', 'Phone': [nan]}]}
Expected Output
{'Customer_Number': 12345, 'Email': [{'Email_Type': 'Primary', 'Email': ['sam#rus.edu']}]
,'Phone_Number': [{'Phone_Type': 'Mobile', 'Phone': [1217]}]}
Code tried:
for i in range(0, len(dic)):
for j in dic[i][key]:
print("j key:",j)
print("j",j[value[1]])
if (pd.isna(j[value[1]])):
print("nan condition")
dic[i][value[1]].remove(j)
else:
null_val_dict_removal.append(j)
dic[i][key] = null_val_dict_removal
print("dict key", dic[i][key])
null_val_dict_removal = []
Getting error :
if (pd.isna(j[value[1]])):
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
You can create a recursive function to remove nan and empty lists/dictionaries:
import json
from numpy import nan, NaN, NAN
dic = {'Customer_Number': 92154246, 'Email': [{'Email_Type': 'Primary', 'Email': ['saman.zonouz#rutgers.edu', nan]}]
,'Phone_Number': [{'Phone_Type': 'Mobile', 'Phone': [12177218280.0]}, {'Phone_Type': 'work', 'Phone': [nan]}]}
# define which elements you want to remove:
to_be_deleted = [[], {}, "", None, nan]
def remove_empty_elements(jsonData):
if isinstance(jsonData, list):
jsonData = [new_elem for elem in jsonData
if (new_elem := remove_empty_elements(elem)) not in to_be_deleted]
elif isinstance(jsonData,dict):
jsonData = {key: new_value for key, value in jsonData.items()
if (new_value := remove_empty_elements(value)) not in to_be_deleted}
if len(jsonData) == 1:
return None
return jsonData
new_dic = remove_empty_elements(dic)
print(json.dumps(new_dic, indent=4))
Output:
{
"Customer_Number": 92154246,
"Email": [
{
"Email_Type": "Primary",
"Email": [
"saman.zonouz#rutgers.edu"
]
}
],
"Phone_Number": [
{
"Phone_Type": "Mobile",
"Phone": [
12177218280.0
]
}
]
}
Edit: for python < 3.8, remove the comprehension assignments from the function:
def remove_empty_elements(jsonData):
if isinstance(jsonData, list):
jsonData = [remove_empty_elements(elem) for elem in jsonData
if remove_empty_elements(elem) not in to_be_deleted]
elif isinstance(jsonData,dict):
jsonData = {key: remove_empty_elements(value) for key, value in jsonData.items()
if remove_empty_elements(value) not in to_be_deleted}
if len(jsonData) == 1:
return None
return jsonData

Python group by a object key gives errros

Does anyone know of a (itertools.groupby if possible too) way in Python to group an array of objects by an object key then create a new array of objects based on the grouping? For example, I have an array of car objects:
cars = [
{
'make': 'audi',
'model': 'r8',
'year': '2012'
}, {
'make': 'audi',
'model': 'rs5',
'year': '2013'
}, {
'make': 'ford',
'model': 'mustang',
'year': '2012'
}, {
'make': 'ford',
'model': 'fusion',
'year': '2015'
}, {
'make': 'kia',
'model': 'optima',
'year': '2012'
},
]
I want to make a new array of car objects that's grouped by make:
newCarsList = {
'audi': [
{
'model': 'r8',
'year': '2012'
}, {
'model': 'rs5',
'year': '2013'
},
],
'ford': [
{
'model': 'mustang',
'year': '2012'
}, {
'model': 'fusion',
'year': '2015'
}
],
'kia': [
{
'model': 'optima',
'year': '2012'
}
]
}
I tried with but this groupby:
def key_func(k):
return k['make']
newCarsList = []
for key, value in groupby(cars, key_func):
newCarsList.append({key: value})
print(newCarsList)
but this returns: [{'audi': <itertools._grouper object at 0x7f9b2c2bdd30>}, ...] and can't find how to fix.
Can someone help please?
A simple, mostly-functional solution:
from operator import itemgetter
from itertools import groupby
from functools import partial
def del_ret(d, key):
del d[key]
return d
dict(map(lambda k_v: (k_v[0], tuple(map(partial(del_ret, key="make"), k_v[1]))),
groupby(cars, itemgetter("make"))))
Change tuple to list to get identical output to what you want. But I assume you aren't going to modify those, so always use tuple in those circumstances…
The itertools._grouper is an iterable object. You can extract the values by iterating over it. For example, instead of appending {key: value}, you can pull the elements with a list comprehension: {key: [item for item in value]}.
It seems that your desired output is a dict though, not a list. You can get your pattern with
result = {}
for key, value in groupby(cars, key_func):
result[key] = [item for item in value]
for item in result[key]:
del item['make']
Edit: It's nicer to not add items that we're going to delete anyway. That can be done like this:
result = {}
for key, value in groupby(cars, key_func):
result[key] = [{subkey: subval for (subkey, subval)
in make.items() if subkey != 'make'}
for make in value]
df=pd.DataFrame(cars)
print(df)
for mytuple in df.itertuples():
print(mytuple)

Search for key inside nested lists inside nested dictionaries

The original post is Go through json line by line including unkown nested arrays and objects
I am trying to search for a specific value in all nested lists and nested dictionaries inside a JSON dictionary. The structure of the dictionary is not always known. The nested dictionary can have a nested list.
The key I'm looking for is Date and should not have String. Example key = Date is True but if key = DateString condition is not met.
Code:
def ConvertTimestamp(my_list_of_dicts: list):
for e in my_list_of_dicts:
# check top level keys whose values are not a list
keys_with_date = [k for k, v in e.items() if 'Date' in k and type(v) and 'String' not in k != list]
for k1 in keys_with_date:
e[k1] = 'found'
# check top level keys whose values are a list
keys_with_lists = [k for k, v in e.items() if type(v) == list]
for k1 in keys_with_lists:
for i, d in enumerate(e[k1]):
for k2, v in d.items():
if 'Date' in k2 and 'String' not in k2:
e[k1][i][k2] = 'found'
return my_list_of_dicts
Data
test_data = [{
"PurchaseOrderID": "aaff50c2-05d5-4943-9a37-421d1b326dc3",
"PurchaseOrderNumber": "PO-0001",
"DateString": "2020-06-04T00:00:00",
"Date": "2020-06-04T02:00:00.000000",
"DeliveryDateString": "2020-06-11T00:00:00",
"DeliveryDate": "2020-06-11T02:00:00.000000",
"DeliveryAddress": "",
"AttentionTo": "",
"Telephone": "",
"DeliveryInstructions": "",
"HasErrors": false,
"IsDiscounted": true,
"Reference": "",
"Type": "PURCHASEORDER",
"CurrencyRate": 1.0,
"CurrencyCode": "EUR",
"Contact": {
"ContactID": "31dcd998-026662967",
"ContactStatus": "ACTIVE",
"Name": "Test",
"FirstName": "",
"LastName": "",
"Addresses": [],
"Phones": [],
"UpdatedDateUTC": "/Date(1591272554130+0000)/",
"ContactGroups": [],
"DefaultCurrency": "EUR",
"ContactPersons": [],
"HasValidationErrors": false
},
"BrandingThemeID": "86a1c878-7b2ed792b224",
"Status": "DELETED",
"LineAmountTypes": "Exclusive",
"SubTotal": 1000.0,
"TotalTax": 0.0,
"Total": 1000.0,
"UpdatedDateUTC": "2020-06-04T12:14:26.527000",
"HasAttachments": false }]
Result should be
[{
"PurchaseOrderID": "aaff50c2-05d5-4943-9a37-421d1b326dc3",
"PurchaseOrderNumber": "PO-0001",
"DateString": "2020-06-04T00:00:00",
"Date": "2020-06-04T02:00:00.000000",
"DeliveryDateString": "2020-06-11T00:00:00",
"DeliveryDate": "2020-06-11T02:00:00.000000",
"DeliveryAddress": "",
"AttentionTo": "",
"Telephone": "",
"DeliveryInstructions": "",
"HasErrors": false,
"IsDiscounted": true,
"Reference": "",
"Type": "PURCHASEORDER",
"CurrencyRate": 1.0,
"CurrencyCode": "EUR",
"Contact": {
"ContactID": "31dcd998-026662967",
"ContactStatus": "ACTIVE",
"Name": "Test",
"FirstName": "",
"LastName": "",
"Addresses": [],
"Phones": [],
"UpdatedDateUTC": "2020-06-03T09:55:30.000000",
"ContactGroups": [],
"DefaultCurrency": "EUR",
"ContactPersons": [],
"HasValidationErrors": false
},
"BrandingThemeID": "86a1c878-7b2ed792b224",
"Status": "DELETED",
"LineAmountTypes": "Exclusive",
"SubTotal": 1000.0,
"TotalTax": 0.0,
"Total": 1000.0,
"UpdatedDateUTC": "2020-06-04T12:14:26.527000",
"HasAttachments": false }]
Since you don't know what the structure of the dictionaries are, it could be under an arbitrary number of levels.
Also, the expected result didn't really show what you want to do with the dictionary once found, so I've just added those dictionaries to a list.
Recursion helps in such cases.
def search_dict(d, results):
for k,v in d.items():
if 'Date' in k and 'String' not in k:
# Do what you want with `d` here.
# Your "Result should be" didn't really explain the
# found part, but if it gets here it means you found it.
# Appended to results as we want to continue searching for more.
# Appending (k,v,d) where d is the dictionary containing
# this key and value, incase you wanted that too.
# Adjust this accordingly.
results.append((k,v,d))
if isinstance(v, dict):
search_dict(v, results)
if isinstance(v, list):
search_list(v, results)
def search_list(l, results):
for item in l:
if not isinstance(item, dict):
continue # don't care about things that aren't dictionaries
if isinstance(item, list):
search_list(item, results)
if isinstance(item, dict):
search_dict(item, results)
def ConvertTimestamp(my_list_of_dicts: list):
results = []
search_list(my_list_of_dicts, results)
return results
Here is how you can recurse through the object, making the updates.
For sake of generality, the recursive function takes two externally defined callables an addition to the object being recursed:
a "key tester" function that should take a key (string) and return a boolean, which is used to decide which keys are to have their values updated
a "replacer" function that should take a value and return the new value
from datetime import date
from pprint import pprint
from copy import deepcopy
import re
def do_replacements(obj, key_tester, replacer):
"""
recursing through the nested list/dict structure,
and wherever key_tester(key) yields True,
use replacer function to generate the new value
"""
if isinstance(obj, dict):
for k, v in obj.items():
if key_tester(k):
obj[k] = replacer(v)
else:
do_replacements(v, key_tester, replacer)
elif isinstance(obj, list):
for item in obj:
do_replacements(item, key_tester, replacer)
def fix_time(ts):
"""
replace the timestamp if it fits a particular pattern
(based on code in original question)
"""
pattern = '\(|\)'
if not re.search(pattern, ts):
return ts
format = '%Y-%m-%dT%H:%M:%S.%f'
ts_utc = re.split(pattern, ts)[1]
ts_utc = ts_utc[:ts_utc.find("+")]
return date.fromtimestamp(float(ts_utc)/1000).strftime(format)
test_data = [{'PurchaseOrderID': 'aaff50c2-05d5-4943-9a37-421d1b326dc3', 'PurchaseOrderNumber': 'PO-0001', 'DateString': '2020-06-04T00:00:00', 'Date': '2020-06-04T02:00:00.000000', 'DeliveryDateString': '2020-06-11T00:00:00', 'DeliveryDate': '2020-06-11T02:00:00.000000', 'DeliveryAddress': '', 'AttentionTo': '', 'Telephone': '', 'DeliveryInstructions': '', 'HasErrors': False, 'IsDiscounted': True, 'Reference': '', 'Type': 'PURCHASEORDER', 'CurrencyRate': 1.0, 'CurrencyCode': 'EUR', 'Contact': {'ContactID': '31dcd998-026662967', 'ContactStatus': 'ACTIVE', 'Name': 'Test', 'FirstName': '', 'LastName': '', 'Addresses': [], 'Phones': [], 'UpdatedDateUTC': '/Date(1591272554130+0000)/', 'ContactGroups': [], 'DefaultCurrency': 'EUR', 'ContactPersons': [], 'HasValidationErrors': False}, 'BrandingThemeID': '86a1c878-7b2ed792b224', 'Status': 'DELETED', 'LineAmountTypes': 'Exclusive', 'SubTotal': 1000.0, 'TotalTax': 0.0, 'Total': 1000.0, 'UpdatedDateUTC': '2020-06-04T12:14:26.527000', 'HasAttachments': False}]
func = lambda k: "Date" in k and "String" not in k
output = deepcopy(test_data)
do_replacements(output, func, fix_time)
pprint(output)
gives:
[{'AttentionTo': '',
'BrandingThemeID': '86a1c878-7b2ed792b224',
'Contact': {'Addresses': [],
'ContactGroups': [],
'ContactID': '31dcd998-026662967',
'ContactPersons': [],
'ContactStatus': 'ACTIVE',
'DefaultCurrency': 'EUR',
'FirstName': '',
'HasValidationErrors': False,
'LastName': '',
'Name': 'Test',
'Phones': [],
'UpdatedDateUTC': '2020-06-04T00:00:00.000000'},
'CurrencyCode': 'EUR',
'CurrencyRate': 1.0,
'Date': '2020-06-04T02:00:00.000000',
'DateString': '2020-06-04T00:00:00',
'DeliveryAddress': '',
'DeliveryDate': '2020-06-11T02:00:00.000000',
'DeliveryDateString': '2020-06-11T00:00:00',
'DeliveryInstructions': '',
'HasAttachments': False,
'HasErrors': False,
'IsDiscounted': True,
'LineAmountTypes': 'Exclusive',
'PurchaseOrderID': 'aaff50c2-05d5-4943-9a37-421d1b326dc3',
'PurchaseOrderNumber': 'PO-0001',
'Reference': '',
'Status': 'DELETED',
'SubTotal': 1000.0,
'Telephone': '',
'Total': 1000.0,
'TotalTax': 0.0,
'Type': 'PURCHASEORDER',
'UpdatedDateUTC': '2020-06-04T12:14:26.527000'}]
(Note: the output shown here is the pretty-printed python object, rather than JSON, although similar.)

Updating only the value n-nested dictionary

I'm trying update update the value of a nested dictionary within a for loop, so it doesn't generate a new dictionary every time, I'm pretty new to traversing nested structures so bear with me. Each value is located in a list:
My list:
id_list = ['asf245', 'kjb456', '235sdg']
My dictionary:
temp = {"ent": {"type": "IDN", "attributes": [{"ent": {"id": "abc123"}}], "limit": 20}}
Ideally I would append each update dictionary to a dataframe and then update it with the new value:
Ideal output:
temp = {"ent": {"type": "IDN", "attributes": [{"ent": {"id": "asf245"}}], "limit": 20}}
temp = {"ent": {"type": "IDN", "attributes": [{"ent": {"id": "kjb456"}}], "limit": 20}}
temp = {"ent": {"type": "IDN", "attributes": [{"ent": {"id": "235sdg"}}], "limit": 20}}
Where temp gets appended to a dataframe every iteration then gets overwritten with the new value:
I've tried:
import collections
def update(d, u):
for k, v in u.items():
if isinstance(v, collections.Mapping):
d[k] = update(d.get(k, {}), v)
else:
d[k] = v
return d
print(update(temp, 'Apples')) <- "run this through a loop"
But running this through a visualizer I can see that it doesn't go deep enough, and I don't truly have a good understanding of it, if anyone could explain it that would be awesome.
Here. The result of the function is a list of dicts (with modified id)
import copy
def clone_dict(d, ids):
result = []
for id in ids:
clone = copy.deepcopy(d)
clone['ent']['attributes'][0]['ent']['id'] = id
result.append(clone)
return result
temp = {"ent": {"type": "IDN", "attributes": [{"ent": {"id": "abc123"}}], "limit": 20}}
ids = ['x', 'y', 'z']
print(clone_dict(temp, ids))
output
[{'ent': {'attributes': [{'ent': {'id': 'x'}}], 'type': 'IDN', 'limit': 20}}, {'ent': {'attributes': [{'ent': {'id': 'y'}}], 'type': 'IDN', 'limit': 20}}, {'ent': {'attributes': [{'ent': {'id': 'z'}}], 'type': 'IDN', 'limit': 20}}]
A generic approach below
import copy
def clone_dict(src_dict, values_to_inject, path_elements):
""" Clone a dict N times and replace a nested field
:param src_dict: Used as 'template'
:param values_to_inject: List of values to inject
:param path_elements: List of path elements. Used in dict navigation
:return: A list of cloned modified dicts
"""
result = []
for value in values_to_inject:
clone = copy.deepcopy(src_dict)
temp = clone[path_elements[0]]
for path_element in path_elements[1:-1]:
temp = temp[path_element]
temp[path_elements[-1]] = value
result.append(clone)
return result
src_dict = {"ent": {"type": "IDN", "attributes": [{"ent": {"id": "abc123"}}], "limit": 20}}
values_to_inject = ['x', 'y', 'z']
path_elements = ['ent', 'attributes', 0, 'ent', 'id']
print(clone_dict(src_dict, values_to_inject, path_elements))
Here is a more generic solution involving recursion. It takes a dictionary to update, the key to update, and the value that you want to update.
def update(to_update, key, val):
for k, v in to_update.items():
if k == key:
to_update[k] = val
else:
if isinstance(v, dict):
update(v, key, val)
elif isinstance(v, list):
for item in v:
if isinstance(item, (dict, list)):
update(item, key, val)
else:
continue
else:
continue
return to_update
for id_ in id_list:
new = update(temp, 'id', id_)
print(new)
{'ent': {'type': 'IDN', 'attributes': [{'ent': {'id': 'asf245'}}], 'limit': 20}}
{'ent': {'type': 'IDN', 'attributes': [{'ent': {'id': 'kjb456'}}], 'limit': 20}}
{'ent': {'type': 'IDN', 'attributes': [{'ent': {'id': '235sdg'}}], 'limit': 20}}

Exclude empty/null values from JSON serialization

I am serializing multiple nested dictionaries to JSON using Python with simplejson.
Is there any way to automatically exclude empty/null values?
For example, serialize this:
{
"dict1" : {
"key1" : "value1",
"key2" : None
}
}
to
{
"dict1" : {
"key1" : "value1"
}
}
When using Jackson with Java you can use Inclusion.NON_NULL to do this. Is there a simplejson equivalent?
def del_none(d):
"""
Delete keys with the value ``None`` in a dictionary, recursively.
This alters the input so you may wish to ``copy`` the dict first.
"""
# For Python 3, write `list(d.items())`; `d.items()` won’t work
# For Python 2, write `d.items()`; `d.iteritems()` won’t work
for key, value in list(d.items()):
if value is None:
del d[key]
elif isinstance(value, dict):
del_none(value)
return d # For convenience
Sample usage:
>>> mydict = {'dict1': {'key1': 'value1', 'key2': None}}
>>> print(del_none(mydict.copy()))
{'dict1': {'key1': 'value1'}}
Then you can feed that to json.
My Python3 version of this has the benefit of not changing the input, as well as recursion into dictionaries nested in lists:
def clean_nones(value):
"""
Recursively remove all None values from dictionaries and lists, and returns
the result as a new dictionary or list.
"""
if isinstance(value, list):
return [clean_nones(x) for x in value if x is not None]
elif isinstance(value, dict):
return {
key: clean_nones(val)
for key, val in value.items()
if val is not None
}
else:
return value
For example:
a = {
"a": None,
"b": "notNone",
"c": ["hello", None, "goodbye"],
"d": [
{
"a": "notNone",
"b": None,
"c": ["hello", None, "goodbye"],
},
{
"a": "notNone",
"b": None,
"c": ["hello", None, "goodbye"],
}
]
}
print(clean_nones(a))
results in this:
{
'b': 'notNone',
'c': ['hello', 'goodbye'],
'd': [
{
'a': 'notNone',
'c': ['hello', 'goodbye']
},
{
'a': 'notNone',
'c': ['hello', 'goodbye']
}
]
}
>>> def cleandict(d):
... if not isinstance(d, dict):
... return d
... return dict((k,cleandict(v)) for k,v in d.iteritems() if v is not None)
...
>>> mydict = dict(dict1=dict(key1='value1', key2=None))
>>> print cleandict(mydict)
{'dict1': {'key1': 'value1'}}
>>>
I don't like using del in general, changing the existing dictionary can have subtle effects depending on how they are created. Creating new dictionaries with None removed prevents all side effect.
You can try this approach. In my case (I use python 3), it works well.
def to_json(self):
return json.dumps(self,
default=lambda o: dict((key, value) for key, value in o.__dict__.items() if value),
indent=4,
allow_nan=False)
This solution is correction of the one above from #eric which does not handle list type corectly.
Values in canonical JSON dictionary can be of one of following 3 types:
dictionary
list
value type (string, integer or floating point)
Note: Assumption is that we are dealing here with canonical JSON dictionary which can really contain only above mentioned types. If dictionary contains other types then ones mentioned above (e.g. tuples, custom classes, ...), then this solution won't work as expected.
The essential difference between this solution (below) and the original one from #eric is that list can contain elements of dictionary type from iside of which we want to drop elements with None value.
def cleandict(d):
if isinstance(d, dict):
return {k: cleandict(v) for k, v in d.items() if v is not None}
elif isinstance(d, list):
return [cleandict(v) for v in d]
else:
return d
Note: Please keep in mind that we must NOT remove None elements from the list since it would affect structural integrity of the list data. If some ( or all) of list elements have None value, they shall remain listed in the list structure as they were in order to preserve original structural meaning/integrity of the list.
def excludeNone(d):
for k in list(d):
if k in d:
if type(d[k]) == dict:
excludeNone(d[k])
if not d[k]:
del d[k]
It works for me:
When dictionary has dict/list/tuple values ....
for example it is my object:
dict_obj = {
'inline_keyboard': [
[
{'text': '0-0', 'url': None, 'login_url': None, 'callback_data': '0-0', 'switch_inline_query': None},
{'text': '0-1', 'url': None, 'login_url': None, 'callback_data': '0-1', 'switch_inline_query': None}
],
[
{'text': '1-0', 'url': None, 'login_url': None, 'callback_data': '1-0', 'switch_inline_query': None},
{'text': '1-1', 'url': None, 'login_url': None, 'callback_data': '1-1', 'switch_inline_query': None}
],
[
{'text': '2-0', 'url': None, 'login_url': None, 'callback_data': '2-0', 'switch_inline_query': None}
]
]
}
I wrote this function:
def delete_none_values(obj):
if isinstance(obj, dict):
for k, v in list(obj.items()):
if v is None:
del obj[k]
elif isinstance(v, dict):
delete_none_values(v)
elif isinstance(v, (list, tuple)):
for _ in v:
delete_none_values(_)
elif isinstance(obj, (list, tuple)):
for _ in obj:
delete_none_values(_)
return obj
And then when use this fuction:
from json import dumps
print(
dumps(
delete_none_values(dict_obj.copy()),
indent=2
)
)
output is:
{
"inline_keyboard": [
[
{"text": "0-0", "callback_data": "0-0"},
{"text": "0-1", "callback_data": "0-1"}
],
[
{"text": "1-0", "callback_data": "1-0"},
{"text": "1-1", "callback_data": "1-1"}
],
[
{"text": "2-0", "callback_data": "2-0"}
]
]
}
Could you maybe remain 'url' if it has value in one place and remove it if it none on another place?
'inline_keyboard': [
[
{'text': '0-0', 'url': 'someValue', 'login_url': None, 'callback_data': '0-0', 'switch_inline_query': None},
{'text': '0-1', 'url': None, 'login_url': None, 'callback_data': '0-1', 'switch_inline_query': None}
],
[
{'text': '1-0', 'url': None, 'login_url': None, 'callback_data': '1-0', 'switch_inline_query': None},
{'text': '1-1', 'url': None, 'login_url': None, 'callback_data': '1-1', 'switch_inline_query': None}
],
[
{'text': '2-0', 'url': None, 'login_url': None, 'callback_data': '2-0', 'switch_inline_query': None}
]
]

Categories

Resources