Related
The original post is Go through json line by line including unkown nested arrays and objects
I am trying to search for a specific value in all nested lists and nested dictionaries inside a JSON dictionary. The structure of the dictionary is not always known. The nested dictionary can have a nested list.
The key I'm looking for is Date and should not have String. Example key = Date is True but if key = DateString condition is not met.
Code:
def ConvertTimestamp(my_list_of_dicts: list):
for e in my_list_of_dicts:
# check top level keys whose values are not a list
keys_with_date = [k for k, v in e.items() if 'Date' in k and type(v) and 'String' not in k != list]
for k1 in keys_with_date:
e[k1] = 'found'
# check top level keys whose values are a list
keys_with_lists = [k for k, v in e.items() if type(v) == list]
for k1 in keys_with_lists:
for i, d in enumerate(e[k1]):
for k2, v in d.items():
if 'Date' in k2 and 'String' not in k2:
e[k1][i][k2] = 'found'
return my_list_of_dicts
Data
test_data = [{
"PurchaseOrderID": "aaff50c2-05d5-4943-9a37-421d1b326dc3",
"PurchaseOrderNumber": "PO-0001",
"DateString": "2020-06-04T00:00:00",
"Date": "2020-06-04T02:00:00.000000",
"DeliveryDateString": "2020-06-11T00:00:00",
"DeliveryDate": "2020-06-11T02:00:00.000000",
"DeliveryAddress": "",
"AttentionTo": "",
"Telephone": "",
"DeliveryInstructions": "",
"HasErrors": false,
"IsDiscounted": true,
"Reference": "",
"Type": "PURCHASEORDER",
"CurrencyRate": 1.0,
"CurrencyCode": "EUR",
"Contact": {
"ContactID": "31dcd998-026662967",
"ContactStatus": "ACTIVE",
"Name": "Test",
"FirstName": "",
"LastName": "",
"Addresses": [],
"Phones": [],
"UpdatedDateUTC": "/Date(1591272554130+0000)/",
"ContactGroups": [],
"DefaultCurrency": "EUR",
"ContactPersons": [],
"HasValidationErrors": false
},
"BrandingThemeID": "86a1c878-7b2ed792b224",
"Status": "DELETED",
"LineAmountTypes": "Exclusive",
"SubTotal": 1000.0,
"TotalTax": 0.0,
"Total": 1000.0,
"UpdatedDateUTC": "2020-06-04T12:14:26.527000",
"HasAttachments": false }]
Result should be
[{
"PurchaseOrderID": "aaff50c2-05d5-4943-9a37-421d1b326dc3",
"PurchaseOrderNumber": "PO-0001",
"DateString": "2020-06-04T00:00:00",
"Date": "2020-06-04T02:00:00.000000",
"DeliveryDateString": "2020-06-11T00:00:00",
"DeliveryDate": "2020-06-11T02:00:00.000000",
"DeliveryAddress": "",
"AttentionTo": "",
"Telephone": "",
"DeliveryInstructions": "",
"HasErrors": false,
"IsDiscounted": true,
"Reference": "",
"Type": "PURCHASEORDER",
"CurrencyRate": 1.0,
"CurrencyCode": "EUR",
"Contact": {
"ContactID": "31dcd998-026662967",
"ContactStatus": "ACTIVE",
"Name": "Test",
"FirstName": "",
"LastName": "",
"Addresses": [],
"Phones": [],
"UpdatedDateUTC": "2020-06-03T09:55:30.000000",
"ContactGroups": [],
"DefaultCurrency": "EUR",
"ContactPersons": [],
"HasValidationErrors": false
},
"BrandingThemeID": "86a1c878-7b2ed792b224",
"Status": "DELETED",
"LineAmountTypes": "Exclusive",
"SubTotal": 1000.0,
"TotalTax": 0.0,
"Total": 1000.0,
"UpdatedDateUTC": "2020-06-04T12:14:26.527000",
"HasAttachments": false }]
Since you don't know what the structure of the dictionaries are, it could be under an arbitrary number of levels.
Also, the expected result didn't really show what you want to do with the dictionary once found, so I've just added those dictionaries to a list.
Recursion helps in such cases.
def search_dict(d, results):
for k,v in d.items():
if 'Date' in k and 'String' not in k:
# Do what you want with `d` here.
# Your "Result should be" didn't really explain the
# found part, but if it gets here it means you found it.
# Appended to results as we want to continue searching for more.
# Appending (k,v,d) where d is the dictionary containing
# this key and value, incase you wanted that too.
# Adjust this accordingly.
results.append((k,v,d))
if isinstance(v, dict):
search_dict(v, results)
if isinstance(v, list):
search_list(v, results)
def search_list(l, results):
for item in l:
if not isinstance(item, dict):
continue # don't care about things that aren't dictionaries
if isinstance(item, list):
search_list(item, results)
if isinstance(item, dict):
search_dict(item, results)
def ConvertTimestamp(my_list_of_dicts: list):
results = []
search_list(my_list_of_dicts, results)
return results
Here is how you can recurse through the object, making the updates.
For sake of generality, the recursive function takes two externally defined callables an addition to the object being recursed:
a "key tester" function that should take a key (string) and return a boolean, which is used to decide which keys are to have their values updated
a "replacer" function that should take a value and return the new value
from datetime import date
from pprint import pprint
from copy import deepcopy
import re
def do_replacements(obj, key_tester, replacer):
"""
recursing through the nested list/dict structure,
and wherever key_tester(key) yields True,
use replacer function to generate the new value
"""
if isinstance(obj, dict):
for k, v in obj.items():
if key_tester(k):
obj[k] = replacer(v)
else:
do_replacements(v, key_tester, replacer)
elif isinstance(obj, list):
for item in obj:
do_replacements(item, key_tester, replacer)
def fix_time(ts):
"""
replace the timestamp if it fits a particular pattern
(based on code in original question)
"""
pattern = '\(|\)'
if not re.search(pattern, ts):
return ts
format = '%Y-%m-%dT%H:%M:%S.%f'
ts_utc = re.split(pattern, ts)[1]
ts_utc = ts_utc[:ts_utc.find("+")]
return date.fromtimestamp(float(ts_utc)/1000).strftime(format)
test_data = [{'PurchaseOrderID': 'aaff50c2-05d5-4943-9a37-421d1b326dc3', 'PurchaseOrderNumber': 'PO-0001', 'DateString': '2020-06-04T00:00:00', 'Date': '2020-06-04T02:00:00.000000', 'DeliveryDateString': '2020-06-11T00:00:00', 'DeliveryDate': '2020-06-11T02:00:00.000000', 'DeliveryAddress': '', 'AttentionTo': '', 'Telephone': '', 'DeliveryInstructions': '', 'HasErrors': False, 'IsDiscounted': True, 'Reference': '', 'Type': 'PURCHASEORDER', 'CurrencyRate': 1.0, 'CurrencyCode': 'EUR', 'Contact': {'ContactID': '31dcd998-026662967', 'ContactStatus': 'ACTIVE', 'Name': 'Test', 'FirstName': '', 'LastName': '', 'Addresses': [], 'Phones': [], 'UpdatedDateUTC': '/Date(1591272554130+0000)/', 'ContactGroups': [], 'DefaultCurrency': 'EUR', 'ContactPersons': [], 'HasValidationErrors': False}, 'BrandingThemeID': '86a1c878-7b2ed792b224', 'Status': 'DELETED', 'LineAmountTypes': 'Exclusive', 'SubTotal': 1000.0, 'TotalTax': 0.0, 'Total': 1000.0, 'UpdatedDateUTC': '2020-06-04T12:14:26.527000', 'HasAttachments': False}]
func = lambda k: "Date" in k and "String" not in k
output = deepcopy(test_data)
do_replacements(output, func, fix_time)
pprint(output)
gives:
[{'AttentionTo': '',
'BrandingThemeID': '86a1c878-7b2ed792b224',
'Contact': {'Addresses': [],
'ContactGroups': [],
'ContactID': '31dcd998-026662967',
'ContactPersons': [],
'ContactStatus': 'ACTIVE',
'DefaultCurrency': 'EUR',
'FirstName': '',
'HasValidationErrors': False,
'LastName': '',
'Name': 'Test',
'Phones': [],
'UpdatedDateUTC': '2020-06-04T00:00:00.000000'},
'CurrencyCode': 'EUR',
'CurrencyRate': 1.0,
'Date': '2020-06-04T02:00:00.000000',
'DateString': '2020-06-04T00:00:00',
'DeliveryAddress': '',
'DeliveryDate': '2020-06-11T02:00:00.000000',
'DeliveryDateString': '2020-06-11T00:00:00',
'DeliveryInstructions': '',
'HasAttachments': False,
'HasErrors': False,
'IsDiscounted': True,
'LineAmountTypes': 'Exclusive',
'PurchaseOrderID': 'aaff50c2-05d5-4943-9a37-421d1b326dc3',
'PurchaseOrderNumber': 'PO-0001',
'Reference': '',
'Status': 'DELETED',
'SubTotal': 1000.0,
'Telephone': '',
'Total': 1000.0,
'TotalTax': 0.0,
'Type': 'PURCHASEORDER',
'UpdatedDateUTC': '2020-06-04T12:14:26.527000'}]
(Note: the output shown here is the pretty-printed python object, rather than JSON, although similar.)
I have the following array of dicts (there's only one dict):
[{
'RuntimeInMinutes': '21',
'EpisodeNumber': '21',
'Genres': ['Animation'],
'ReleaseDate': '2005-02-05',
'LanguageOfMetadata': 'EN',
'Languages': [{
'_Key': 'CC',
'Value': ['en']
}, {
'_Key': 'Primary',
'Value': ['EN']
}],
'Products': [{
'URL': 'http://www.hulu.com/watch/217566',
'Rating': 'TV-Y',
'Currency': 'USD',
'SUBSCRIPTION': '0.00',
'_Key': 'US'
}, {
'URL': 'http://www.hulu.com/d/217566',
'Rating': 'TV-Y',
'Currency': 'USD',
'SUBSCRIPTION': '0.00',
'_Key': 'DE'
}],
'ReleaseYear': '2005',
'TVSeriesID': '5638#TVSeries',
'Type': 'TVEpisode',
'Studio': '4K Media'
}]
I would like to flatten the dict as follows:
[{
'RuntimeInMinutes': '21',
'EpisodeNumber': '21',
'Genres': ['Animation'],
'ReleaseDate': '2005-02-05',
'LanguageOfMetadata': 'EN',
'Languages._Key': ['CC', 'Primary'],
'Languages.Value': ['en', 'EN'],
'Products.URL': ['http://www.hulu.com/watch/217566', 'http://www.hulu.com/d/217566'],
'Products.Rating': ['TV-Y', 'TV-Y'],
'Products.Currency': ['USD', 'USD'],
'Products.SUBSCRIPTION': ['0.00', '0.00'],
'Products._Key': ['US', 'DE'],
'ReleaseYear': '2005',
'TVSeriesID': '5638#TVSeries',
'Type': 'TVEpisode',
'Studio': '4K Media'
}]
In other words, anytime a dict is encountered, it need to convert to either a string, number, or list.
What I currently have is something along the lines of the following, which uses a while loop to iterate through all the subpaths of the json.
while True:
for key in copy(keys):
val = get_sub_object_from_path(obj, key)
if isinstance(val, dict):
FLAT_OBJ[key.replace('/', '.')] = val
else:
keys.extend(os.path.join(key, _nextkey) for _nextkey in val.keys())
keys.remove(key)
if (not keys) or (n > 5):
break
else:
n += 1
continue
You can use recursion with a generator:
from collections import defaultdict
_d = [{'RuntimeInMinutes': '21', 'EpisodeNumber': '21', 'Genres': ['Animation'], 'ReleaseDate': '2005-02-05', 'LanguageOfMetadata': 'EN', 'Languages': [{'_Key': 'CC', 'Value': ['en']}, {'_Key': 'Primary', 'Value': ['EN']}], 'Products': [{'URL': 'http://www.hulu.com/watch/217566', 'Rating': 'TV-Y', 'Currency': 'USD', 'SUBSCRIPTION': '0.00', '_Key': 'US'}, {'URL': 'http://www.hulu.com/d/217566', 'Rating': 'TV-Y', 'Currency': 'USD', 'SUBSCRIPTION': '0.00', '_Key': 'DE'}], 'ReleaseYear': '2005', 'TVSeriesID': '5638#TVSeries', 'Type': 'TVEpisode', 'Studio': '4K Media'}]
def get_vals(d, _path = []):
for a, b in getattr(d, 'items', lambda :{})():
if isinstance(b, list) and all(isinstance(i, dict) or isinstance(i, list) for i in b):
for c in b:
yield from get_vals(c, _path+[a])
elif isinstance(b, dict):
yield from get_vals(b, _path+[a])
else:
yield ['.'.join(_path+[a]), b]
results = [i for b in _d for i in get_vals(b)]
_c = defaultdict(list)
for a, b in results:
_c[a].append(b)
result = [{a:list(b) if len(b) > 1 else b[0] for a, b in _c.items()}]
import json
print(json.dumps(result, indent=4))
Output:
[
{
"RuntimeInMinutes": "21",
"EpisodeNumber": "21",
"Genres": [
"Animation"
],
"ReleaseDate": "2005-02-05",
"LanguageOfMetadata": "EN",
"Languages._Key": [
"CC",
"Primary"
],
"Languages.Value": [
[
"en"
],
[
"EN"
]
],
"Products.URL": [
"http://www.hulu.com/watch/217566",
"http://www.hulu.com/d/217566"
],
"Products.Rating": [
"TV-Y",
"TV-Y"
],
"Products.Currency": [
"USD",
"USD"
],
"Products.SUBSCRIPTION": [
"0.00",
"0.00"
],
"Products._Key": [
"US",
"DE"
],
"ReleaseYear": "2005",
"TVSeriesID": "5638#TVSeries",
"Type": "TVEpisode",
"Studio": "4K Media"
}
]
Edit: wrapping solution in outer function:
def flatten_obj(data):
def get_vals(d, _path = []):
for a, b in getattr(d, 'items', lambda :{})():
if isinstance(b, list) and all(isinstance(i, dict) or isinstance(i, list) for i in b):
for c in b:
yield from get_vals(c, _path+[a])
elif isinstance(b, dict):
yield from get_vals(b, _path+[a])
else:
yield ['.'.join(_path+[a]), b]
results = [i for b in data for i in get_vals(b)]
_c = defaultdict(list)
for a, b in results:
_c[a].append(b)
return [{a:list(b) if len(b) > 1 else b[0] for a, b in _c.items()}]
EDIT
This now appears to be fixed:
As #panda-34 correctly points out (+1), the currently accepted
solution loses data, specifically Genres and Languages.Value when
you run the posted code.
Unfortunately, #panda-34's code modifies Genres:
'Genres': 'Animation',
rather than leaving it alone as in the OP's example:
'Genres': ['Animation'],
Below's my solution which attacks the problem a different way. None of the keys in the original data contains a dictionary as a value, only non-containers or lists (e.g. lists of dictionaries). So a primary a list of dictionaries will becomes a dictionary of lists (or just a plain dictionary if there's only one dictionary in the list.) Once we've done that, then any value that's now a dictionary is expanded back into the original data structure:
def flatten(container):
# A list of dictionaries becomes a dictionary of lists (unless only one dictionary in list)
if isinstance(container, list) and all(isinstance(element, dict) for element in container):
new_dictionary = {}
first, *rest = container
for key, value in first.items():
new_dictionary[key] = [flatten(value)] if rest else flatten(value)
for dictionary in rest:
for key, value in dictionary.items():
new_dictionary[key].append(value)
container = new_dictionary
# Any dictionary value that's a dictionary is expanded into original dictionary
if isinstance(container, dict):
new_dictionary = {}
for key, value in container.items():
if isinstance(value, dict):
for sub_key, sub_value in value.items():
new_dictionary[key + "." + sub_key] = sub_value
else:
new_dictionary[key] = value
container = new_dictionary
return container
OUTPUT
{
"RuntimeInMinutes": "21",
"EpisodeNumber": "21",
"Genres": [
"Animation"
],
"ReleaseDate": "2005-02-05",
"LanguageOfMetadata": "EN",
"Languages._Key": [
"CC",
"Primary"
],
"Languages.Value": [
[
"en"
],
[
"EN"
]
],
"Products.URL": [
"http://www.hulu.com/watch/217566",
"http://www.hulu.com/d/217566"
],
"Products.Rating": [
"TV-Y",
"TV-Y"
],
"Products.Currency": [
"USD",
"USD"
],
"Products.SUBSCRIPTION": [
"0.00",
"0.00"
],
"Products._Key": [
"US",
"DE"
],
"ReleaseYear": "2005",
"TVSeriesID": "5638#TVSeries",
"Type": "TVEpisode",
"Studio": "4K Media"
}
But this solution introduces a new apparent inconsistency:
'Languages.Value': ['en', 'EN'],
vs.
"Languages.Value": [["en"], ["EN"]],
However, I believe this is tied up with the Genres inconsistency mentioned earlier and the OP needs to define a consistent resolution.
Ajax1234's answer loses values of 'Genres' and 'Languages.Value'
Here's a bit more generic version:
def flatten_obj(data):
def flatten_item(item, keys):
if isinstance(item, list):
for v in item:
yield from flatten_item(v, keys)
elif isinstance(item, dict):
for k, v in item.items():
yield from flatten_item(v, keys+[k])
else:
yield '.'.join(keys), item
res = []
for item in data:
res_item = defaultdict(list)
for k, v in flatten_item(item, []):
res_item[k].append(v)
res.append({k: (v if len(v) > 1 else v[0]) for k, v in res_item.items()})
return res
P.S. "Genres" value is also flattened. It is either an inconsistency in the OP requirements or a separate problem which is not addressed in this answer.
I would like to know how I can change a value in a dictionary using list of keys and 1 value
my_dict = {
'bob': {
'name': {'first': 'FirstName', 'last': 'LastName'},
'job': 'Developer'
}
}
string1 = 'bob.name.first=Bob'
string1 = string.split('=')
string2 = string1[0].split('.')
string2.append(string[1])
Here I end up with a list of 4 items, the first 3 are keys and the last is the value.
How can I use this given list to change the value in my_dict considering that the given list keys number can be changed for example if I want to change bob.job=QA
You can write:
string1 = 'bob.name.first=Bob'
string1,string2 = string1.split('=')
string1 = string1.split('.')
my_dict[string1[0]][string1[1]][string1[2]] = string2
I suppose the following function is what you are looking for, it works with any number of keys and creates intermediates dictionaries if not exist yet.
d = {
'bob': {
'name': {
'first': 'FirstName',
'last': 'LastName'
},
'job': 'Developer'
}
}
def update_dict_by_expr(d, expr):
keys_value = expr.split('=')
keys = keys_value[0].split('.')
value = keys_value[1]
item = d
while len(keys) > 1:
key = keys.pop(0)
if key not in item:
item[key] = {}
item = item[key]
key = keys[0]
item[key] = value
print(d)
update_dict_by_expr(d, 'bob.name.first=Bob Junior')
update_dict_by_expr(d, 'bob.name.birth.date=01/01/2017')
update_dict_by_expr(d, 'bob.name.birth.place=NYC')
print(d)
You want a dict with keys that can be accessed as attributes. You can achieve that by subclassing dict class, and add support for your need. I think this is more pythonic solution as it is much more intuative:
class MyDict(dict):
def __getattr__(self, attr):
return self[attr] if attr in self.keys() else None
def __setattr__(self, attr, value):
self[attr] = value
my_dict = MyDict({
'bob': MyDict(
{'name':
MyDict({'first': 'FirstName', 'last': 'LastName'}),
'job':'Developer'})})
>>> my_dict.bob
{'job': 'Developer', 'name': {'last': 'LastName', 'first': 'FirstName'}}
>>> my_dict.bob.job
'Developer'
>>> my_dict.bob.name
{'last': 'LastName', 'first': 'FirstName'}
It does require some overhead, as you will need to build your dicts based on MyDict. Regular dicts won't work if added to this dict.
This supports setting a new value as well:
>>> my_dict.bob.job = 'QA'
>>> my_dict.bob.job
'QA'
If you want to update Bob's job you can access it using my_dict['bob']['job']
my_dict = {
'bob': {
'name': {'first': 'FirstName', 'last': 'LastName'},
'job': 'Developer'
}
}
my_dict['bob']['job'] = 'QA'
print(my_dict)
>> {'bob': {'name': {'last': 'LastName', 'first': 'FirstName'}, 'job': 'QA'}}
or by splitting your string:
my_dict = {
'bob': {
'name': {'first': 'FirstName', 'last': 'LastName'},
'job': 'Developer'
}
}
bobjob_key_value = 'bob.job=QA'
key, value = bobjob_key_value.split('=')
key = key.split('.')
my_dict[key[0]][key[1]] = value
print(my_dict)
>> {'bob': {'job': 'QA', 'name': {'last': 'LastName', 'first': 'FirstName'}}}
import yaml
def get_dictionary_replace_value(file, new_value, strip_qoutes=True):
with open(file, 'r') as rf:
yaml_doc = yaml.load(rf)
rf.close()
key, value = new_value.split('=')
keys = key.split('.')
inner_dict = yaml_doc
for i in keys[:-1]:
inner_dict = inner_dict[i]
inner_dict[keys[-1]] = value
with open(file, 'w') as wf:
if strip_qoutes:
wf.write(yaml.dump(yaml_doc,
default_flow_style=False).replace("'", "").replace('"', ""))
else:
wf.write(yaml.dump(yaml_doc, default_flow_style=False))
wf.close()
I wanna make a dictionary has name's key & data.In views.py I wrote
data_dict ={}
def try_to_int(arg):
try:
return int(arg)
except:
return arg
def main():
book4 = xlrd.open_workbook('./data/excel1.xlsx')
sheet4 = book4.sheet_by_index(0)
data_dict_origin = OrderedDict()
tag_list = sheet4.row_values(0)[1:]
for row_index in range(1, sheet4.nrows):
row = sheet4.row_values(row_index)[1:]
row = list(map(try_to_int, row))
data_dict_origin[row_index] = dict(zip(tag_list, row))
if data_dict_origin['name'] in data_dict:
data_dict[data_dict_origin['name']].update(data_dict_origin)
else:
data_dict[data_dict_origin['name']] = data_dict_origin
main()
When I printed out data_dict,it is
OrderedDict([(1, {'user_id': '100', 'group': 'A', 'name': 'Tom', 'dormitory': 'C'}), (2, {'user_id': '50', 'group': 'B', 'name': 'Blear', 'dormitory': 'E'})])
My ideal dictionary is
dicts = {
Tom: {
'user_id': '100',
'group': 'A',
'name': 'Tom',
'dormitory': 'C'
},
Blear: {
},
}
How should I fix this?What should I write it?
The code is using the wrong key in the dictionary. The keys are 1, 2, and do not have the name key. You can use this code instead:
for value in data_dict.values():
if value['name'] in data_dict:
data_dict[value['name']].update(value)
else:
data_dict[value['name']] = value
Your data_dict_origin has numbers as keys and dicts as values (which technically makes it a sparse array of dicts). The "name" key exists in those dicts, not in your data_dict.
I am serializing multiple nested dictionaries to JSON using Python with simplejson.
Is there any way to automatically exclude empty/null values?
For example, serialize this:
{
"dict1" : {
"key1" : "value1",
"key2" : None
}
}
to
{
"dict1" : {
"key1" : "value1"
}
}
When using Jackson with Java you can use Inclusion.NON_NULL to do this. Is there a simplejson equivalent?
def del_none(d):
"""
Delete keys with the value ``None`` in a dictionary, recursively.
This alters the input so you may wish to ``copy`` the dict first.
"""
# For Python 3, write `list(d.items())`; `d.items()` won’t work
# For Python 2, write `d.items()`; `d.iteritems()` won’t work
for key, value in list(d.items()):
if value is None:
del d[key]
elif isinstance(value, dict):
del_none(value)
return d # For convenience
Sample usage:
>>> mydict = {'dict1': {'key1': 'value1', 'key2': None}}
>>> print(del_none(mydict.copy()))
{'dict1': {'key1': 'value1'}}
Then you can feed that to json.
My Python3 version of this has the benefit of not changing the input, as well as recursion into dictionaries nested in lists:
def clean_nones(value):
"""
Recursively remove all None values from dictionaries and lists, and returns
the result as a new dictionary or list.
"""
if isinstance(value, list):
return [clean_nones(x) for x in value if x is not None]
elif isinstance(value, dict):
return {
key: clean_nones(val)
for key, val in value.items()
if val is not None
}
else:
return value
For example:
a = {
"a": None,
"b": "notNone",
"c": ["hello", None, "goodbye"],
"d": [
{
"a": "notNone",
"b": None,
"c": ["hello", None, "goodbye"],
},
{
"a": "notNone",
"b": None,
"c": ["hello", None, "goodbye"],
}
]
}
print(clean_nones(a))
results in this:
{
'b': 'notNone',
'c': ['hello', 'goodbye'],
'd': [
{
'a': 'notNone',
'c': ['hello', 'goodbye']
},
{
'a': 'notNone',
'c': ['hello', 'goodbye']
}
]
}
>>> def cleandict(d):
... if not isinstance(d, dict):
... return d
... return dict((k,cleandict(v)) for k,v in d.iteritems() if v is not None)
...
>>> mydict = dict(dict1=dict(key1='value1', key2=None))
>>> print cleandict(mydict)
{'dict1': {'key1': 'value1'}}
>>>
I don't like using del in general, changing the existing dictionary can have subtle effects depending on how they are created. Creating new dictionaries with None removed prevents all side effect.
You can try this approach. In my case (I use python 3), it works well.
def to_json(self):
return json.dumps(self,
default=lambda o: dict((key, value) for key, value in o.__dict__.items() if value),
indent=4,
allow_nan=False)
This solution is correction of the one above from #eric which does not handle list type corectly.
Values in canonical JSON dictionary can be of one of following 3 types:
dictionary
list
value type (string, integer or floating point)
Note: Assumption is that we are dealing here with canonical JSON dictionary which can really contain only above mentioned types. If dictionary contains other types then ones mentioned above (e.g. tuples, custom classes, ...), then this solution won't work as expected.
The essential difference between this solution (below) and the original one from #eric is that list can contain elements of dictionary type from iside of which we want to drop elements with None value.
def cleandict(d):
if isinstance(d, dict):
return {k: cleandict(v) for k, v in d.items() if v is not None}
elif isinstance(d, list):
return [cleandict(v) for v in d]
else:
return d
Note: Please keep in mind that we must NOT remove None elements from the list since it would affect structural integrity of the list data. If some ( or all) of list elements have None value, they shall remain listed in the list structure as they were in order to preserve original structural meaning/integrity of the list.
def excludeNone(d):
for k in list(d):
if k in d:
if type(d[k]) == dict:
excludeNone(d[k])
if not d[k]:
del d[k]
It works for me:
When dictionary has dict/list/tuple values ....
for example it is my object:
dict_obj = {
'inline_keyboard': [
[
{'text': '0-0', 'url': None, 'login_url': None, 'callback_data': '0-0', 'switch_inline_query': None},
{'text': '0-1', 'url': None, 'login_url': None, 'callback_data': '0-1', 'switch_inline_query': None}
],
[
{'text': '1-0', 'url': None, 'login_url': None, 'callback_data': '1-0', 'switch_inline_query': None},
{'text': '1-1', 'url': None, 'login_url': None, 'callback_data': '1-1', 'switch_inline_query': None}
],
[
{'text': '2-0', 'url': None, 'login_url': None, 'callback_data': '2-0', 'switch_inline_query': None}
]
]
}
I wrote this function:
def delete_none_values(obj):
if isinstance(obj, dict):
for k, v in list(obj.items()):
if v is None:
del obj[k]
elif isinstance(v, dict):
delete_none_values(v)
elif isinstance(v, (list, tuple)):
for _ in v:
delete_none_values(_)
elif isinstance(obj, (list, tuple)):
for _ in obj:
delete_none_values(_)
return obj
And then when use this fuction:
from json import dumps
print(
dumps(
delete_none_values(dict_obj.copy()),
indent=2
)
)
output is:
{
"inline_keyboard": [
[
{"text": "0-0", "callback_data": "0-0"},
{"text": "0-1", "callback_data": "0-1"}
],
[
{"text": "1-0", "callback_data": "1-0"},
{"text": "1-1", "callback_data": "1-1"}
],
[
{"text": "2-0", "callback_data": "2-0"}
]
]
}
Could you maybe remain 'url' if it has value in one place and remove it if it none on another place?
'inline_keyboard': [
[
{'text': '0-0', 'url': 'someValue', 'login_url': None, 'callback_data': '0-0', 'switch_inline_query': None},
{'text': '0-1', 'url': None, 'login_url': None, 'callback_data': '0-1', 'switch_inline_query': None}
],
[
{'text': '1-0', 'url': None, 'login_url': None, 'callback_data': '1-0', 'switch_inline_query': None},
{'text': '1-1', 'url': None, 'login_url': None, 'callback_data': '1-1', 'switch_inline_query': None}
],
[
{'text': '2-0', 'url': None, 'login_url': None, 'callback_data': '2-0', 'switch_inline_query': None}
]
]