Modifying nested dictionaries - python

Given these two dicts:
empty = {'151': {'1': 'empty', '0': 'empty', '2': '2.30'}}
full = {'151': {'1': 3.4, '0': 3.6, '2': 2}}
Firstly, I want to check if empty.keys() == full.keys() if it holds, I want to replace the empty values with corresponding value from full dictionary. It ought to result in:
not_empty = {'151': {'1': '3.4', '0': '3.6', '2': '2.30'}}
My solution so far: I thought I would identify all the keys with empty values using regex, but for whatever reason my code so far, produces an empty dict {}.
import re
find_empty = re.findall("'(\d)':\s'empty'", str(empty))[0]
if empty.keys() == full.keys():
k = empty.values()[0].keys()
v = empty.values()[0].values()
print {k:v for k,v in empty.values()[0].iteritems()\
if empty.values()[0][find_empty] != 'empty'}
I hoped it would output {'151': {'2': '2.30'}} as a good starting point. Anyway, I guess there is a cleaner solution than regex for this task, so any hints are welcome!

Regex is not the right tool for this job. I would suggest a recursive approach like the following.
empty = {'151': {'1': 'empty', '0': 'empty', '2': '2.30'}}
full = {'151': {'1': 3.4, '0': 3.6, '2': 2}}
def repl(a, b):
    """Return a copy of dict ``a`` with every 'empty' value filled in from ``b``.

    Nested dicts are processed recursively. Neither input is modified.

    Args:
        a: dict whose 'empty' placeholders should be replaced.
        b: dict holding the replacement values under the same keys. If it is
            not a dict at some level (the two shapes diverge), it is treated
            as having no replacement values at that level.

    Returns:
        A new dict with the same keys as ``a``. Placeholders with no
        counterpart in ``b`` become the string 'Not there'.
    """
    # Guard against a non-dict counterpart: calling .get on e.g. an int would
    # raise AttributeError halfway through the traversal.
    source = b if isinstance(b, dict) else {}
    clean = {}
    for k, v in a.items():
        if v == 'empty':
            # Replace the placeholder, defaulting when b has nothing for it.
            clean[k] = source.get(k, 'Not there')
        elif isinstance(v, dict):
            # Recurse into the nested dict with the matching part of b.
            clean[k] = repl(v, source.get(k, {}))
        else:
            # A real value: keep it as is.
            clean[k] = v
    return clean
print repl(empty, full)
OUTPUT
{'151': {'1': 3.4, '0': 3.6, '2': '2.30'}}
EDIT: I am not sure if this takes care of all of your cases, but it is probably worth a look anyway.
empty = {'151': {'1': 'empty', '0': 'empty', '2': '2.30', '8': ['empty', 'empty', 5, {"foo2": "bar2", "1": "empty"}]}}
full = {'151': {'1': 3.4, '0': 3.6, '2': 2, '8': ['foo', 'bar', 'baz', {"foo3": "bar3", "1": "2"}]}}
def repl(a, b):
    """Return a copy of ``a`` with every 'empty' placeholder filled from ``b``.

    ``a`` and ``b`` are walked in parallel: dicts are matched by key, lists by
    position. Dicts and lists nested at any depth are handled. Neither input
    is modified.

    Args:
        a: dict or list that may contain the string 'empty' as a placeholder.
        b: structure of the same shape holding the replacement values.

    Returns:
        A new dict/list shaped like ``a``. A placeholder with no counterpart
        in ``b`` (missing key, shorter list, or a container of a different
        type) becomes the string 'Not there'. If ``a`` is neither a dict nor
        a list it is returned unchanged.
    """
    # Unique marker for "b has nothing here"; None could be a legitimate value.
    missing = object()

    def fill(value, counterpart):
        # Resolve one element of a against its counterpart from b.
        if value == 'empty':
            return 'Not there' if counterpart is missing else counterpart
        if isinstance(value, (dict, list)):
            return repl(value, None if counterpart is missing else counterpart)
        return value

    if isinstance(a, dict):
        # A counterpart of the wrong type offers no usable replacements.
        source = b if isinstance(b, dict) else {}
        return {k: fill(v, source.get(k, missing)) for k, v in a.items()}

    if isinstance(a, list):
        source = b if isinstance(b, list) else []
        # Pad the counterpart so that no element of a is dropped when b is
        # shorter (a bare zip() would silently truncate the result).
        padding = [missing] * (len(a) - len(source))
        return [fill(v, other) for v, other in zip(a, list(source) + padding)]

    return a
print repl(empty, full)
OUTPUT
{'151': {'1': 3.4, '0': 3.6, '2': '2.30', '8': ['foo', 'bar', 5, {'1': '2', 'foo2': 'bar2'}]}}

Related

Limit number of items / length of json for logging

I am working on an API that returns JSON. I am logging my responses, and sometimes the JSON is just absurdly long and basically clogs my log files. Is there a neat way to reduce the length of a JSON, purely for visually logging the data? (not in effect in production)
The basic approach is to reduce arrays over a length of 5 to [first 2, "...", last 2], and dictionaries with more than 4 items to {first 4, "..." : "..."}
The code below is ugly. I am aware that it should be a recursive solution that reduces the items in the same way for a JSON of arbitrary depth - it currently only does so for depth 2.
def log_reducer(response_log, max_depth=2):
    """Return a shortened copy of a JSON-like structure for logging.

    Lists longer than 5 items become ``[first 2, '...', last 2]`` and dicts
    with more than 4 items become ``{first 4, '...': '...'}``. The input is
    never modified; reduced containers are always new objects.

    Args:
        response_log: the decoded JSON value (dict, list or scalar).
        max_depth: how many container levels to reduce, counting the top
            level as 1. The default of 2 reduces the top-level container and
            its direct children. Pass ``None`` to reduce every level.

    Returns:
        The reduced structure, or ``response_log`` itself if it is not a
        dict/list or if anything goes wrong while reducing it.
    """
    def shrink(node, depth):
        # Children are only reduced while we are above the depth limit.
        descend = max_depth is None or depth < max_depth

        def child(value):
            return shrink(value, depth + 1) if descend else value

        if isinstance(node, dict):
            items = list(node.items())
            reduced = {key: child(value) for key, value in items[:4]}
            # Only add the marker when something was actually cut off.
            if len(items) > 4:
                reduced["..."] = "..."
            return reduced

        if isinstance(node, list):
            if len(node) > 5:
                head = [child(value) for value in node[:2]]
                tail = [child(value) for value in node[-2:]]
                return head + ["..."] + tail
            return [child(value) for value in node]

        return node

    try:
        return shrink(response_log, 1)
    except Exception:
        # Logging helpers must never break the request they are describing,
        # so fall back to the unreduced value.
        return response_log
The returned response_log is then logged with logger.info(str(response_log))
As you can see, the fact that there can be either arrays or dictionaries at every level makes this task a little more complex, and I am struggling to find a library or code snippet of any kind which would simplify this. If anyone wants to give it a shot, I would appreciate it a lot.
you can use a test JSON like this to see it in effect:
test_json = {"works": [1, 2, 3, 4, 5, 6],
"not_affected": [{"1": "1", "2": "2", "3": "3", "4": "4", "5": "5"}],
"1": "1", "2": "2", "3": "3",
"removed": "removed"
}
print("original", test_json)
reduced_log = log_reducer(test_json)
print("reduced", reduced_log)
print("original", test_json)
reduced_log = log_reducer([test_json]) # <- increases nesting depth
print("reduced", reduced_log)
This answer uses @calceamenta's idea, but implements the actual cutting-down logic:
def recursive_reduce(obj):
    """Return a shortened copy of a JSON-like value, reducing every level.

    * Scalars (float, str, int, bool, None) are returned unchanged.
    * Dicts with more than 5 keys keep the first two and last two keys in
      sorted order, with a ``'...': '...'`` marker entry in between.
    * Lists with more than 5 items keep the first two and last two items,
      with a ``'...'`` marker in between.
    * Anything else is converted with ``str()``.

    The input is never modified; dicts and lists in the result are new
    objects.
    """
    if isinstance(obj, (float, str, int, bool, type(None))):
        return obj

    if isinstance(obj, dict):
        keys = sorted(obj)
        if len(keys) <= 5:
            return {key: recursive_reduce(obj[key]) for key in keys}
        # Build the result in the order head, marker, tail. The marker is
        # added to the new dict only, never to the caller's dict.
        reduced = {key: recursive_reduce(obj[key]) for key in keys[:2]}
        reduced['...'] = '...'
        for key in keys[-2:]:
            reduced[key] = recursive_reduce(obj[key])
        return reduced

    if isinstance(obj, list):
        kept = obj[:2] + ['...'] + obj[-2:] if len(obj) > 5 else obj
        # The comprehension creates a fresh list, so a short input list is
        # not overwritten element by element.
        return [recursive_reduce(item) for item in kept]

    return str(obj)
test_json = {"works": [1, 2, 3, 4, 5, 6],
"not_affected": [{"1": "1", "2": "2", "3": "3", "4": "4", "5": "5"}],
"1": "1", "2": "2", "3": "3",
"removed": "removed"
}
print("original", test_json)
reduced_log = recursive_reduce(test_json)
print("reduced", reduced_log)
Output:
original {'works': [1, 2, 3, 4, 5, 6], 'not_affected': [{'1': '1', '2': '2', '3': '3', '4': '4', '5': '5'}], '1': '1', '2': '2', '3': '3', 'removed': 'removed'}
reduced {'1': '1', '2': '2', '...': '...', 'removed': 'removed', 'works': [1, 2, '...', 5, 6]}
Hope this helps :)
You cannot replace __str__ on the built-in dict and list types themselves, but you can write your own formatting function (or a subclass that overrides __str__) which recursively formats every element. A simple boilerplate looks like this:
def custom_print(obj):
    """Recursively flatten ``obj`` into a single string for logging.

    Lists contribute the concatenated strings of their items, dicts the
    concatenated strings of their values, and any other object its ``str()``.
    No separators are inserted; adapt the joining and formatting to the
    layout of your log file.

    Returns:
        The concatenated string ('' for an empty list or dict).
    """
    if isinstance(obj, list):
        return ''.join(custom_print(item) for item in obj)
    if isinstance(obj, dict):
        return ''.join(custom_print(item) for item in obj.values())
    # Leaf value: without this base case the recursion has nothing to return
    # and the string concatenation fails.
    return str(obj)
Use this custom log function to print into your log file as per your log file format.

Python: How do I iterate over more than 2 dictionaries?

Not really sure what I'm doing wrong here. I thought the zip method would work to check if a value is in multiple lists? What I'd want it to do is check to see if that value is in any of those dictionaries, and if so, to print its key, but if not, then print only one string of ('Not in Any Dictionary'). This method prints 40 of them for some reason with the real dictionaries.
MLB_Teams = {1: 'New York Yankees', 2: 'Pittsburgh Pirates'}
NBA_Teams = {1: 'Houston Rockets', 2: 'Brooklyn Nets'}
NFL_Teams = {1: 'Philadelphia Eagles', 2: 'Detroit Lions'}
for (key,value), (key,value), (key, value) in zip(MLB_Teams.items(), NBA_Teams.items(), NFL_Teams.items()):
reply = 'Houston Rockets'
if reply == value:
print(key)
else:
print('Not In Any Dictionary')
I think you can do it in a very simple way:
MLB_Teams = {1: 'New York Yankees', 2: 'Pittsburgh Pirates'}
NBA_Teams = {1: 'Houston Rockets', 2: 'Brooklyn Nets'}
NFL_Teams = {1: 'Philadelphia Eagles', 2: 'Detroit Lions'}
v = 'Philadelphia Eagles'
def find_in_dict(val, d):
    """Print the key of the first entry in ``d`` whose value equals ``val``.

    Args:
        val: the value to look for.
        d: the dict to search.

    Returns:
        True if a matching entry was found (its key has been printed),
        False otherwise.
    """
    for k, v in d.items():
        if v == val:
            print(k)
            return True
    # Explicit result instead of falling off the end with an implicit None.
    return False
for dd in (MLB_Teams, NBA_Teams, NFL_Teams):
if find_in_dict(v, dd):
break
else:
print('Not In Any Dictionary')
The issue lies in how you've reused the variable names for key and value. Add a print statement to see the effect.
for (key,value), (key,value), (key, value) in zip(MLB_Teams.items(), NBA_Teams.items(), NFL_Teams.items()):
print(value) #added
reply = 'Houston Rockets'
if reply == value:
print(key)
else:
print('Not In Any Dictionary')
#output
Philadelphia Eagles
Not In Any Dictionary
Detroit Lions
Not In Any Dictionary
The variables key and value get reassigned to the last entry in the tuple.
You can use the zip just fine if you handle unpacking later.
MLB_Teams = {1: 'New York Yankees', 2: 'Pittsburgh Pirates'}
NBA_Teams = {1: 'Houston Rockets', 2: 'Brooklyn Nets'}
NFL_Teams = {1: 'Philadelphia Eagles', 2: 'Detroit Lions'}
reply = 'Houston Rockets'
for tups in zip(MLB_Teams.items(), NBA_Teams.items(), NFL_Teams.items()):
if any(reply == val for key,val in tups):
print(tups[0][0]) #key
else:
print('Not In Any Dictionary')
#output
1
Not In Any Dictionary

Fuzzy set addition and squaring

I'm working with fuzzy sets. I was wondering if there are any libraries available for Python? Namely, I'm having trouble adding 2 fuzzy sets and also squaring them. I am storing the fuzzy set in a Python dictionary, the key being the member element and the value is the membership value.
My sets are:
set_A = {'3':0.1, '4': 0.8, '5': 0.5}
set_B = {'6':0.6, '7': 0.2, '8': 0.7}
I want to find out set_A + set_B
and also set_A^2 + set_B^2
I don't know for certain if there's not already a library for this, but here's quick and simple class that I think does what you expect:
class Fuzzy_Set:
    """A fuzzy set stored as a dict mapping each element to its membership value.

    Supports ``+`` (element-wise sum of membership values), ``**``
    (element-wise power, optionally modulo) and ``%`` (element-wise modulo).
    Every operator returns a plain dict, not a ``Fuzzy_Set``.
    """

    def __init__(self, set):
        # The parameter name shadows the builtin only inside this method; it
        # is kept so that existing keyword callers (set=...) keep working.
        self.set = set

    def __add__(self, other):
        """Return a dict with the summed membership of every element in either set.

        An element present in only one operand counts as 0 in the other.
        """
        if not isinstance(other, Fuzzy_Set):
            # Let Python try the reflected operation or raise a TypeError,
            # rather than failing with an AttributeError on ``other.set``.
            return NotImplemented
        retset = {}
        for item in set(self.set).union(other.set):
            retset[item] = self.set.get(item, 0) + other.set.get(item, 0)
        return retset

    def __pow__(self, power, modulo=None):
        """Return a dict with every membership value raised to ``power``.

        If ``modulo`` is truthy the result of each power is reduced modulo
        that value; ``None`` or 0 leaves the powers unreduced.
        """
        if modulo:
            return {k: v ** power % modulo for k, v in self.set.items()}
        return {k: v ** power for k, v in self.set.items()}

    def __mod__(self, other):
        """Return a dict with every membership value taken modulo ``other``."""
        # Three-argument pow() dispatches to __pow__(1, other); no copy of
        # the set is needed for that.
        return pow(self, 1, other)
if __name__ == '__main__':
s1 = Fuzzy_Set({'3':0.1, '4': 0.8, '5': 0.5})
s2 = Fuzzy_Set({'5': .5, '6':0.6, '7': 0.2, '8': 0.7})
print(s1 + s2)
print(s1**2)
print(Fuzzy_Set({'1': 1, '2': 2, '3': 3})%2)
This implements adding and exponentiation and modulo. The output of main is:
{'3': 0.1, '6': 0.6, '5': 1.0, '7': 0.2, '8': 0.7, '4': 0.8}
{'3': 0.010000000000000002, '4': 0.6400000000000001, '5': 0.25}
{'1': 1, '2': 0, '3': 1}

How to recursively find specific key in nested JSON?

I'm trying to pull nested values from a json file. I want to print out each of the values for every "id" key. I think I'm close but can't figure out why the obj type changes from a dict to a list, and then why I'm unable to parse that list.
Here is a link to the json I'm working with: http://hastebin.com/ratevimixa.tex
and here is my current code:
#!/usr/bin/env python
#-*- coding: utf-8 -*-
import json
json_data = open('JubJubProductions.json', 'r+')
jdata = json.loads(json_data.read().decode("utf-8"))
def recursion(dict):
for key, value in dict.items():
if type(value) == type(dict):
if key != "paging":
for key, value in value.items():
if isinstance (value,list):
print key
# place where I need to enter list comprehension?
if type(value) == type(dict):
if key == "id":
print " id found " + value
if key != "id":
print key + " 1st level"
if key == "id":
print key
else:
if key == "id":
print "id found " + value
if __name__ == '__main__':
recursion(jdata)
-------------------------------------------------------------------------------------------update
This is now what I'm working with and it'll return a single id value, but not all of them:
#!/usr/bin/env python
#-*- coding: utf-8 -*-
import json
json_data = open('jubjubProductions', 'r+')
jdata = json.loads(json_data.read().decode("utf-8"))
def id_generator(d):
for k, v in d.items():
if k == "id":
yield v
elif isinstance(v, dict):
for id_val in id_generator(v):
yield id_val
if __name__ == '__main__':
for _ in id_generator(jdata):
print (_)
The JSON might contain a list of objects, which needs to be searched:
Python 2.7 version:
# Generator (Python 2.7 flavour): walks a decoded JSON value and yields every
# value stored under the key lookup_key.
#   json_input -- the value to search; only dicts and lists are traversed,
#                 any other type yields nothing.
#   lookup_key -- the dict key whose values should be yielded.
def item_generator(json_input, lookup_key):
if isinstance(json_input, dict):
# iteritems() is the Python 2 spelling; the Python 3 version uses items().
for k, v in json_input.iteritems():
if k == lookup_key:
# Match: yield the value itself. The matched value is not searched further.
yield v
else:
# No match on this key: search inside its value instead.
for child_val in item_generator(v, lookup_key):
yield child_val
elif isinstance(json_input, list):
# Lists have no keys of their own; search each element in turn.
for item in json_input:
for item_val in item_generator(item, lookup_key):
yield item_val
Python 3.x version:
def item_generator(json_input, lookup_key):
    """Yield every value stored under ``lookup_key`` in a decoded JSON value.

    Dicts and lists are searched recursively; any other type yields nothing.
    A value found under ``lookup_key`` is yielded as is and is not searched
    further.
    """
    if isinstance(json_input, list):
        for element in json_input:
            yield from item_generator(element, lookup_key)
        return

    if not isinstance(json_input, dict):
        return

    for key, value in json_input.items():
        if key != lookup_key:
            yield from item_generator(value, lookup_key)
        else:
            yield value
def id_generator(dict_var):
    """Yield every value stored under the key "id" at any nesting level.

    Both dicts and lists are traversed, so ids inside a list of objects are
    found as well. Any other type yields nothing. A value found under "id"
    is yielded as is and is not searched further.

    Args:
        dict_var: the decoded JSON value (usually the top-level dict).
    """
    if isinstance(dict_var, list):
        # JSON arrays commonly hold the objects that carry the ids.
        for element in dict_var:
            for id_val in id_generator(element):
                yield id_val
    elif isinstance(dict_var, dict):
        for k, v in dict_var.items():
            if k == "id":
                yield v
            else:
                for id_val in id_generator(v):
                    yield id_val
This will create an iterator which will yield every value on any level under key "id". Example usage (printing all of those values):
for _ in id_generator(some_json_dict):
print(_)
A little bit cleaner code (in python 3.x).
def parse_json_recursively(json_object, target_key):
    """Print ``"<target_key>: <value>"`` for every occurrence of ``target_key``.

    Dicts and lists, including subclasses such as ``OrderedDict``, are
    searched recursively. The value of a matching key is searched as well,
    so nested occurrences are also printed. Other types are ignored.

    Args:
        json_object: the decoded JSON value to search.
        target_key: the dict key to look for.
    """
    if isinstance(json_object, dict):
        for key, value in json_object.items():
            if key == target_key:
                print("{}: {}".format(target_key, value))
            # Always descend, whether or not this key matched.
            parse_json_recursively(value, target_key)
    elif isinstance(json_object, list):
        for item in json_object:
            parse_json_recursively(item, target_key)
json_object = {"key1": "val1", "key2": [{"key3":"val3", "key4": "val4"}, 123, "abc"]}
target_key = "key3"
parse_json_recursively(json_object, target_key) # Ouput key3: val3
Here is a simple recursive function to collect all values from a json document for a given key. Values can be json documents as well. The corresponding values appended to search_result.
def json_full_search(lookup_key, json_dict, search_result=None):
    """Collect every value stored under ``lookup_key`` in a JSON document.

    Dicts and lists are searched recursively; the value of a matching key is
    searched as well, so nested occurrences are also collected.

    Args:
        lookup_key: the dict key to look for.
        json_dict: the decoded JSON value to search.
        search_result: optional list to append the matches to. When omitted,
            a fresh list is created for each call.

    Returns:
        The list of matching values (``search_result`` itself if one was
        passed in).
    """
    # A literal [] default would be shared between calls and keep growing.
    if search_result is None:
        search_result = []

    if isinstance(json_dict, dict):
        for key, value in json_dict.items():
            if key == lookup_key:
                search_result.append(value)
            json_full_search(lookup_key, value, search_result)
    elif isinstance(json_dict, list):
        for element in json_dict:
            json_full_search(lookup_key, element, search_result)
    return search_result
def get_val(j, s, v=None):
    """Find key ``s`` in the (possibly nested) dict ``j``.

    Args:
        j: the dict to search; nested dicts are searched depth-first.
        s: the key to look for.
        v: optional expected value.

    Returns:
        * with ``v`` omitted: the value of the first ``s`` found;
        * with ``v`` given: True or False depending on whether the first
          ``s`` found has the value ``v``;
        * None if ``s`` does not occur at all. This is indistinguishable from
          a key that is present with the value None.
    """
    for k in j:
        if k == s:
            if v is None:
                return j[k]
            return v == j[k]
        if isinstance(j[k], dict):
            found = get_val(j[k], s, v)
            # Only stop when the nested dict actually contained the key;
            # otherwise keep looking through the remaining keys.
            if found is not None:
                return found
    return None
You can use it with a JSON list j, as shown below:
for l in j:
r = get_val(l, 'key')
print(r)
for l in j:
r = get_val(l, 'mac', '00-60-2F-5A-04-51')
print(r)
Extension to python 3.x answer:
If nested json has similar keys under different list or dictionaries and you want to take first value of it..
below is the generic way:
def get_value_from_generator(json_input, lookup_key):
    """Return the first value stored under ``lookup_key``, or None if absent.

    The result is also printed. The search stops at the first match instead
    of collecting every match first, so the rest of the document is not
    traversed.

    Args:
        json_input: the decoded JSON value to search.
        lookup_key: the dict key to look for.
    """
    # next() with a default pulls a single item from the generator and
    # yields None when there is no match at all.
    val = next(item_generator(json_input, lookup_key), None)
    print(f'lookup_key -> value : {val}')
    return val
def item_generator(json_input, lookup_key):
    """Yield every value stored under ``lookup_key``, tracing each visited entry.

    Dicts and lists are searched recursively; other types yield nothing.
    Every dict entry that is visited is printed as ``key -- value`` before it
    is examined. A matching value is yielded as is and not searched further.
    """
    if isinstance(json_input, dict):
        for key, value in json_input.items():
            print(f'{key} -- {value}')
            if key != lookup_key:
                yield from item_generator(value, lookup_key)
                continue
            yield value
    elif isinstance(json_input, list):
        for element in json_input:
            yield from item_generator(element, lookup_key)

Python if statement: False vs. 0.0

Is it possible to:
for k,v in kwargs.items()
if v == None or v == '' or v == 1.0 or v == False:
del kwargs[k]
without deleting the key if v == 0.0 (False seems to equal 0.0), and without deleting the keys whose value is True?
Or you can put it like this :
if v in (None, '', 1.0) or v is False:
You should use v is False instead of v == False. The same applies for your comparison to None. See PEP 8 - Style Guide for Python:
Comparisons to singletons like None should always be done with 'is' or 'is not', never the equality operators.
Slow down guys with your advice, from PEP 8:
Don't compare boolean values to True or False using ==
Yes: if greeting:
No: if greeting == True:
Worse: if greeting is True:
Also comparing float value you should not use == but
abs(x-other) < verysmall
Also you could use
if not v:
# do something
This may not be quite as precise as if v is False as it also runs for if v is 0, None, empty set etc.
I had trouble with this problem and the above solution worked for me.
Thanks for your replies. Using the suggestions, the problem was solved:
kwargs = {'None': None, 'empty': '', 'False': False, 'float': 1.0, 'True': True}
# Iterate over a snapshot of the items: deleting from a dict while iterating
# its live .items() view raises RuntimeError on Python 3.
for k, v in list(kwargs.items()):
    # True == 1.0, so True has to be excluded explicitly from the membership
    # test; False is matched by identity so that 0.0 is not caught with it.
    if v is False or (v is not True and v in (None, '', 1.0)):
        del kwargs[k]
kwargs
{'True': True}
-->
kwargs = {'None': None, 'empty': '', 'False': False, 'float': 0.0, 'True': True}
# Iterate over a snapshot of the items: deleting from a dict while iterating
# its live .items() view raises RuntimeError on Python 3.
for k, v in list(kwargs.items()):
    # True == 1.0, so True has to be excluded explicitly from the membership
    # test; False is matched by identity so that 0.0 is not caught with it.
    if v is False or (v is not True and v in (None, '', 1.0)):
        del kwargs[k]
kwargs
{'True': True, 'float': 0.0}

Categories

Resources