compare two dictionaries in python - python

How do I compare dictionaries that contain dictionaries?
This will work in case the dictionaries contain simple values
# will show the keys with different values
d1_keys = set(dict1.keys())
d2_keys = set(dict2.keys())
intersect_keys = d1_keys.intersection(d2_keys)
modified = {}
for i in intersect_keys:
if dict1[i] != dict2[i] : modified.update({i : (dict1[i], dict2[i])})
but i have a dictionary like this:
{ 'medic1' : {'date' : '02/02/2015', 'no' : '123' }, 'medic2' : {'date' :'02/03/2015', 'no' : '456' }}

Use a recursive function to handle the nested dictionaries:
Get the keys common to both dictionaries using keys() and a set intersection.
Iterate over the common keys with a for loop.
Check whether the value stored under each key is a dict.
If the value is a dict, call the same function with the two nested dictionaries as arguments, and store the result under that key in the modified dictionary.
If the value is not a dict, add the pair of differing values to the modified dictionary.
code:
dict1 = {
'medic1' : {'date' : '02/02/2015', 'no' : '123' },
'medic2' : {'date' : '02/03/2015', 'no' : '456' },
'testkey1': 'testvalue1',
'testkey2': 'testvalue2',
'testkey3':{ "level2_1":"value2_1",
"level2_2":{
"level3_1": "value3_1_change",
"level3_2": "value3_2",
}
}
}
dict2 = {
'medic1' : {'date' : '02/02/2015', 'no' : '456' },
'medic2' : {'date' : '02/03/2015', 'no' : '456' },
'testkey1': 'testvalue1',
'testkey2': 'testvalue22',
'testkey3':{ "level2_1":"value2_1",
"level2_2":{
"level3_1": "value3_1",
"level3_2": "value3_2",
}
}
}
import copy
def compareDict(dict1, dict2):
    """Return the differences between two (possibly nested) dictionaries.

    Only keys present in both dictionaries are examined.  For every such key
    whose values differ, the result holds either:

    * a nested result dictionary, when both values are themselves dicts, or
    * a ``(value_from_dict1, value_from_dict2)`` tuple otherwise.

    Keys whose values compare equal are omitted.  The result is deep-copied
    before being returned, so it shares no mutable objects with the inputs.
    """
    modified = {}
    for key in set(dict1).intersection(dict2):
        left, right = dict1[key], dict2[key]
        if left != right:
            # Recurse only when BOTH sides are dicts.  Checking just one side
            # would call .keys() on a non-dict value and raise AttributeError.
            if isinstance(left, dict) and isinstance(right, dict):
                modified[key] = compareDict(left, right)
            else:
                modified[key] = (left, right)
    return copy.deepcopy(modified)
modified = compareDict(dict1, dict2)
import pprint
pprint.pprint(modified)
output:
vivek#vivek:~/Desktop/stackoverflow$ python 5.py
{'medic1': {'no': ('123', '456')},
'testkey2': ('testvalue2', 'testvalue22'),
'testkey3': {'level2_2': {'level3_1': ('value3_1_change', 'value3_1')}}}

As long as the keys are hashable (which a dict guarantees!-) it doesn't matter whether the values are or not -- code like:
common_keys = set(dict1).intersection(dict2)
(a simplified version of your first three lines), then
modified = {k: (dict1[k], dict2[k])
for k in common_keys if dict1[k] != dict2[k]}
(a simplified version of your last three lines) will work fine!

Related

Pyspark - get attribute names from json file

I am new to PySpark. My requirement is to extract the attribute names from a nested JSON file. I tried using json_normalize imported from the pandas package. It works for direct attributes but never fetches the attributes inside JSON array attributes. My JSON doesn't have a static structure; it varies for each document that we receive. Could someone please help me, with an explanation, using the small example provided below:
{
"id":"1",
"name":"a",
"salaries":[
{
"salary":"1000"
},
{
"salary":"5000"
}
],
"states":{
"state":"Karnataka",
"cities":[
{
"city":"Bangalore"
},
{
"city":"Mysore"
}
],
"state":"Tamil Nadu",
"cities":[
{
"city":"Chennai"
},
{
"city":"Coimbatore"
}
]
}
}
Especially for the json array elements..
Expected output :
id
name
salaries.salary
states.state
states.cities.city
Here is the another solution for extracting all nested attributes from json
import json
# Accumulates every attribute path discovered by parse_json().
result_set = set()


def parse_json_array(json_obj, parent_path):
    """Walk the elements of a JSON array and record the paths they contain.

    Dict elements are handed to parse_json() under the same ``parent_path``
    (array indices are not part of the path).  Nested arrays are walked
    recursively.  Scalar elements carry no attribute names and are skipped.
    """
    for element in json_obj:
        if isinstance(element, dict):
            parse_json(element, parent_path)
        elif isinstance(element, list):
            parse_json_array(element, parent_path)
    return None


def parse_json(json_obj, parent_path):
    """Record the dotted path of every key in ``json_obj`` into result_set.

    Nested objects and arrays are descended into first, then the key itself
    is recorded, so container keys appear in the result as well as leaves.
    Each path is stored ASCII-encoded, with non-ASCII characters dropped.
    """
    for key in json_obj.keys():
        key_value = json_obj.get(key)
        # Path under which the children of this key are recorded.
        child_path = str(key) if parent_path == "" else parent_path + "." + str(key)
        if isinstance(key_value, dict):
            parse_json(key_value, child_path)
        elif isinstance(key_value, list):
            parse_json_array(key_value, child_path)
        # NOTE: with an empty parent_path this yields a leading "." (".id");
        # kept as-is so recorded paths stay identical to the existing output.
        result_set.add((parent_path + "." + key).encode('ascii', 'ignore'))
    return None
file_name = "C:/input/sample.json"
# Load inside a context manager so the file handle is closed as soon as the
# document has been parsed, even if json.load() raises.
with open(file_name, "r") as file_data:
    json_data = json.load(file_data)
# Single-argument print(...) behaves the same on Python 2.7 and Python 3.
print(json_data)
parse_json(json_data, "")
print(list(result_set))
Output:
{u'states': {u'state': u'Tamil Nadu', u'cities': [{u'city': u'Chennai'}, {u'city': u'Coimbatore'}]}, u'id': u'1', u'salaries': [{u'salary': u'1000'}, {u'salary': u'5000'}], u'name': u'a'}
['states.cities.city', 'states.cities', '.id', 'states.state', 'salaries.salary', '.salaries', '.states', '.name']
Note:
My Python version: 2.7
you can do in this way also.
data = { "id":"1", "name":"a", "salaries":[ { "salary":"1000" }, { "salary":"5000" } ], "states":{ "state":"Karnataka", "cities":[ { "city":"Bangalore" }, { "city":"Mysore" } ], "state":"Tamil Nadu", "cities":[ { "city":"Chennai" }, { "city":"Coimbatore" } ] } }
def dict_ittr(lin, data):
    """Print the dotted path of every leaf value found in ``data``.

    ``lin`` is the path prefix accumulated so far; each key is appended as
    "." + key, so paths printed for a top-level call with ``lin == ""``
    start with a dot.  Dicts are descended into directly.  Lists are walked
    element by element: dict elements are descended into, and any other
    element prints the path of the list itself.  A path is therefore printed
    once per list element that reaches it.
    """
    for k, v in data.items():
        path = lin + "." + k
        if isinstance(v, list):
            for item in v:
                if isinstance(item, dict):
                    dict_ittr(path, item)
                else:
                    # Scalar list element: it has no keys of its own, so the
                    # list's own path is the leaf.
                    print(path)
        elif isinstance(v, dict):
            dict_ittr(path, v)
        else:
            print(path)
dict_ittr("",data)
output
.states.state
.states.cities.city
.states.cities.city
.id
.salaries.salary
.salaries.salary
.name
If you treat the json like a python dictionary, this should work.
I just wrote a simple recursive program.
Script
import json
def js_r(filename):
    """Open ``filename`` and return its contents parsed as JSON."""
    with open(filename) as f_in:
        parsed = json.load(f_in)
    return parsed
g = js_r("city.json")
answer_d = {}
def base_line(g, answer_d):
    """Give ``answer_d`` an empty-dict entry for every key of ``g``.

    ``answer_d`` is modified in place (existing entries under the same keys
    are replaced) and is also returned.
    """
    answer_d.update({name: {} for name in g.keys()})
    return answer_d
answer_d = base_line(g, answer_d)
def recurser_func(g, answer_d):
    """Mirror the key structure of ``g`` into ``answer_d`` and return it.

    Every key of ``g`` gets an entry in ``answer_d`` whose value is a dict
    describing the keys nested beneath it (an empty dict for scalar values):

    * dict value  -> its keys are mirrored recursively;
    * list value  -> the keys of ALL dict elements are merged together;
      non-dict elements and empty lists contribute nothing.

    ``answer_d`` is modified in place.  Entries already present (for example
    the empty dicts created by base_line) are reused and extended.
    """
    for k, value in g.items():
        # Reuse the existing slot when there is one, so keys recorded by an
        # earlier list element or an earlier pass are not thrown away.
        child = answer_d.setdefault(k, {})
        if isinstance(value, dict):
            recurser_func(value, child)
        elif isinstance(value, list):
            for element in value:
                if isinstance(element, dict):
                    recurser_func(element, child)
    return answer_d
recurser_func(g,answer_d)
def printer_func(answer_d, list_to_print, parent):
    """Append the dotted path of every leaf of ``answer_d`` to ``list_to_print``.

    ``answer_d`` is a dict of dicts describing a key structure; a key whose
    value is an empty dict is a leaf.  ``parent`` is the prefix (ending in
    "." unless empty) prepended to every path produced by this call.
    ``list_to_print`` is extended in place and returned.
    """
    for k in answer_d.keys():
        children = answer_d[k]
        if children:
            # Carry the full accumulated prefix down, so paths deeper than
            # two levels keep all of their ancestors.
            printer_func(children, list_to_print, parent + str(k) + ".")
        else:
            list_to_print.append(parent + str(k))
    return list_to_print
l = printer_func(answer_d, [], "")
final = " ".join(l)
print(final)
Explanation
base_line makes a dictionary of all your base keys.
recurser_func checks if the key's value is a list or dict, then adds to the answer dictionary as necessary until answer_d looks like: {'id': {}, 'name': {}, 'salaries': {'salary': {}}, 'states': {'state': {}, 'cities': {'city': {}}}}
After these 2 functions are called you have a dictionary of keys in a sense. Then printer_func is a recursive function to print it as you desired.
NOTE:
Your question is similar to this one: Get all keys of a nested dictionary but since you have a nested list/dictionary instead of just a nested dictionary, their answers won't work for you, but there is more discussion on the topic on that question if you like more info
EDIT 1
my python version is 3.7.1
I have added a json file opener to the top. I assume that the json is named city.json and is in the same directory
EDIT 2: More thorough explanation
The main difficulty that I found in dealing with your data is the fact that you can have infinitely nested lists and dictionaries. This makes it complicated. Since the nesting could be infinitely deep, I knew this was a recursion problem.
So, I build a dictionary of dictionaries representing the key structure that you are looking for. Firstly I start with the baseline.
base_line makes {'id': {}, 'name': {}, 'salaries': {}, 'states': {}} This is a dictionary of empty dictionaries. I know that when you print. Every key structure (like states.state) starts with one of these words.
recursion
Then I add all the child keys using recursur_func.
When given a dictionary g this function for loop through all the keys in that dictionary and (assuming answer_d has each key that g has) for each key will add that keys child to answer_d.
If the child is a dictionary. Then I recurse with the given dictionary g now being the sub-part of the dictionary that pertains to the children, and answer_d being the sub_part of answer_d that pertains to the child.

Update dictionary inside of dictionary without deleting contents [duplicate]

I am trying to update values in a nested dictionary, without overwriting previous entries when the key already exists.
For example, I have a dictionary:
myDict = {}
myDict["myKey"] = { "nestedDictKey1" : aValue }
giving,
print myDict
>> { "myKey" : { "nestedDictKey1" : aValue }}
Now, I want to add another entry , under "myKey"
myDict["myKey"] = { "nestedDictKey2" : anotherValue }}
This will return:
print myDict
>> { "myKey" : { "nestedDictKey2" : anotherValue }}
But I want:
print myDict
>> { "myKey" : { "nestedDictKey1" : aValue ,
"nestedDictKey2" : anotherValue }}
Is there a way to update or append "myKey" with new values, without overwriting the previous ones?
This is a very nice general solution to dealing with nested dicts:
import collections
def makehash():
    """Return a dict that creates nested dicts of the same kind on demand."""
    # makehash is its own default factory, so a missing key at any depth
    # produces another auto-creating dict.
    nested = collections.defaultdict(makehash)
    return nested
That allows nested keys to be set at any level:
myDict = makehash()
myDict["myKey"]["nestedDictKey1"] = aValue
myDict["myKey"]["nestedDictKey2"] = anotherValue
myDict["myKey"]["nestedDictKey3"]["furtherNestedDictKey"] = aThirdValue
For a single level of nesting, defaultdict can be used directly:
from collections import defaultdict
myDict = defaultdict(dict)
myDict["myKey"]["nestedDictKey1"] = aValue
myDict["myKey"]["nestedDictKey2"] = anotherValue
And here's a way using only dict:
try:
myDict["myKey"]["nestedDictKey2"] = anotherValue
except KeyError:
myDict["myKey"] = {"nestedDictKey2": anotherValue}
You can use collections.defaultdict for this, and just set the key-value pairs within the nested dictionary.
from collections import defaultdict
my_dict = defaultdict(dict)
my_dict['myKey']['nestedDictKey1'] = a_value
my_dict['myKey']['nestedDictKey2'] = another_value
Alternatively, you can also write those last 2 lines as
my_dict['myKey'].update({"nestedDictKey1" : a_value })
my_dict['myKey'].update({"nestedDictKey2" : another_value })
You can write a generator to update key in nested dictionary, like this.
def update_key(key, value, dictionary):
    """Set every occurrence of ``key`` in a nested structure to ``value``.

    This is a generator: nothing is updated until it is iterated (for
    example with ``list(...)``).  Its only ``yield`` statements relay items
    from recursive calls, so iterating it produces no items of its own; the
    in-place updates are the useful effect.

    Dict values, and dicts held directly inside list values, are searched
    recursively.  A value stored under a matching key is replaced and not
    searched any further.
    """
    for name, child in dictionary.items():
        if name == key:
            dictionary[key] = value
            continue
        if isinstance(child, dict):
            nested = (child,)
        elif isinstance(child, list):
            # Lazy on purpose: elements are checked one at a time as they
            # are visited.
            nested = (item for item in child if isinstance(item, dict))
        else:
            nested = ()
        for mapping in nested:
            for outcome in update_key(key, value, mapping):
                yield outcome
list(update_key('Any level key', 'Any value', DICTIONARY))
from ndicts.ndicts import NestedDict
nd = NestedDict()
nd["myKey", "nestedDictKey1"] = 0
nd["myKey", "nestedDictKey2"] = 1
>>> nd
NestedDict({'myKey': {'nestedDictKey1': 0, 'nestedDictKey2': 1}})
>>> nd.to_dict()
{'myKey': {'nestedDictKey1': 0, 'nestedDictKey2': 1}}
To install ndicts pip install ndicts
You could treat the nested dict as immutable:
myDict["myKey"] = dict(myDict["myKey"], **{ "nestedDictKey2" : anotherValue })
myDict["myKey"]["nestedDictKey2"] = anotherValue
myDict["myKey"] returns the nested dictionary to which we can add another key like we do for any dictionary :)
Example:
>>> d = {'myKey' : {'k1' : 'v1'}}
>>> d['myKey']['k2'] = 'v2'
>>> d
{'myKey': {'k2': 'v2', 'k1': 'v1'}}
I wrote myself a function to tackle this issue
def updateDict2keys(myDict, mykey1, mykey2, myitems):
    """
    Store ``myitems`` under ``myDict[mykey1][mykey2]``.

    The inner dictionary at ``mykey1`` is created when missing; other
    entries already stored in it are kept, and an existing value at
    ``mykey2`` is replaced.
    input: dictionary, key1, key2 and the items to store
    output: the same dictionary, organized as {mykey1: {mykey2: myitems}}
    """
    inner = myDict.setdefault(mykey1, {})
    inner[mykey2] = myitems
    return myDict

Check dictionary's values are included in another dictionary in Python 3

I have two dictionaries like the ones below. What I want to do is check that all of a's values are included in dictionary b. The two dictionaries may have different structures, and some of a's keys are not included in b. I would like to know a generic way to do this.
Check value list. All a's values should be included in b
Expected outputs are like below text output. I know a[0].name is not valid in python. This is not python's raw code.
a[0]['name'] in b? => yes, same value
a[0]['vals'][0]['apple'] in b? => yes, but different value
a[0]['vals'][0]['banana'][0]['hoge'] in b? => not exists
a[0]['vals'][0]['banana'][0]['fuga'] in b? => not exists
Two dictionaries.
a = [
{
"name":"hoge",
"vals":[
{
"apple":11,
"banana":{
"hoge":1,
"fuga":"aaa"
}
}
]
}
]
b = [
{
"name":"hoge",
"vals":[
{
"apple":21,
"grape":{
"foo":1
}
}
]
}
]
You can implement a dict comparison function as I did below:
def compare_ndic(src, dst, pre=''):
    """Print, for every key of ``src``, how its value compares with ``dst``.

    For each key one of three messages is printed: the key is missing from
    ``dst``, the values are equal, or the values differ.  When both values
    are dicts the comparison recurses instead, and nested keys are reported
    with a dotted path.  ``pre`` is the path prefix of the dict currently
    being compared (empty at the top level).  Keys that exist only in
    ``dst`` are not reported.
    """
    for skey, sval in src.items():
        # Build the path through str.format so that non-string keys
        # (ints, tuples, ...) do not raise TypeError on concatenation.
        print_skey = '{}.{}'.format(pre, skey) if pre else str(skey)
        if skey not in dst:
            print('Key "{}" in {} does not existed in {}'.format(print_skey, 'src', 'dst'))
            continue
        dval = dst[skey]
        if isinstance(sval, dict) and isinstance(dval, dict):
            # Both sides are dicts: compare them key by key.
            compare_ndic(sval, dval, print_skey)
        elif sval == dval:
            print('Value of key "{}" in {} is the same with value in {}'.format(print_skey, 'src', 'dst'))
        else:
            print('Value of key "{}" in {} is different with value in {}'.format(print_skey, 'src', 'dst'))
a = {
"name":"hoge",
"vals":
{
"apple":11,
"banana":{
"hoge":1,
"fuga":"aaa"
}
}
}
b = {
"name": "hoge",
"vals":
{
"apple": 11,
"banana": {
"hoge": 2,
"fuga": "aaa",
}
}
}
compare_ndic(a, b)
The output is like this:
Value of key "vals.banana.fuga" in src is the same with value in dst
Value of key "vals.banana.hoge" in src is different with value in dst
Value of key "vals.apple" in src is the same with value in dst
Value of key "name" in src is the same with value in dst
Be careful, my code cannot be used directly for your scenario, because you have list in your data. You can add some conditional statements and to iterate the whole list if necessary. Anyway, I've just provided an idea to compare two dicts, you need to modify it in your own way.
You have mistake in accessing method.
a is list which is accessed as
a[0]
but a[0] is dictionary which is accessed as
a[0]['vals'] # 'vals' is a key stored in dictionary
to only know keys in dictionary you can try
a[0].keys() # gives you result dict_keys(['name', 'vals']) which you can iterate further as you wish
and you can get all elemnt using
a[0].items() # gives you dict_items([('name', 'hoge'), ('vals', [{'banana': {'hoge': 1, 'fuga': 'aaa'}, 'apple': 21}])])
Moreover use correct syntax in code.
you have used incorrect syntax in your code
a = [{"name": "hoge", "vals": [{"apple": 21, "banana": {"hoge": 1, "fuga": "aaa"}}]}]
b = [{"name": "hoge", "vals": [{ "apple": 21, "grape": {"foo": 1}}]}]
if a[0]['name'] in b[0]['name']:
print('first match')
if a[0]['name'] == b[0]['name']:
print('item exist with same value')
else:
print('item exist but not same value')
for key in a[0]['vals'][0].keys():
if key in b[0]['vals'][0].keys():
print('second match with key : ' + str(key))
if a[0]['vals'][0][str(key)] == b[0]['vals'][0][str(key)]:
print('match exist with same value for key : ' + str(key))
else:
print('match failed for key : ' + str(key))
else:
print('match failed at 1')

Updating nested dictionaries when data has existing key

I am trying to update values in a nested dictionary, without overwriting previous entries when the key already exists.
For example, I have a dictionary:
myDict = {}
myDict["myKey"] = { "nestedDictKey1" : aValue }
giving,
print myDict
>> { "myKey" : { "nestedDictKey1" : aValue }}
Now, I want to add another entry , under "myKey"
myDict["myKey"] = { "nestedDictKey2" : anotherValue }}
This will return:
print myDict
>> { "myKey" : { "nestedDictKey2" : anotherValue }}
But I want:
print myDict
>> { "myKey" : { "nestedDictKey1" : aValue ,
"nestedDictKey2" : anotherValue }}
Is there a way to update or append "myKey" with new values, without overwriting the previous ones?
This is a very nice general solution to dealing with nested dicts:
import collections
def makehash():
    """Return a dict that creates nested dicts of the same kind on demand."""
    # makehash is its own default factory, so a missing key at any depth
    # produces another auto-creating dict.
    nested = collections.defaultdict(makehash)
    return nested
That allows nested keys to be set at any level:
myDict = makehash()
myDict["myKey"]["nestedDictKey1"] = aValue
myDict["myKey"]["nestedDictKey2"] = anotherValue
myDict["myKey"]["nestedDictKey3"]["furtherNestedDictKey"] = aThirdValue
For a single level of nesting, defaultdict can be used directly:
from collections import defaultdict
myDict = defaultdict(dict)
myDict["myKey"]["nestedDictKey1"] = aValue
myDict["myKey"]["nestedDictKey2"] = anotherValue
And here's a way using only dict:
try:
myDict["myKey"]["nestedDictKey2"] = anotherValue
except KeyError:
myDict["myKey"] = {"nestedDictKey2": anotherValue}
You can use collections.defaultdict for this, and just set the key-value pairs within the nested dictionary.
from collections import defaultdict
my_dict = defaultdict(dict)
my_dict['myKey']['nestedDictKey1'] = a_value
my_dict['myKey']['nestedDictKey2'] = another_value
Alternatively, you can also write those last 2 lines as
my_dict['myKey'].update({"nestedDictKey1" : a_value })
my_dict['myKey'].update({"nestedDictKey2" : another_value })
You can write a generator to update key in nested dictionary, like this.
def update_key(key, value, dictionary):
    """Set every occurrence of ``key`` in a nested structure to ``value``.

    This is a generator: nothing is updated until it is iterated (for
    example with ``list(...)``).  Its only ``yield`` statements relay items
    from recursive calls, so iterating it produces no items of its own; the
    in-place updates are the useful effect.

    Dict values, and dicts held directly inside list values, are searched
    recursively.  A value stored under a matching key is replaced and not
    searched any further.
    """
    for name, child in dictionary.items():
        if name == key:
            dictionary[key] = value
            continue
        if isinstance(child, dict):
            nested = (child,)
        elif isinstance(child, list):
            # Lazy on purpose: elements are checked one at a time as they
            # are visited.
            nested = (item for item in child if isinstance(item, dict))
        else:
            nested = ()
        for mapping in nested:
            for outcome in update_key(key, value, mapping):
                yield outcome
list(update_key('Any level key', 'Any value', DICTIONARY))
from ndicts.ndicts import NestedDict
nd = NestedDict()
nd["myKey", "nestedDictKey1"] = 0
nd["myKey", "nestedDictKey2"] = 1
>>> nd
NestedDict({'myKey': {'nestedDictKey1': 0, 'nestedDictKey2': 1}})
>>> nd.to_dict()
{'myKey': {'nestedDictKey1': 0, 'nestedDictKey2': 1}}
To install ndicts pip install ndicts
You could treat the nested dict as immutable:
myDict["myKey"] = dict(myDict["myKey"], **{ "nestedDictKey2" : anotherValue })
myDict["myKey"]["nestedDictKey2"] = anotherValue
myDict["myKey"] returns the nested dictionary to which we can add another key like we do for any dictionary :)
Example:
>>> d = {'myKey' : {'k1' : 'v1'}}
>>> d['myKey']['k2'] = 'v2'
>>> d
{'myKey': {'k2': 'v2', 'k1': 'v1'}}
I wrote myself a function to tackle this issue
def updateDict2keys(myDict, mykey1, mykey2, myitems):
    """
    Store ``myitems`` under ``myDict[mykey1][mykey2]``.

    The inner dictionary at ``mykey1`` is created when missing; other
    entries already stored in it are kept, and an existing value at
    ``mykey2`` is replaced.
    input: dictionary, key1, key2 and the items to store
    output: the same dictionary, organized as {mykey1: {mykey2: myitems}}
    """
    inner = myDict.setdefault(mykey1, {})
    inner[mykey2] = myitems
    return myDict

python recursive dictionary converting to strings

I had a problem on converting dictionaries to strings which has recursive features.
I had a map of routing such as the following;
urls = {
'/' : 'BaseController.hello',
'/api' : {
'/auth' : {
'/me' : 'ApiController.hello',
'/login' : {
'/guest' : 'ApiController.guest_login',
'/member': 'ApiController.member_login'
}
}
}
}
What I need to do is to generate a dictionary from that into the following;
url_map = {
'/' : 'BaseController.hello',
'/api/auth/me' : 'ApiController.hello',
'/api/auth/login/guest' : 'ApiController.guest_login',
'/api/auth/login/member': 'ApiController.member_login',
}
This feature is called route grouping but I haven't been able to write a function to generate that. Any ideas ?
You can recursively do it like this
def flatten(current_dict, current_key, result_dict):
    """Collapse a nested dict into ``result_dict`` using concatenated keys.

    ``current_key`` is the prefix built from the keys of the enclosing
    levels.  Each non-dict value is stored in ``result_dict`` under the
    prefix followed by its own key; dict values are flattened recursively
    with that combined key as the new prefix.  ``result_dict`` is filled in
    place and returned.
    """
    for segment, target in current_dict.items():
        full_path = current_key + segment
        if not isinstance(target, dict):
            # Leaf value: record it under its complete path.
            result_dict[full_path] = target
            continue
        # Nested mapping: keep descending with the extended prefix.
        flatten(target, full_path, result_dict)
    return result_dict
print flatten(urls, "", {})
Output
{
'/api/auth/me': 'ApiController.hello',
'/api/auth/login/guest': 'ApiController.guest_login',
'/': 'BaseController.hello',
'/api/auth/login/member': 'ApiController.member_login'
}

Categories

Resources