I have a Json file(dataSchema) and I want to fill the leaves with the values of another Json file(data).
Below an example of the two input files and what I want as output.
dataSchema:
"data": {
"keyA": {},
"keyB": {
"keyB1" : {
"keyB11" : ""
}
},
"keyC": {},
"keyD": {},
"keyE": {
"keyE1" : ""
}
}
data:
"data": {
"keyA": {
"keyA1" : {
"keyA11" : ValueA11,
"keyA12" : ValueA12
},
},
"keyB": {
"keyB1" : {
"keyB11" : ValueB11
},
"keyB2" : {
"keyB21" : ValueB21
},
"keyB3" : {
"keyB31" : ValueB31,
"keyB32" : ValueB32
}
},
"keyC": {
"keyC1" : ValueC1
},
"keyD": {
"keyD1" : {
"keyD11" : ValueD11
},
"keyD2" : {
"keyD21" : ValueD21
}
},
"keyE": {
"keyE1" : {
"keyE11" : {
"keyE111" : ValueE111,
"keyE112" : ValueE112
},
"keyE12" : ValueE12
},
"keyE2" : ValueE2
}
}
What I want:
"data": {
"keyA": {
"keyA1" : {
"keyA11" : ValueA11,
"keyA12" : ValueA12
},
},
"keyB": {
"keyB1" : {
"keyB11" : ValueB11
}
},
"keyC": {
"keyC1" : ValueC1
},
"keyD": {
"keyD1" : {
"keyD11" : ValueD11
},
"keyD2" : {
"keyD21" : ValueD21
}
},
"keyE": {
"keyE1" : {
"keyE11" : {
"keyE111" : ValueE111,
"keyE112" : ValueE112
},
"keyE12" : ValueE12
}
}
}
I tried with update function but it includes all sub-keys.
Is there an elegant and concise way to do this in Python?
You can use recursion for that:
def is_leaf(v):
    """Return True when schema node *v* marks a leaf.

    A leaf is an empty dict or any non-dict value (for example ``""``).
    """
    return not isinstance(v, dict) or not v


def fill_leaves(schema, data):
    """Return the part of *data* that is selected by *schema*.

    Keys of *data* that are missing from *schema* are dropped.  Where the
    schema reaches a leaf, the whole corresponding value of *data* is copied
    (including any sub-keys); otherwise the function recurses one level down.

    Args:
        schema: nested dict describing which keys to keep.
        data: nested dict holding the actual values.

    Returns:
        A new dict; neither input is modified.
    """
    out = {}
    for k, v in data.items():
        if k not in schema:
            continue
        sub_schema = schema[k]
        # If the data holds a scalar where the schema expects a sub-dict there
        # is nothing to filter, so copy the value instead of crashing on
        # ``v.items()``.
        if is_leaf(sub_schema) or not isinstance(v, dict):
            out[k] = v
        else:
            out[k] = fill_leaves(sub_schema, v)
    return out
Here is the same function written with a loop instead of a dict comprehension (more readable):
def fill_leaves(schema, data):
    """Return the part of *data* that is selected by *schema* (loop version).

    Keys of *data* absent from *schema* are dropped.  When the schema node is
    a leaf (an empty dict or any non-dict value) the data value is copied
    whole; otherwise the function recurses into both structures.

    Args:
        schema: nested dict describing which keys to keep.
        data: nested dict holding the actual values.

    Returns:
        A new dict; neither input is modified.
    """
    d_out = {}
    for k, v in data.items():
        if k not in schema:
            continue
        node = schema[k]
        schema_is_leaf = not isinstance(node, dict) or not node
        # A scalar in the data where the schema expects a sub-dict cannot be
        # filtered any further: keep it as-is rather than fail on ``.items()``.
        if schema_is_leaf or not isinstance(v, dict):
            d_out[k] = v
        else:
            d_out[k] = fill_leaves(node, v)
    return d_out
well, let's do a recursive walk on schema tree:
def normalize(schema, data, result):
    """Fill *result* with the keys of *schema*, taking leaf values from *data*.

    The walk is driven by *schema*: every schema key appears in *result*.
    A schema leaf (a falsy value or any non-dict) is replaced by ``data[k]``
    when *data* has that key, otherwise the schema's placeholder is kept.
    Non-empty schema dicts are descended into.

    Args:
        schema: nested dict describing the wanted structure.
        data: nested dict holding the actual values.
        result: dict that is filled in place.

    Returns:
        None; the output is written into *result*.
    """
    for k, value in schema.items():
        if isinstance(value, dict) and value:
            # Always build a fresh dict for the branch: storing ``value``
            # itself would make the recursion write into the caller's schema.
            branch = result[k] = {}
            sub_data = data.get(k)
            normalize(value, sub_data if isinstance(sub_data, dict) else {}, branch)
        elif k in data:
            result[k] = data[k]
        else:
            # Leaf missing from data: keep the placeholder, but never share
            # the schema's own (empty) dict object with the result.
            result[k] = {} if isinstance(value, dict) else value
import json

# Run the schema-driven walk and pretty-print the filled structure.
rs = {}
normalize(schema, data, rs)
# print() with a single argument works on both Python 2 and Python 3.
print(json.dumps(rs, indent=2, sort_keys=True))
Related
I have a python dictionary, where I don't exactly know, how deeply nested it is, but here is an example of such:
{
"name":"a_struct",
"type":"int",
"data":{
"type":"struct",
"elements":[
{
"data":[
{
"name":"test1",
"data_id":0,
"type":"uint8",
"wire_type":0,
"data":0
},
{
"name":"test2",
"data_id":2,
"type":"uint32",
"wire_type":2,
"data":0
},
{
"name":"test3",
"data_id":3,
"type":"int",
"wire_type":4,
"data":{
"type":"uint32",
"elements":[
]
}
},
{
"name":"test4",
"data_id":4,
"type":"uint32",
"wire_type":2,
"data":0
},
{
"name":"test5",
"data_id":5,
"type":"int",
"wire_type":4,
"data":{
"type":"uint32",
"elements":[
]
}
}
]
}
]
}
}
My goal is to filter out every dictionary whose name key is not one of ["test1", "test3", "test5"]. This should work for dictionaries nested to arbitrary depth.
So in that case, the result shall be a filtered dictionary:
{
"name":"a_struct",
"type":"int",
"data":{
"type":"struct",
"elements":[
{
"data":[
{
"name":"test1",
"data_id":0,
"type":"uint8",
"wire_type":0,
"data":0
},
{
"name":"test3",
"data_id":3,
"type":"int",
"wire_type":4,
"data":{
"type":"uint32",
"elements":[
]
}
},
{
"name":"test5",
"data_id":5,
"type":"int",
"wire_type":4,
"data":{
"type":"uint32",
"elements":[
]
}
}
]
}
]
}
}
I tried to use the dpath lib (https://pypi.org/project/dpath/), by providing a filter criteria like so:
def afilter(x):
    """Return True only for dicts whose "name" is test1, test3 or test5.

    Any other value (a dict without a matching "name", or a non-dict) gives
    an explicit False instead of falling through to an implicit None.
    """
    return isinstance(x, dict) and x.get("name") in ("test1", "test3", "test5")
# Asker's attempt: search every path ("**") and pass afilter as the filter
# callback. Per the output shown in the question, this keeps only the matching
# records and leaves null placeholders for the list entries that were rejected.
result = dpath.util.search(my_dict, "**", afilter=afilter)
But I get the wrong result: every other key has been filtered out, and the rejected list items are replaced by null, which is not what I want:
{
"data":{
"elements":[
{
"data":[
{
"name":"test1",
"data_id":0,
"type":"uint8",
"wire_type":0,
"data":0
},
null,
{
"name":"test3",
"data_id":3,
"type":"int",
"wire_type":4,
"data":{
"type":"uint32",
"elements":[
]
}
},
null,
{
"name":"test5",
"data_id":5,
"type":"int",
"wire_type":4,
"data":{
"type":"uint32",
"elements":[
]
}
}
]
}
]
}
}
How to get this right?
PS: I'm not forced to use the dpath lib. So, the solution might be written in pure python.
You can recursively process your dictionary while filtering unneeded records:
def delete_keys(data, keys_to_keep):
    """Return a copy of *data* with unwanted records removed from "data" lists.

    The structure is walked recursively.  Inside every list stored under the
    key "data", dict elements are kept only when their 'name' is in
    *keys_to_keep*; lists under any other key keep all of their elements.

    Args:
        data: the (possibly deeply nested) dictionary to filter.
        keys_to_keep: container of record names that must survive.

    Returns:
        A new dictionary; *data* itself is not modified.
    """
    res = {}
    for k, v in data.items():
        if isinstance(v, dict):
            res[k] = delete_keys(v, keys_to_keep)
        elif isinstance(v, list):
            kept = []
            for obj in v:
                if not isinstance(obj, dict):
                    # Scalars (or nested lists) have no 'name' to test and
                    # nothing to recurse into: copy them unchanged.
                    kept.append(obj)
                elif k != "data" or obj.get('name') in keys_to_keep:
                    kept.append(delete_keys(obj, keys_to_keep))
            res[k] = kept
        else:
            res[k] = v
    return res
# Names of the records that must survive the filtering.
keys_to_keep = {'test1', 'test3', 'test5'}
# `data` is presumably the nested dictionary from the question — confirm.
print(delete_keys(data, keys_to_keep))
For your input, it gives:
{
"name": "a_struct",
"type": "int",
"data": {
"type": "struct",
"elements": [
{
"data": [
{
"name": "test1",
"data_id": 0,
"type": "uint8",
"wire_type": 0,
"data": 0,
},
{
"name": "test3",
"data_id": 3,
"type": "int",
"wire_type": 4,
"data": {"type": "uint32", "elements": []},
},
{
"name": "test5",
"data_id": 5,
"type": "int",
"wire_type": 4,
"data": {"type": "uint32", "elements": []},
},
]
}
],
},
}
Closed. This question needs to be more focused. It is not currently accepting answers.
Want to improve this question? Update the question so it focuses on one problem only by editing this post.
Closed 2 years ago.
Improve this question
Our crawling team has captured industry information from shopping sites, such as:
industry_list = [
{
"parent_ind" : "Clothing",
"name" : "shirt"
},
{
"name": "Clothing"
},
{
"parent_ind" : "Clothing",
"name" : "jeans"
},
{
"parent_ind" : "Clothing",
"name" : "Dress"
},
{
"name": "Digital Products"
},
{
"parent_ind" : "Digital Products",
"name": "computer parts"
},
{
"parent_ind" : "computer parts",
"name": "Hard Disk"
},
]
For ease of access, we would like to be able to convert this to a tree format, for example:
{
"Digital Products": {
"computer parts": {
"Hard Disk" : {}
}
},
"Clothing" : {
"shirt": {},
"jeans": {},
"Dress": {}
}
}
You can do it in O(n) time by initializing a links dictionary with all names plus a None entry for the root. Then add each child to its parent's sub-dictionary (no recursion needed). The root of the links dictionary will point to the final tree:
# Map every industry name to its (initially empty) dict of children.
links = {d["name"]: {} for d in industry_list}
# The None entry stands for the root; `tree` is the final result.
tree = links[None] = {}
for d in industry_list:
    name, parent = d["name"], d.get("parent_ind")
    if parent not in links:
        # The parent is referenced but has no entry of its own: create it at
        # the root instead of failing with a KeyError.
        links[parent] = tree.setdefault(parent, {})
    # Share the child's dict object so that later additions to it show up
    # inside the parent as well (no recursion needed).
    links[parent][name] = links[name]
output:
print(tree)
{ 'Clothing':
{ 'shirt': {}, 'jeans': {}, 'Dress': {} },
'Digital Products':
{ 'computer parts':
{'Hard Disk': {}}
}
}
Something like this could work:
# Build the tree in two passes without touching the input dicts and without
# any limit on nesting depth or on the order of the entries.
subtrees = {}  # name -> dict of children, shared wherever the node is placed
for i in industry_list:
    subtrees.setdefault(i['name'], {})
    if i.get('parent_ind'):
        # A parent that is only referenced still gets a node of its own.
        subtrees.setdefault(i['parent_ind'], {})

child_names = set()
for i in industry_list:
    parent = i.get('parent_ind')
    if parent:
        subtrees[parent][i['name']] = subtrees[i['name']]
        child_names.add(i['name'])

# Roots are the nodes that never appeared as somebody's child; a root without
# children is kept as an empty dict.
result = {name: sub for name, sub in subtrees.items() if name not in child_names}
result:
{
'Clothing': {
'shirt': {},
'jeans': {},
'Dress': {}
},
'Digital Products': {
'computer parts': {
'Hard Disk': {}
}
}
}
This should work I believe... Obviously you'll need to modify it to suit your needs. Right now, you can use this script to print out the accessTree you're after. You'll want to adopt the internal logic and have a python script that dynamically reads your industry lists.
import pprint
pp = pprint.PrettyPrinter(indent=2)
industry_list = [
{
"parent_ind" : "Clothing",
"name" : "shirt"
},
{
"name": "Clothing"
},
{
"parent_ind" : "Clothing",
"name" : "jeans"
},
{
"parent_ind" : "Clothing",
"name" : "Dress"
},
{
"name": "Digital Products"
},
{
"parent_ind" : "Digital Products",
"name": "computer parts"
},
{
"parent_ind" : "computer parts",
"name": "Hard Disk"
}
]
# Initialize an access tree object
accessTree = {}
# Recursive object search function
def _findItem(obj, key):
    """Return the value stored under *key* anywhere inside nested dict *obj*.

    The current level is checked first, then every dict-valued entry is
    searched depth-first.

    Args:
        obj: nested dictionary to search.
        key: key to look for.

    Returns:
        The value found (the same object, not a copy), or None when *key*
        does not occur at any level.
    """
    if key in obj:
        return obj[key]
    for v in obj.values():
        if isinstance(v, dict):
            item = _findItem(v, key)
            if item is not None:
                # Return what the recursion found; indexing obj[k][key] here
                # fails when the match sits more than one level further down.
                return item
    return None
# For each item in the industry list
for listItem in industry_list:
    itemName = listItem["name"]
    parentInd = listItem.get("parent_ind")
    if parentInd is None:
        # Root industry: make sure it exists even when it has no children.
        if _findItem(accessTree, itemName) is None:
            accessTree[itemName] = {}
        continue
    # Look for the parent anywhere in the tree built so far
    parentObject = _findItem(accessTree, parentInd)
    if parentObject is None:
        # Parent not seen yet: create it provisionally at the root
        parentObject = accessTree[parentInd] = {}
    # If this item was created at the root earlier (its children were listed
    # first), move that subtree under the parent instead of replacing it.
    existing = accessTree.pop(itemName, None)
    if existing is None:
        existing = parentObject.get(itemName, {})
    parentObject[itemName] = existing
print("Final Access Tree as follows: \n")
pp.pprint(accessTree)
industry_list = [
{
"parent_ind" : "Clothing",
"name" : "shirt"
},
{
"name": "Clothing"
},
{
"parent_ind" : "Clothing",
"name" : "jeans"
},
{
"parent_ind" : "Clothing",
"name" : "Dress"
},
{
"name": "Digital Products"
},
{
"parent_ind" : "Digital Products",
"name": "computer parts"
},
{
"parent_ind" : "computer parts",
"name": "Hard Disk"
},
]
new_industry_list = {}
where_map = {}  # name -> children dict of every node already placed
# Repeatedly place one entry whose position is known, consuming industry_list.
while industry_list:
    for i, d in enumerate(industry_list):
        parent = d.get('parent_ind')
        if parent is None or parent in where_map:
            # No parent -> top level; otherwise attach below the placed parent.
            siblings = new_industry_list if parent is None else where_map[parent]
            the_dict = {}
            siblings[d['name']] = the_dict
            where_map[d['name']] = the_dict
            del industry_list[i]
            break  # the list was mutated, so restart the scan
    else:
        # Nothing could be placed: the remaining entries refer to a parent that
        # is never declared. Promote that parent to a root so the loop always
        # terminates instead of spinning forever.
        orphan_parent = industry_list[0]['parent_ind']
        where_map[orphan_parent] = new_industry_list[orphan_parent] = {}
print(new_industry_list)
Prints:
{'Clothing': {'shirt': {}, 'jeans': {}, 'Dress': {}}, 'Digital Products': {'computer parts': {'Hard Disk': {}}}}
I got json dumps like this:
"aaa": {
"bbb": {
"ccc": {
"ddd": "string1",
"eee": "string2"
}
},
"kkk": "string3"
}
And I'd like to format it this way: enclose every key-value pair (separated by :) with {} and then replace : with ,.
I know that I can use re.sub() to replace string patterns, but regular expression does not work with overlapping patterns, so I can match, for example, "ddd": "string1" but not "ccc": {...} at the same time.
For the above json string, I'd like to get:
{"aaa", {
{"bbb", {
{"ccc", {
{"ddd", "string1"},
{"eee", "string2"}
}}
}},
{"kkk", "string3"}
}}
Here's a hack which converts everything to lists and then changes square brackets to curly ones. If your strings might contain square brackets that'll be a problem.
import json
inp = """
{
"aaa": {
"bbb": {
"ccc": {
"ddd": "string1",
"eee": "string2"
}
},
"kkk": "string3"
}
}
"""
inp = json.loads(inp)
def items(d):
    """Convert nested dicts into nested lists of ``(key, value)`` pairs.

    Dicts become a list of pairs, in iteration order; lists are walked so
    that dicts stored inside them are converted too; any other value is
    returned unchanged.
    """
    if isinstance(d, dict):
        return [(k, items(v)) for k, v in d.items()]
    if isinstance(d, list):
        # Without this branch a dict nested in a list would be dumped with
        # its "key": value colons intact.
        return [items(v) for v in d]
    return d
# Replace the parsed dicts by nested lists of (key, value) pairs.
inp = items(inp)
# Dump the lists as JSON, then turn every square bracket into a curly one.
# NOTE: this also rewrites square brackets that occur inside string values.
print(json.dumps(inp, indent=2).replace("[", "{").replace("]", "}"))
Output:
{
{
"aaa",
{
{
"bbb",
{
{
"ccc",
{
{
"ddd",
"string1"
},
{
"eee",
"string2"
}
}
}
}
},
{
"kkk",
"string3"
}
}
}
}
Note that you are treating dictionary keys as ordered when they aren't, so I made it more explicit with lists.
If it were me, I wouldn't dump to JSON in the first place, I'd serialize the native python data structure straight to C++ initializer list syntax:
myobj = {
"aaa": [
{ "bbb": {
"ccc": [
{"ddd": "string1"},
{"eee": "string2"}
]
}},
{ "kkk": "string3" }
]
}
def pyToCpp(value, key=None):
    """Render a Python structure as a C++ initializer-list string.

    Args:
        value: dict, list or scalar to render.
        key: when given, the result is the pair ``{ "key", <value> }``.

    Returns:
        The rendered string.  A dict with exactly one entry renders as that
        single pair; a dict with any other number of entries renders as a
        brace-enclosed list of pairs; a list renders as a brace-enclosed list
        of its rendered elements; anything else is wrapped in double quotes.
    """
    if key is not None:
        return '{{ "{}", {} }}'.format(key, pyToCpp(value))
    if isinstance(value, dict):
        # Render every entry: returning from inside the loop would silently
        # drop all keys after the first one.
        pairs = [pyToCpp(v, k) for k, v in value.items()]
        if len(pairs) == 1:
            return pairs[0]
        return '{{ {} }}'.format(", ".join(pairs))
    if isinstance(value, list):
        return '{{ {} }}'.format(", ".join(pyToCpp(v) for v in value))
    return '"{}"'.format(value)
y = pyToCpp(myobj)
print(y)
Output:
{ "aaa", { { "bbb", { "ccc", { { "ddd", "string1" }, { "eee", "string2" } } } }, { "kkk", "string3" } } }
Run it here: https://repl.it/repls/OddFrontUsers
I am trying to sort a nested dictionary using its second key where my dictionary looks like:
my_dictionary = {
"char": {
"3": {
"genman": [
"motion"
]
}
},
"fast": {
"2": {
"empty": []
}
},
"EMPT": {
"0": {}
},
"veh": {
"1": {
"tankers": [
"varA",
"varB"
]
}
}
}
And my expected output will be:
my_dictionary = {
"EMPT": {
"0": {}
},
"veh": {
"1": {
"tankers": [
"varA",
"varB"
]
}
},
"fast": {
"2": {
"empty": []
}
},
"char": {
"3": {
"genman": [
"motion"
]
}
}
}
Tried using the following code:
# Asker's attempt (Python 2 only: dict.itervalues() and indexing .keys()).
# NOTE(review): the original indentation was lost; the nesting below is the
# most plausible reading — confirm.
new_dict = {}
for k, v in my_dictionary.items():
    # Sorts the inner dicts themselves, not their keys.
    for s in sorted(my_dictionary.itervalues()):
        # Only true when the first inner key is falsy (e.g. an empty string),
        # so with keys like "0".."3" this body never runs.
        if not s.keys()[0]:
            new_val = my_dictionary[k].get(s.keys()[0])
            my_dictionary[s.keys()[0]] = new_val
# new_dict is never filled, so this update leaves my_dictionary unchanged.
my_dictionary.update(new_dict)
It fails badly, and I am getting the same result as my initial dictionary.
This works:
sorted(my_dictionary.items(), key=lambda x: list(x[1].keys())[0])
Returns:
[('EMPT', {'0': {}}),
('veh', {'1': {'tankers': ['varA', 'varB']}}),
('fast', {'2': {'empty': []}}),
('char', {'3': {'genman': ['motion']}})]
Sorted receives a list of key-value pairs, we sort using the result of lambda x: list(x[1].keys())[0] which takes a list of the keys in the inner dict, then grabs the first key (need to do this because dict_keys directly is not indexable).
Edit: the result is a list of key, value pairs but it can be fed into an OrderedDict to use it as a dict.
Before Python 3.7 a plain dict did not guarantee any ordering (from 3.7 on it preserves insertion order); to be explicit and portable, you can use OrderedDict instead.
from collections import OrderedDict
my_dictionary = {
"char": {
"3": {
"genman": [
"motion"
]
}
},
"fast": {
"2": {
"empty": []
}
},
"EMPT": {
"0": {}
},
"veh": {
"1": {
"tankers": [
"varA",
"varB"
]
}
}
}
# Pair the first inner key of every entry with its outer key, then sort the
# pairs so that the outer keys come out ordered by that inner key.
s = sorted([(list(inner.keys())[0], outer) for outer, inner in my_dictionary.items()])
# Rebuild the mapping in sorted order; OrderedDict remembers insertion order.
new_dic = OrderedDict((outer, my_dictionary[outer]) for _, outer in s)
I am trying to add my sub dictionary element in list. It is giving me type error.
Here is dictionary and my code:
{
"key1": "value1",
"key2": {
"skey1": "svalue2",
"skey2": {
"sskey1": [{
"url": "value",
"sid": "511"
},
{
"url": "value",
"sid": "522"
},
{
"url": "value",
"sid": "533"
}]
}
}
}
I want to add the sid into the list like [511,522,533]:
here is my code:
# Collect the 'sid' value of every entry in the 'sskey1' list.
# `op` presumably holds the dictionary shown above — confirm.
rsId=[]
for i in op['key2']['skey2']['sskey1']:
    # Scan all key/value pairs of one entry and pick out the 'sid' key.
    for k,v in i.items():
        if k=='sid':
            # Appended unchanged; in the sample data the sids are strings.
            rsId.append(v)
D = {
"key1":"value1",
"key2":{
"skey1":"svalue2",
"skey2":{
"sskey1":[{
"url":"value",
"sid":"511"
},
{
"url":"value",
"sid":"522"
},
{
"url":"value",
"sid":"533"
} ]
}
}
}
# Pull the 'sid' out of every entry of the nested 'sskey1' list.
res = [i['sid'] for i in D['key2']['skey2']['sskey1']]
# print() with a single argument works on both Python 2 and Python 3.
print(res)
Result:
['511', '522', '533']
or a one line code:
res = [i['sid'] for i in D['key2']['skey2']['sskey1']]
You can use a list comprehension:
rsId = [v for item in op['key2']['skey2']['sskey1'] for k, v in item.items() if k == 'sid']
You can try with one line something like this:
print(list(map(lambda x:x['sid'],data['key2']['skey2']['sskey1'])))
output:
['511', '522', '533']
If you want value in int then:
print(list(map(lambda x:int(x['sid']),data['key2']['skey2']['sskey1'])))
output:
[511, 522, 533]
when data is:
data = {
"key1":"value1",
"key2":{
"skey1":"svalue2",
"skey2":{
"sskey1":[{
"url":"value",
"sid":"511"
},
{
"url":"value",
"sid":"522"
},
{
"url":"value",
"sid":"533"
} ]
}
}
}
Get the int as output
The type error is probably because the items of the list are strings. Converting each one to a number with int() solves your problem.
The only change to your code is in the last line of code.
op = {
"key1": "value1",
"key2": {
"skey1": "svalue2",
"skey2": {
"sskey1": [{
"url": "value",
"sid": "511"
},
{
"url": "value",
"sid": "522"
},
{
"url": "value",
"sid": "533"
}]
}
}
}
# Collect the 'sid' of every entry in the 'sskey1' list as integers.
rsId = []
for i in op['key2']['skey2']['sskey1']:
    # Scan all key/value pairs of one entry and pick out the 'sid' key.
    for k, v in i.items():
        if k == 'sid':
            rsId.append(int(v)) # put the int here: converts the 'sid' string to an integer
output
>>> rsId
[511, 522, 533]
Another approach: checking every key that has a dictionary as value
op = {
"key1": "value1",
"key2": {
"skey1": "svalue2",
"skey2": {
"sskey1": [
{
"url": "value",
"sid": "511"
},
{
"url": "value",
"sid": "522"
},
{
"url": "value",
"sid": "533"
}
]
}
}
}
def _iter_sids(node):
    """Yield every value stored under a 'sid' key inside nested dicts/lists."""
    if isinstance(node, dict):
        for key, value in node.items():
            if key == 'sid':
                yield value
            else:
                for found in _iter_sids(value):
                    yield found
    elif isinstance(node, list):
        for element in node:
            for found in _iter_sids(element):
                yield found
    # Any other value (str, int, None, ...) holds no 'sid': nothing to yield.


# Walk the whole structure instead of hard-coding the nesting depth, so values
# of unexpected types are skipped rather than iterated.
l = [int(sid) for sid in _iter_sids(op)]
print(l)
output
[511, 522, 533]