There is a nested dictionary with multiple levels of keys. The requirements are:
Create nested keys by using keypath if it doesn't exist
Update the value by using the keypath if it exists
For example, this is the dictionary:
{
"animal": {
"dog": {
"type": "beagle"
}
},
"man": {
"name": "john",
"age": 36
},
"plant": {
"fruit": {
"apple": {
"type": "gala"
}
}
}
}
Here are the functions to update the value or append a new nested keypath:
appendDict(["man", "name"], "daniel", json_dict)
appendDict(["computer", "laptop", "maker"], "hp", json_dict)
Here is the expected result:
{
"animal": {
"dog": {
"type": "beagle"
}
},
"man": {
"name": "daniel",
"age": 36
},
"plant": {
"fruit": {
"apple": {
"type": "gala"
}
}
},
"computer": {
"laptop": {
"maker": "hp"
}
}
}
My question is how to implement the appendDict() function in order to support the requirements?
Here is my code so far which doesn't work yet:
json_dict = {
"animal": {"dog": {"type": "beagle"}},
"man": {"name": "john", "age": 36},
"plant": {"fruit": {"apple": {"type": "gala"}}}
}
def appendDict(keys, value, json_dict):
    # Attempt at "store `value` under the nested key path `keys`, creating
    # missing levels on the way".
    # NOTE(review): every iteration reads and writes the top-level `json_dict`;
    # the loop never descends into the dict stored under the current key, so
    # deeper keys of the path are handled at the top level as well.
    for index, key in enumerate(keys):
        if key not in json_dict:
            if index == len(keys) - 1:
                # Last key of the path is missing: this stores {key: value}
                # under `key`, i.e. one level of nesting more than `value`.
                some_data = {}
                some_data[key] = value
                json_dict[key] = some_data
            else:
                # A non-final key is missing: create an empty dict for it.
                some_data = {}
                json_dict[key] = some_data
        else:
            # The key already exists: it is overwritten with `value`, even
            # when it is not the last key of the path.
            json_dict[key] = value
appendDict(["man", "name"], "daniel", json_dict)
appendDict(["computer", "laptop", "maker"], "hp", json_dict)
You can use recursion by slicing keys at every call:
def appendDict(keys, value, json_dict):
    """Set ``value`` at the nested key path ``keys`` inside ``json_dict``.

    Missing intermediate levels are created as empty dicts; if the full path
    already exists its value is overwritten. ``json_dict`` is modified in
    place and nothing is returned.

    Args:
        keys: Non-empty sequence of keys, outermost first.
        value: Value to store under the last key.
        json_dict: Dictionary to update.

    Raises:
        ValueError: If ``keys`` is empty.
        TypeError: If an existing intermediate value is not a mapping and
            therefore cannot hold the next key.
    """
    keys = list(keys)
    if not keys:
        raise ValueError("keys must contain at least one key")

    # Walk down iteratively: no recursion depth limit and no list copy per
    # level, unlike slicing `keys[1:]` on every recursive call.
    node = json_dict
    for key in keys[:-1]:
        if key not in node:
            node[key] = {}
        node = node[key]
    node[keys[-1]] = value
json_dict = {'animal': {'dog': {'type': 'beagle'}}, 'man': {'name': 'john', 'age': 36}, 'plant': {'fruit': {'apple': {'type': 'gala'}}}}
appendDict(["man", "name"], "daniel", json_dict)
appendDict(["computer", "laptop", "maker"], "hp", json_dict)
import json
print(json.dumps(json_dict, indent=4))
Output:
{
"animal": {
"dog": {
"type": "beagle"
}
},
"man": {
"name": "daniel",
"age": 36
},
"plant": {
"fruit": {
"apple": {
"type": "gala"
}
}
},
"computer": {
"laptop": {
"maker": "hp"
}
}
}
Related
I have a python dictionary, where I don't exactly know, how deeply nested it is, but here is an example of such:
{
"name":"a_struct",
"type":"int",
"data":{
"type":"struct",
"elements":[
{
"data":[
{
"name":"test1",
"data_id":0,
"type":"uint8",
"wire_type":0,
"data":0
},
{
"name":"test2",
"data_id":2,
"type":"uint32",
"wire_type":2,
"data":0
},
{
"name":"test3",
"data_id":3,
"type":"int",
"wire_type":4,
"data":{
"type":"uint32",
"elements":[
]
}
},
{
"name":"test4",
"data_id":4,
"type":"uint32",
"wire_type":2,
"data":0
},
{
"name":"test5",
"data_id":5,
"type":"int",
"wire_type":4,
"data":{
"type":"uint32",
"elements":[
]
}
}
]
}
]
}
}
My goal is to filter out each dictionary whose name key does not contain one of the values ["test1", "test3", "test5"]. This should be applicable to dictionaries of arbitrary nesting depth.
So in that case, the result shall be a filtered dictionary:
{
"name":"a_struct",
"type":"int",
"data":{
"type":"struct",
"elements":[
{
"data":[
{
"name":"test1",
"data_id":0,
"type":"uint8",
"wire_type":0,
"data":0
},
{
"name":"test3",
"data_id":3,
"type":"int",
"wire_type":4,
"data":{
"type":"uint32",
"elements":[
]
}
},
{
"name":"test5",
"data_id":5,
"type":"int",
"wire_type":4,
"data":{
"type":"uint32",
"elements":[
]
}
}
]
}
]
}
}
I tried to use the dpath lib (https://pypi.org/project/dpath/), by providing a filter criteria like so:
def afilter(x):
    """Return True only for a dict whose "name" value is a wanted name.

    Anything else (non-dict values, dicts without a "name" key, dicts with
    another name) yields False, so the result is always a bool.
    """
    wanted_names = ("test1", "test3", "test5")
    return isinstance(x, dict) and "name" in x and x["name"] in wanted_names
result = dpath.util.search(my_dict, "**", afilter=afilter)
But I get a wrong result: every other key has been filtered out, which is not what I want:
{
"data":{
"elements":[
{
"data":[
{
"name":"test1",
"data_id":0,
"type":"uint8",
"wire_type":0,
"data":0
},
null,
{
"name":"test3",
"data_id":3,
"type":"int",
"wire_type":4,
"data":{
"type":"uint32",
"elements":[
]
}
},
null,
{
"name":"test5",
"data_id":5,
"type":"int",
"wire_type":4,
"data":{
"type":"uint32",
"elements":[
]
}
}
]
}
]
}
}
How to get this right?
PS: I'm not forced to use the dpath lib. So, the solution might be written in pure python.
You can recursively process your dictionary while filtering unneeded records:
def delete_keys(data, keys_to_keep, list_key="data", name_key="name"):
    """Return a filtered copy of the nested dictionary ``data``.

    The structure is walked recursively. Every list stored under ``list_key``
    is filtered so that only dicts whose ``name_key`` value is in
    ``keys_to_keep`` remain; all other keys and lists are copied through.
    Items that are not dicts (numbers, strings, ...) carry no name to filter
    on and are kept unchanged instead of raising ``AttributeError``.

    Args:
        data: Dictionary to filter; it is not modified.
        keys_to_keep: Container of names that may stay in filtered lists.
        list_key: Key whose list values are filtered (default ``"data"``).
        name_key: Key holding the name of a list item (default ``"name"``).

    Returns:
        A new dictionary with the unwanted list items removed.
    """

    def _clean(item):
        # Only dicts can contain further lists that need filtering.
        if isinstance(item, dict):
            return delete_keys(item, keys_to_keep, list_key, name_key)
        return item

    res = {}
    for k, v in data.items():
        if isinstance(v, dict):
            res[k] = _clean(v)
        elif isinstance(v, list):
            if k == list_key:
                v = [
                    obj for obj in v
                    if not isinstance(obj, dict) or obj.get(name_key) in keys_to_keep
                ]
            res[k] = [_clean(obj) for obj in v]
        else:
            res[k] = v
    return res
keys_to_keep = {'test1', 'test3', 'test5'}
print(delete_keys(data, keys_to_keep))
For your input, it gives:
{
"name": "a_struct",
"type": "int",
"data": {
"type": "struct",
"elements": [
{
"data": [
{
"name": "test1",
"data_id": 0,
"type": "uint8",
"wire_type": 0,
"data": 0,
},
{
"name": "test3",
"data_id": 3,
"type": "int",
"wire_type": 4,
"data": {"type": "uint32", "elements": []},
},
{
"name": "test5",
"data_id": 5,
"type": "int",
"wire_type": 4,
"data": {"type": "uint32", "elements": []},
},
]
}
],
},
}
I get an input like this:
input 1:
{
"name": "Ben",
"description": "Ben",
"attributes": [
{
"type": "Background",
"value": "Default"
},
{
"type": "Hair-color",
"value": "Brown"
}
]
}
input 2
{
"name": "Ice",
"description": "Ice",
"attributes": [
{
"type": "Background",
"value": "Green"
},
{
"type": "Hair-color",
"value": "White"
}
]
}
input 3
{
"name": "Itay",
"description": "Itay",
"attributes": [
{
"type": "Background",
"value": "Default"
},
{
"type": "Hair-color",
"value": "Brown"
}
]
}
What I want to do is count how many times each background value and each hair-color value appears.
(These are sample examples and in reality there are more types and different values)
Let's say in these examples we have 2 objects that have a background as default then I want to have a count of that like so:
Background default count=2
hair-color brown = 2
background green = 1
hair-color white = 1
I want the most effective code because there are other aspects to the code, in addition it will run on thousands of queries not just two, so needs to run in good times too :D
My code so far:
import requests
import json
from collections import Counter
from collections import defaultdict
from time import sleep
# Distinct attribute types, in the order they are first seen.
attributes = []
# Maps each attribute type to the list of all values seen for it (repeats
# included), so the values can be counted afterwards.
test_dict = defaultdict(list)
# NOTE(review): min_id and max_id are not defined in this snippet; they are
# presumably set elsewhere.
for i in range(min_id, max_id+1):
    api = 'api/v1/test/{}'.format(i)
    response = requests.get(api)
    item_dict = json.loads(response.text)
    for item in item_dict['attributes']:
        # NOTE(review): this reads "trait_type", while the sample inputs shown
        # with this code use the key "type" -- confirm which one the API returns.
        # The conditional expression is used only for its side effect: the type
        # is appended the first time it is seen (a linear scan of the list).
        attributes.append(item["trait_type"]) if item["trait_type"] not in attributes else attributes
        test_dict[item["trait_type"]].append(item["value"])
    # NOTE(review): the original indentation was lost; this pause is assumed to
    # run once per request.
    sleep(0.02)
# Print every attribute type followed by the frequency of each of its values.
for attribute in attributes:
    print(attribute)
    print(Counter(test_dict[attribute]))
This should work for you:
def constract_data(data_dict):
    """Count how often each (attribute type, attribute value) pair occurs.

    Types and values are compared case-insensitively (both are lower-cased).

    Args:
        data_dict: Iterable of records, each with an "attributes" list of
            dicts that have string "type" and "value" entries.

    Returns:
        A tuple ``(output, total_count)``. ``output`` is a list with one dict
        per distinct pair, in first-seen order, shaped like
        ``{<type>: <value>, 'count': <occurrences>}``. ``total_count`` is the
        number of attributes processed.
    """
    output = []
    # Direct lookup of the entry for a pair. This replaces a scan of `output`
    # per attribute and also keeps empty-string values apart from entries
    # that merely lack the key.
    entry_by_pair = {}
    total_count = 0
    for data in data_dict:
        for attribute in data["attributes"]:
            total_count += 1
            dict_key = attribute["type"].lower()
            dict_value = attribute["value"].lower()
            pair = (dict_key, dict_value)
            entry = entry_by_pair.get(pair)
            if entry is None:
                entry = {dict_key: dict_value, 'count': 0}
                entry_by_pair[pair] = entry
                output.append(entry)
            entry['count'] += 1
    return output, total_count
def calculate_occurrence_ratio(data_dict, total_count):
    """Add a percentage string under 'ratio' to every entry of ``data_dict``.

    The ratio is the entry's 'count' (0 when missing) relative to
    ``total_count``, rounded to two decimals and suffixed with '%'.

    Args:
        data_dict: List of dicts carrying a 'count' entry; updated in place.
        total_count: Non-zero total that the counts are compared against.

    Returns:
        The same ``data_dict`` list, for convenient chaining.
    """
    for entry in data_dict:
        ratio = round(entry.get('count', 0) / total_count * 100, 2)
        entry['ratio'] = f'{ratio}%'
    return data_dict
data_dict = [
{
"name":"Ice",
"description":"Ice",
"attributes":[
{
"type":"Background",
"value":"Green"
},
{
"type":"Hair-color",
"value":"White"
}
]
},
{
"name":"Ben",
"description":"Ben",
"attributes":[
{
"type":"Background",
"value":"Default"
},
{
"type":"Hair-color",
"value":"Brown"
}
]
},
{
"name":"Itay",
"description":"Itay",
"attributes":[
{
"type":"Background",
"value":"Default"
},
{
"type":"Hair-color",
"value":"Brown"
}
]
}
]
output_data, total_count = constract_data(data_dict)
output_data = calculate_occurrence_ratio(output_data, total_count)
print(output_data)
Output:
[{'background': 'green', 'count': 1, 'ratio': '16.67%'}, {'hair-color': 'white', 'count': 1, 'ratio': '16.67%'}, {'background': 'default', 'count': 2, 'ratio': '33.33%'}, {'hair-color': 'brown', 'count': 2, 'ratio': '33.33%'}]
this solution will work for you:
list_data = [
    {
        "name": "Ice",
        "description": "Ice",
        "attributes": [
            {"type": "Background", "value": "Green"},
            {"type": "Hair-color", "value": "White"},
            {"type": "other", "value": "White"},
        ],
    },
    {
        "name": "Ben",
        "description": "Ben",
        "attributes": [
            {"type": "Background", "value": "Default"},
            {"type": "Hair-color", "value": "Brown"},
        ],
    },
    {
        "name": "Itay",
        "description": "Itay",
        "attributes": [
            {"type": "Background", "value": "Default"},
            {"type": "Hair-color", "value": "Brown"},
        ],
    },
]


def summarize_attributes(records):
    """Summarize how often each attribute value occurs per attribute type.

    Counting is done with integers and the text is formatted once at the end.
    Parsing the count back out of the previously formatted string (reading
    only its first character) gives wrong results from ten occurrences on.

    Args:
        records: Iterable of dicts, each with an "attributes" list of
            ``{"type": ..., "value": ...}`` dicts.

    Returns:
        A tuple ``(summary, totals)``. ``summary[type][value]`` is a string
        ``"<count> with: <percent>%"`` where the percentage is the share of
        that value among all attributes of the same type, truncated to an
        integer. ``totals[type]`` is the number of attributes of that type.
    """
    counts = {}
    for record in records:
        for attribute in record["attributes"]:
            per_type = counts.setdefault(attribute["type"], {})
            value = attribute["value"]
            per_type[value] = per_type.get(value, 0) + 1

    totals = {kind: sum(per_type.values()) for kind, per_type in counts.items()}
    summary = {
        kind: {
            value: "{} with: {}%".format(n, int(n / totals[kind] * 100))
            for value, n in per_type.items()
        }
        for kind, per_type in counts.items()
    }
    return summary, totals


output, all_count = summarize_attributes(list_data)
print(output)
Here is another approach:
from collections import defaultdict
data_dict = [
{
"name":"Ice",
"description":"Ice",
"attributes":[
{
"type":"Background",
"value":"Green"
},
{
"type":"Hair-color",
"value":"White"
}
]
},
{
"name":"Ben",
"description":"Ben",
"attributes":[
{
"type":"Background",
"value":"Default"
},
{
"type":"Hair-color",
"value":"Brown"
}
]
},
{
"name":"Itay",
"description":"Itay",
"attributes":[
{
"type":"Background",
"value":"Default"
},
{
"type":"Hair-color",
"value":"Brown"
}
]
}
]
# out[attribute type][attribute value] -> number of occurrences.
out = defaultdict(lambda: defaultdict(int))
# tot_count[attribute type] -> number of attributes of that type.
tot_count = defaultdict(int)
for record in data_dict:
    for attribute in record['attributes']:
        kind = attribute['type']
        tot_count[kind] += 1
        out[kind][attribute['value']] += 1
# Report each value with its count and its share within its own type
# (percentage truncated to an integer).
for kind, value_counts in out.items():
    type_total = tot_count[kind]
    for value, count in value_counts.items():
        print(f'{kind.lower()} {value} count={count} ratio={int(count*100/type_total)}%')
Output:
background Green count=1 ratio=33%
background Default count=2 ratio=66%
hair-color White count=1 ratio=33%
hair-color Brown count=2 ratio=66%
Expected Query Output
food = {
'fruit': ['apple', 'banana', 'cherry'],
'vegetables': ['onion', 'cucumber'],
}
Data Format in Database
[{
"category": "fruit",
"name": "banana"
}, {
"category": "fruit",
"name": "apple"
}, {
"category": "fruit",
"name": "cherry"
}, {
"category": "vegetables",
"name": "onion"
}, {
"category": "vegetables",
"name": "cucumber"
}]
Basically, I need to fetch each distinct category together with the list of names that belong to it from MongoDB.
TIA
// Stage 1 ($group): one group per distinct "category"; "list" collects the
// names of that category. $addToSet keeps each name once and does not
// guarantee the order of the elements.
db.collection.aggregate([{
    "$group": {
      "_id": "$category",
      "list": {
        "$addToSet": "$name"
      }
    }
  },
  // Stage 2 ($addFields): wrap the group into a one-element array of
  // {k: <category>, v: <names>}, the input shape $arrayToObject expects.
  {
    "$addFields": {
      "array": [{
        "k": "$_id",
        "v": "$list"
      }]
    }
  },
  // Stage 3 ($replaceRoot): replace each document with the object built from
  // that array, i.e. { <category>: [<names>] }. The result is one document
  // per category.
  {
    "$replaceRoot": {
      "newRoot": {
        "$arrayToObject": "$array"
      }
    }
  }
])
Working eg. https://mongoplayground.net/p/bccPDlORK7W
I need to check if all the keys declared in a schema file are present in a dictionary and if they are not, I need to fill them with a default value, of a given type. I need to do this dynamically because the structure can be even more complicated than the one below.
{
"type": "object",
"properties": {
"vid": {
"type": ["null", "integer"]
},
"merged-vids": {
"type": ["null", "array"],
"items": {
"type": ["null", "integer"]
}
},
"portal-id": {
"type": ["null", "integer"]
},
"is-contact": {
"type": ["null", "boolean"]
},
"form-submissions": {
"type": ["null", "array"],
"items": {
"type": ["null", "object"],
"properties": {
"conversion-id": {
"type": ["null", "string"]
},
"timestamp": {
"type": ["null", "string"],
"format": "date-time"
},
"form-id": {
"type": ["null", "string"]
},
"portal-id": {
"type": ["null", "integer"]
},
"page-url": {
"type": ["null", "string"]
},
"title": {
"type": ["null", "string"]
}
}
}
}
}
}
This is an example of dictionary:
{
"vid": 1000,
"portal-id": 2512,
"is-contact": true,
"profile-token": "dummy_profile_token",
"profile-url": "dummy_profile_url",
"form-submissions": [
{
"conversion-id": "127-798",
"timestamp": 1484080167266,
"form-id": "70fd-4b98-14796-777",
"page-url": "https://example.com/landing-page-url",
"title": "A new test form",
"meta-data": []
}
]
}
I am also new to python and this is a bit too much.
This is what I tried, but I cannot figure out what to do.
def get_default(type_object):
    """Return an empty default value for a JSON-schema type.

    Args:
        type_object: Either a type name such as ``'object'`` or a list of
            type names such as ``['null', 'array']``. For a list, the first
            entry that is not ``'null'`` decides the default.

    Returns:
        A new ``{}`` for ``'object'``, a new ``[]`` for ``'array'`` and the
        empty string for every other type.
    """
    if isinstance(type_object, (list, tuple)):
        concrete = [name for name in type_object if name != 'null']
        type_object = concrete[0] if concrete else 'null'
    if type_object == 'object':
        # The value has to be returned, otherwise the caller receives None.
        return {}
    if type_object == 'array':
        return []
    return ''
def fill_fields_with_empty_str(record, schema):
    # Work-in-progress attempt to walk `schema` and give `record` a default
    # for every key the schema declares.
    # Resolve this schema node's type; for a list such as ["null", "integer"]
    # the last entry is used.
    if isinstance(schema['type'], list):
        type_obj = schema['type'][len(schema['type'])-1]
    elif isinstance(schema['type'], str):
        type_obj = schema['type']
    if type_obj == 'object':
        new_dict = {}
        for key in schema['properties'].keys():
            # `not record.get(key)` is also true for keys that are present
            # with a falsy value (0, False, "", []), so those are replaced by
            # the default too.
            if not record.get(key):
                record[key] = get_default(schema['properties'][key]['type'])
            new_dict[key] = fill_fields_with_empty_str(record[key], schema['properties'][key])
        return new_dict
    elif type_obj == 'array':
        new_list = []
        # From here on type_obj holds the declared type of the array items.
        type_obj = schema["items"]['type']
        if len(record) == 0:
            record = get_default(type_obj)
        # NOTE(review): this loops over schema["items"] (in the sample schema a
        # dict, so its keys) rather than over the elements of `record`, and it
        # passes the whole `record` on every iteration -- likely not intended.
        for element in schema["items"]:
            new_list.append(fill_fields_with_empty_str(record, schema['items']))
        return new_list
    else:
        # Any other type: '' is returned regardless of the value in `record`.
        return ''
All,
I am trying to change the way some json looks by going through and formatting it in the following way:
1. flatten all of the fields lists
2. Then remove the fields lists and replace them with the name : flatten list
Example:
{
"name": "",
"fields": [{
"name": "keys",
"fields": [{
"node-name": "0/0/CPU0"
},
{
"interface-name": "TenGigE0/0/0/47"
},
{
"device-id": "ASR9K-H1902.corp.cisco.com"
}
]
},
{
"name": "content",
"fields": [{
"name": "lldp-neighbor",
"fields": [{
"receiving-interface-name": "TenGigE0/0/0/47"
},
{
"receiving-parent-interface-name": "Bundle-Ether403"
},
{
"device-id": "ASR9K-H1902.corp.cisco.com"
},
{
"chassis-id": "78ba.f975.a64f"
},
{
"port-id-detail": "Te0/1/0/4/0"
},
{
"header-version": 0
},
{
"hold-time": 120
},
{
"enabled-capabilities": "R"
},
{
"platform": ""
}
]
}]
}
]
}
Would turn into:
{
"": [{
"keys": [{
"node-name": "0/0/CPU0",
"interface-name": "TenGigE0/0/0/47",
"device-id": "ASR9K-H1902.corp.cisco.com"
}]
},
{
"content": [{
"lldp-neighbor": [{
"receiving-interface-name": "TenGigE0/0/0/47",
"receiving-parent-interface-name": "Bundle-Ether403",
"device-id": "ASR9K-H1902.corp.cisco.com",
"chassis-id": "78ba.f975.a64f",
"port-id-detail": "Te0/1/0/4/0",
"header-version": 0,
"hold-time": 120,
"enabled-capabilities": "R",
"platform": ""
}]
}]
}
]
}
I have tried the following to get the list flattened:
def _flatten_fields(self, fields_list):
c = {}
for b in [d for d in fields_list if bool(d)]:
c.update(b)
return c
This seems to work, but I can't figure out a way to get into the sub-levels using recursion. I am saving all flattened lists and names into a new dictionary; is there a way to do it by just manipulating the original dictionary?
This worked on the example you provided:
import json
def flatten(data):
    """Collapse a nested "name"/"fields" structure into plain nested dicts.

    A dict with both "name" and "fields" becomes ``{name: flatten(fields)}``.
    Any other dict is treated as a leaf and its key/value pairs are copied
    as they are. A list is flattened entry by entry and the results are
    merged into one dict (later entries win on duplicate keys).

    Args:
        data: A dict or an iterable of dicts in the format described above.

    Returns:
        A new dict; the input is not modified.
    """
    result = dict()
    if isinstance(data, dict):
        if 'name' in data and 'fields' in data:
            result[data['name']] = flatten(data['fields'])
        else:
            # dict.keys()/values() cannot be indexed on Python 3; copying the
            # pairs also keeps leaves that carry more than one key intact.
            result.update(data)
    else:
        for entry in data:
            result.update(flatten(entry))
    return result
# print is a function on Python 3; the statement form is a SyntaxError there.
print(json.dumps(flatten(data), indent=4))
Output
{
"": {
"keys": {
"node-name": "0/0/CPU0",
"interface-name": "TenGigE0/0/0/47",
"device-id": "ASR9K-H1902.corp.cisco.com"
},
"content": {
"lldp-neighbor": {
"receiving-interface-name": "TenGigE0/0/0/47",
"receiving-parent-interface-name": "Bundle-Ether403",
"header-version": 0,
"port-id-detail": "Te0/1/0/4/0",
"chassis-id": "78ba.f975.a64f",
"platform": "",
"device-id": "ASR9K-H1902.corp.cisco.com",
"hold-time": 120,
"enabled-capabilities": "R"
}
}
}
}
It doesn't have the extra list layers shown in your expected output, but I don't think you want those.
This worked on the example you provided:
def flatten_fields(fields_list):
    """Merge a list of field dicts into a single dict wrapped in a list.

    An item that has a "fields" entry is a named group: its flattened fields
    are stored under the item's "name". All other entries of an item, except
    "name" and "fields" themselves, are copied as plain key/value pairs. The
    result does not depend on the order of the keys inside an item.

    Args:
        fields_list: List of dicts, either groups (``{"name": ...,
            "fields": [...]}``) or plain ``{key: value}`` leaves.

    Returns:
        A one-element list containing the merged dict.

    Raises:
        KeyError: If an item has "fields" but no "name".
    """
    merged = {}
    for item in fields_list:
        if "fields" in item:
            merged[item["name"]] = flatten_fields(item["fields"])
        for key, value in item.items():
            if key not in ("name", "fields"):
                merged[key] = value
    return [merged]
But it works on a list of dictionaries, so you should call it like flatten_fields([data])[0].
The output is:
{
"": [{
"keys": [{
"node-name": "0/0/CP0",
"interface-name": "TenGigE0/0/0/47",
"device-id": "ASR9K-H1902.corp.cisco.com"
}],
"content": [{
"lldp-neighbor": [{
"chassis-id": "78ba.f975.a64f",
"receiving-parent-interface-name": "Bndle-Ether403",
"enabled-capabilities": "R",
"device-id": "ASR9K-H1902.corp.cisco.com",
"hold-time": 120,
"receiving-interface-name": "TenGigE0/0/0/47",
"platform": "",
"header-version": 0,
"port-id-detail": "Te0/1/0/4/0"
}]
}]
}]
}