Filter nested python dict by value

Filter nested python dict by value - python

I have a python dictionary, where I don't exactly know, how deeply nested it is, but here is an example of such:
{
"name":"a_struct",
"type":"int",
"data":{
"type":"struct",
"elements":[
{
"data":[
{
"name":"test1",
"data_id":0,
"type":"uint8",
"wire_type":0,
"data":0
},
{
"name":"test2",
"data_id":2,
"type":"uint32",
"wire_type":2,
"data":0
},
{
"name":"test3",
"data_id":3,
"type":"int",
"wire_type":4,
"data":{
"type":"uint32",
"elements":[
]
}
},
{
"name":"test4",
"data_id":4,
"type":"uint32",
"wire_type":2,
"data":0
},
{
"name":"test5",
"data_id":5,
"type":"int",
"wire_type":4,
"data":{
"type":"uint32",
"elements":[
]
}
}
]
}
]
}
}
My goal is to filter out each dictionary that does not contains values ["test1", "test3", "test5"] by the name key. This shall be applicable to various deeply nested dictionaries.
So in that case, the result shall be a filtered dictionary:
{
"name":"a_struct",
"type":"int",
"data":{
"type":"struct",
"elements":[
{
"data":[
{
"name":"test1",
"data_id":0,
"type":"uint8",
"wire_type":0,
"data":0
},
{
"name":"test3",
"data_id":3,
"type":"int",
"wire_type":4,
"data":{
"type":"uint32",
"elements":[
]
}
},
{
"name":"test5",
"data_id":5,
"type":"int",
"wire_type":4,
"data":{
"type":"uint32",
"elements":[
]
}
}
]
}
]
}
}
I tried to use the dpath lib (https://pypi.org/project/dpath/), by providing a filter criteria like so:
def afilter(x):
if isinstance(x, dict):
if "name" in x:
if x["name"] in ["test1", "test3", "test5"]:
return True
else:
return False
else:
return False
result = dpath.util.search(my_dict, "**", afilter=afilter)
But I get a wrong result, so every other key, has been filtered out, which is not what I want:
{
"data":{
"elements":[
{
"data":[
{
"name":"test1",
"data_id":0,
"type":"uint8",
"wire_type":0,
"data":0
},
null,
{
"name":"test3",
"data_id":3,
"type":"int",
"wire_type":4,
"data":{
"type":"uint32",
"elements":[
]
}
},
null,
{
"name":"test5",
"data_id":5,
"type":"int",
"wire_type":4,
"data":{
"type":"uint32",
"elements":[
]
}
}
]
}
]
}
}
How to get this right?
PS: I'm not forced to use the dpath lib. So, the solution might be written in pure python.

You can recursively process your dictionary while filtering unneeded records:
def delete_keys(data, keys_to_keep):
res = {}
for k, v in data.items():
if isinstance(v, dict):
res[k] = delete_keys(v, keys_to_keep)
elif isinstance(v, list):
if k == "data":
res[k] = [delete_keys(obj, keys_to_keep) for obj in v if obj.get('name') in keys_to_keep]
else:
res[k] = [delete_keys(obj, keys_to_keep) for obj in v]
else:
res[k] = v
return res
keys_to_keep = {'test1', 'test3', 'test5'}
print(delete_keys(data, keys_to_keep))
For your input, it gives:
{
"name": "a_struct",
"type": "int",
"data": {
"type": "struct",
"elements": [
{
"data": [
{
"name": "test1",
"data_id": 0,
"type": "uint8",
"wire_type": 0,
"data": 0,
},
{
"name": "test3",
"data_id": 3,
"type": "int",
"wire_type": 4,
"data": {"type": "uint32", "elements": []},
},
{
"name": "test5",
"data_id": 5,
"type": "int",
"wire_type": 4,
"data": {"type": "uint32", "elements": []},
},
]
}
],
},
}

Related

Writing resilient recursive code that will return results from a big json file

I have written a recursive code. I want more experienced people to tell me how resillient and fail-safe is my code:
I have a json file (Json file can be as big as 300MB):
[
{
"modules": {
"webpages": []
},
"webpages": {
"ip_addr": {
"value": "127.0.0.1",
"tags": []
},
"http": {
"status": {
"value": "Unavailable",
"tags": []
},
"title": {
"value": "403 Forbidden",
"tags": [
{
"category": "Server Code",
"match": "403"
},
{
"category": "Interesting Words",
"match": "Forbidden"
}
]
},
"server": {
"value": "Apache",
"tags": [
{
"category": "Apache Server",
"match": "Apache"
}
]
}
},
"redirects": [],
"robottxt": null
}
},
{
"modules": {
"webpages": []
}
}
]
I want to return value keys where tags are populated.
So I want to ignore:
"status": {
"value": "Unavailable",
"tags": []
},
But I want to return the title and server values. I also want to return ip_addr.value
I have written this code:
def getAllValues(nestedDictionary, firstArray, firstObj, firstUseful):
returnedArray = firstArray
tempValue = firstObj
useful = firstUseful
for key, value in nestedDictionary.items():
ipString = nestedDictionary.get("ip_addr")
if ipString is not None:
ipValue = ipString.get("value")
useful = {"ip_add": ipValue}
if isinstance(value, dict):
temp = {
"Key": key,
"useful": useful,
}
getAllValues(value, returnedArray, temp, useful)
else:
if key == "value":
tempValue["value"] = value
if key == "tags" and isinstance(value, list) and len(value) > 0:
tempValue["tags"] = value
returnedArray.append(tempValue)
return returnedArray
The above code should return:
[
{
"Key": "title",
"value": "403 Forbidden",
"useful": { "ip_addr": "127.0.0.1" },
"tags": [
{
"category": "Server Code",
"match": "403"
},
{
"category": "Interesting Words",
"match": "Forbidden"
}
]
},
{
"Key": "server",
"value": "Apache",
"useful": { "ip_addr": "127.0.0.1" },
"tags": [
{
"category": "Apache Server",
"match": "Apache"
}
]
}
]
Its a long post, but hopefully, someone can give me some assurance :)

How to remove parent json element in python3 if child is object is empty

I'm trying to move data from SQL to Mongo. Here is a challenge I'm facing, if any child object is empty I want to remove parent element. I want till insurance field to be removed.
Here is what I tried:
def remove_empty_elements(jsonData):
if(isinstance(jsonData, list) or isinstance(jsonData,dict)):
for elem in list(jsonData):
if not isinstance(elem, dict) and isinstance(jsonData[elem], list) and elem:
jsonData[elem] = [x for x in jsonData[elem] if x]
if(len(jsonData[elem])==0):
del jsonData[elem]
elif not isinstance(elem, dict) and isinstance(jsonData[elem], dict) and not jsonData[elem]:
del jsonData[elem]
else:
pass
return jsonData
sample data
{
"_id": "30546c62-8ea0-4f1a-a239-cc7508041a7b",
"IsActive": "True",
"name": "Pixel 3",
"phone": [
{
"Bill": 145,
"phonetype": "xyz",
"insurance": [
{
"year_one_claims": [
{
"2020": 200
},
{
},
{
},
{
},
{
}
]
},
{
"year_two_claims": [
{
},
{
},
{
},
{
},
{
}
]
},
]
}
],
"Provider": {
"agent": "aaadd",
}
}
Results should look like that
{
"_id": "30546c62-8ea0-4f1a-a239-cc7508041a7b",
"IsActive": "True",
"name": "Pixel 3",
"phone": [
{
"Bill": 145,
"phonetype": "xyz",
"insurance": [
{
"year_one_claims": [
{
"2020": 200
},
]
},
]
}
],
"Provider": {
"agent": "aaadd",
}
}

Your if statements are kind of confusing. I think you are looking for a recursion:
import json
# define which elements you want to remove:
to_be_deleted = [[], {}, "", None]
def remove_empty_elements(jsonData):
if isinstance(jsonData, list):
jsonData = [new_elem for elem in jsonData
if (new_elem := remove_empty_elements(elem)) not in to_be_deleted]
elif isinstance(jsonData,dict):
jsonData = {key: new_value for key, value in jsonData.items()
if (new_value := remove_empty_elements(value)) not in to_be_deleted}
return jsonData
print(json.dumps(remove_empty_elements(jsonData), indent=4))
Edit/Note: from Python3.8 you can use assignements (:=) in comprehensions
Output:
{
"_id": "30546c62-8ea0-4f1a-a239-cc7508041a7b",
"IsActive": "True",
"name": "Pixel 3",
"phone": [
{
"Bill": 145,
"phonetype": "xyz",
"insurance": [
{
"year_one_claims": [
{
"2020": 200
}
]
}
]
}
],
"Provider": {
"agent": "aaadd"
}
}

Try out this:
data = {
"_id": "30546c62-8ea0-4f1a-a239-cc7508041a7b",
"IsActive": "True",
"name": "Pixel 3",
"phone": [
{
"Bill": 145,
"phonetype": "xyz",
"insurance": [
{
"year_one_claims": [
{
"2020": 200
},
{
},
{
},
{
},
{
}
]
},
{
"year_two_claims": [
{
},
{
},
{
},
{
},
{
}
]
},
]
}
],
"Provider": {
"agent": "aaadd",
}
}
for phn_data in data['phone']:
for ins in phn_data['insurance']:
for key, val in list(ins.items()):
for ins_data in list(val):
if not ins_data:
val.remove(ins_data)
if not val:
del ins[key]
phn_data['insurance'].remove(ins)
print (data)
Output:
{
'_id': '30546c62-8ea0-4f1a-a239-cc7508041a7b',
'IsActive': 'True',
'name': 'Pixel 3',
'phone': [{
'Bill': 145,
'phonetype': 'xyz',
'insurance': [{
'year_one_claims': [{
'2020': 200
}]
}]
}],
'Provider': {
'agent': 'aaadd'
}
}

Create nested dictionary using keypath in Python

There is a nested dictionary with multiple level of keys. The requirements are:
Create nested keys by using keypath if it doesn't exist
Update the value by using the keypath if it exists
For example, this is the dictionary:
{
"animal": {
"dog": {
"type": "beagle"
}
},
"man": {
"name": "john",
"age": 36
},
"plant": {
"fruit": {
"apple": {
"type": "gala"
}
}
}
}
Here are the functions to update the value or append a new nested keypath:
appendDict(["man", "name"], "daniel", json_dict)
appendDict(["computer", "laptop", "maker"], "hp", json_dict)
Here is the expected result:
{
"animal": {
"dog": {
"type": "beagle"
}
},
"man": {
"name": "daniel",
"age": 36
},
"plant": {
"fruit": {
"apple": {
"type": "gala"
}
}
},
"computer": {
"laptop": {
"maker": "hp"
}
}
}
My question is how to implement the appendDict() function in order to support the requirements?
Here is my code so far which doesn't work yet:
json_dict = {
"animal": {"dog": {"type": "beagle"}},
"man": {"name": "john", "age": 36},
"plant": {"fruit": {"apple": {"type": "gala"}}}
}
def appendDict(keys, value, json_dict):
for index, key in enumerate(keys):
if key not in json_dict:
if index == len(keys) - 1:
some_data = {}
some_data[key] = value
json_dict[key] = some_data
else:
some_data = {}
json_dict[key] = some_data
else:
json_dict[key] = value
appendDict(["man", "name"], "daniel", json_dict)
appendDict(["computer", "laptop", "maker"], "hp", json_dict)

You can use recursion by slicing keys at every call:
def appendDict(keys, value, json_dict):
if len(keys) == 1:
json_dict[keys[0]] = value
else:
if keys[0] not in json_dict:
json_dict[keys[0]] = {}
appendDict(keys[1:], value, json_dict[keys[0]])
json_dict = {'animal': {'dog': {'type': 'beagle'}}, 'man': {'name': 'john', 'age': 36}, 'plant': {'fruit': {'apple': {'type': 'gala'}}}}
appendDict(["man", "name"], "daniel", json_dict)
appendDict(["computer", "laptop", "maker"], "hp", json_dict)
import json
print(json.dumps(json_dict, indent=4))
Output:
{
"animal": {
"dog": {
"type": "beagle"
}
},
"man": {
"name": "daniel",
"age": 36
},
"plant": {
"fruit": {
"apple": {
"type": "gala"
}
}
},
"computer": {
"laptop": {
"maker": "hp"
}
}
}

Parse the complex JSON in Python without storing in File

I'm trying the parse the following JSON data without storing it in a file, using Python.
{
"select": {
"value": "s_name"
},
"from": "student",
"where": {
"in": [
"s_id",
{
"select": {
"value": "s_id"
},
"from": "student_course",
"where": {
"in": [
"c_id",
{
"select": {
"value": "c_id"
},
"from": "course",
"where": {
"or": [
{
"and": [
{
"eq": [
"c_name",
{
"literal": "DSA"
}
]
},
{
"eq": [
"c_name",
{
"literal": "dbms"
}
]
}
]
},
{
"eq": [
"c_name",
{
"literal": "algorithm"
}
]
}
]
}
}
]
}
}
]
}
}
I'm using the following code:
import json
x = "JSON Data which is shared above"
y = json.dumps(x)
jsonDict = json.loads(y)
print (jsonDict['where'])
And not sure, how to proceed further, could you please advise, how it can be done?
I want to fetch the value of all objects, especially where clause.

json.dumps() takes an object and encodes it into a JSON string. But you are trying to take a JSON string and decode it into an object (a dict in this case). The method you should be applying against x therefore is json.loads(). You can then convert the resulting dict back into a JSON string, y, with json.dumps():
import json
x = """{
"select": {
"value": "s_name"
},
"from": "student",
"where": {
"in": [
"s_id",
{
"select": {
"value": "s_id"
},
"from": "student_course",
"where": {
"in": [
"c_id",
{
"select": {
"value": "c_id"
},
"from": "course",
"where": {
"or": [
{
"and": [
{
"eq": [
"c_name",
{
"literal": "DSA"
}
]
},
{
"eq": [
"c_name",
{
"literal": "dbms"
}
]
}
]
},
{
"eq": [
"c_name",
{
"literal": "algorithm"
}
]
}
]
}
}
]
}
}
]
}
}"""
jsonDict = json.loads(x) # from string to a dict
print(jsonDict['where'])
y = json.dumps(jsonDict) # from dict back to a string
Prints:
{'in': ['s_id', {'select': {'value': 's_id'}, 'from': 'student_course', 'where': {'in': ['c_id', {'select': {'value': 'c_id'}, 'from': 'course', 'where': {'or': [{'and': [{'eq': ['c_name', {'literal': 'DSA'}]}, {'eq': ['c_name', {'literal': 'dbms'}]}]}, {'eq': ['c_name', {'literal': 'algorithm'}]}]}}]}}]}

manipulating json in python using recursion

All,
I am trying to change the way some json looks by going through and formatting it in the following way:
1. flatten all of the fields lists
2. Then remove the fields lists and replace them with the name : flatten list
Example:
{
"name": "",
"fields": [{
"name": "keys",
"fields": [{
"node-name": "0/0/CPU0"
},
{
"interface-name": "TenGigE0/0/0/47"
},
{
"device-id": "ASR9K-H1902.corp.cisco.com"
}
]
},
{
"name": "content",
"fields": [{
"name": "lldp-neighbor",
"fields": [{
"receiving-interface-name": "TenGigE0/0/0/47"
},
{
"receiving-parent-interface-name": "Bundle-Ether403"
},
{
"device-id": "ASR9K-H1902.corp.cisco.com"
},
{
"chassis-id": "78ba.f975.a64f"
},
{
"port-id-detail": "Te0/1/0/4/0"
},
{
"header-version": 0
},
{
"hold-time": 120
},
{
"enabled-capabilities": "R"
},
{
"platform": ""
}
]
}]
}
]
}
Would turn into:
{
"": [{
"keys": [{
"node-name": "0/0/CPU0",
"interface-name": "TenGigE0/0/0/47",
"device-id": "ASR9K-H1902.corp.cisco.com"
}]
},
{
"content": [{
"lldp-neighbor": [{
"receiving-interface-name": "TenGigE0/0/0/47",
"receiving-parent-interface-name": "Bundle-Ether403",
"device-id": "ASR9K-H1902.corp.cisco.com",
"chassis-id": "78ba.f975.a64f",
"port-id-detail": "Te0/1/0/4/0",
"header-version": 0,
"hold-time": 120,
"enabled-capabilities": "R",
"platform": ""
}]
}]
}
]
}
I have tried the following to get the list flattened:
def _flatten_fields(self, fields_list):
c = {}
for b in [d for d in fields_list if bool(d)]:
c.update(b)
return c
This seems to work but I can't figure out a way to get into the sub levels using recursion, I am saving all flatten lists and names into a new dictionary, is there a way to do it by just manipulating the original dictionary?

This worked on the example you provided:
import json
def flatten(data):
result = dict()
if isinstance(data, dict):
if 'name' in data:
name = data['name']
result[name] = flatten(data['fields'])
else:
key = data.keys()[0]
value = data.values()[0]
result[key] = value
else:
for entry in data:
result.update(flatten(entry))
return result
print json.dumps(flatten(data), indent=4)
Output
{
"": {
"keys": {
"node-name": "0/0/CPU0",
"interface-name": "TenGigE0/0/0/47",
"device-id": "ASR9K-H1902.corp.cisco.com"
},
"content": {
"lldp-neighbor": {
"receiving-interface-name": "TenGigE0/0/0/47",
"receiving-parent-interface-name": "Bundle-Ether403",
"header-version": 0,
"port-id-detail": "Te0/1/0/4/0",
"chassis-id": "78ba.f975.a64f",
"platform": "",
"device-id": "ASR9K-H1902.corp.cisco.com",
"hold-time": 120,
"enabled-capabilities": "R"
}
}
}
}
It doesn't have the extra list layers shown in your expected output, but I don't think you want those.

This worked on the example you provided:
def flatten_fields(fields_list):
c = {}
for item in fields_list:
for key in item:
if key == "fields":
c[item["name"]] = flatten_fields(item["fields"])
elif key != "name":
c[key] = item[key]
break
return [c]
But it works on a list of dictionaries, so you should call it like flatten_fields([data])[0].
The output is:
{
"": [{
"keys": [{
"node-name": "0/0/CP0",
"interface-name": "TenGigE0/0/0/47",
"device-id": "ASR9K-H1902.corp.cisco.com"
}],
"content": [{
"lldp-neighbor": [{
"chassis-id": "78ba.f975.a64f",
"receiving-parent-interface-name": "Bndle-Ether403",
"enabled-capabilities": "R",
"device-id": "ASR9K-H1902.corp.cisco.com",
"hold-time": 120,
"receiving-interface-name": "TenGigE0/0/0/47",
"platform": "",
"header-version": 0,
"port-id-detail": "Te0/1/0/4/0"
}]
}]
}]
}

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Filter nested python dict by value - python

Related

Writing resilient recursive code that will return results from a big json file

How to remove parent json element in python3 if child is object is empty

Create nested dictionary using keypath in Python

Parse the complex JSON in Python without storing in File

manipulating json in python using recursion

Categories

Resources