Create tree structure from csv data for collapse menu - python

from collections import defaultdict
import json

import pandas as pd

# NOTE(review): `data` is not defined in this snippet; presumably it holds the
# parsed CSV rows (Analyzer, Mode, SubMode, Filename, Signal) -- confirm.
df = pd.DataFrame(data)

# Four nested dict levels, ending in a list of signals per filename.
d = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(list))))
for row in df.itertuples():
    # itertuples() yields the index at position 0, so the columns are 1..5.
    d[row[1]][row[2]][row[3]][row[4]].append(row[5])

# Round-trip through JSON to turn the nested defaultdicts into plain dicts.
# `json` was used here without being imported; the import is added above.
d = json.dumps(d)
d = json.loads(d)
I have this code that reads some CSV data into a DataFrame. I found a way to create a tree-like structure using defaultdict by looping through the DataFrame and populating it. The data will always have the same depth:
Analyzer: Mode: SubMode: Filename: Signal
The result looks like this:
{
"Analyzer1": {
"Mode1": { "SubMode1": { "filename2": ["Signal1"] } },
"Mode2": {
"SubMode2": {
"filename1": [
"Signal2",
"Signal3"
]
}
},
"Mode3": {
"SubMode1": {
"filename1": ["Signal2"]
},
"SubMode3": {
"filename1": ["Signal3"]
}
}
},
"Analyzer2": {
"Mode1": {
"SubMode4": {
"filename1": ["Signal2"]
}
}
}
}
This is great, but I'm obligated to do some changes to the final result, as this tree will be used with a react package for displaying collapsible menus (react-checkbox-tree: https://www.npmjs.com/package/react-checkbox-tree)
The package uses a similar structure; the difference is that each level has additional properties, and its children (if any) are in a list. This is the output that I am trying to achieve:
[
{
"label": "Analyzer1",
"value": "analyzer1",
"children": [
{
"label": "Mode1",
"value": "analyzer1/mode1",
"children": [
{
"label": "SubMode1",
"value": "analyzer1/mode1/submode1",
"children": [
{
"label": "Filename2",
"value": "analyzer1/mode1/submode1/filename2",
"children": [
{
"label": "Signal1",
"value": "analyzer1/mode1/submode1/filename2/signal1"
}
]
}
]
}
]
},
{
"label": "Mode2",
"value": "analyzer1/mode2",
"children": [
{
"label": "SubMode2",
"value": "analyzer1/mode2/submode2",
"children": [
{
"label": "Filename1",
"value": "analyzer1/mode2/submode2/filename1",
"children": [
{
"label": "Signal2",
"value": "analyzer1/mode2/submode2/filename1/signal2"
},
{
"label": "Signal3",
"value": "analyzer1/mode2/submode2/filename1/signal3"
}
]
}
]
}
]
},
{
"label": "Mode3",
"value": "analyzer1/mode3",
"children": [
{
"label": "SubMode1",
"value": "analyzer1/mode3/submode1",
"children": [
{
"label": "Filename1",
"value": "analyzer1/mode3/submode1/filename1",
"children": [
{
"label": "Signal2",
"value": "analyzer1/mode3/submode1/filename1/signal2"
}
]
}
]
},
{
"label": "SubMode3",
"value": "analyzer1/mode3/submode3",
"children": [
{
"label": "Filename1",
"value": "analyzer1/mode3/submode3/filename1",
"children": [
{
"label": "Signal3",
"value": "analyzer1/mode3/submode3/filename1/signal3"
}
]
}
]
}
]
}
]
},
{
"label": "Analyzer2",
"value": "analyzer2",
"children": [
{
"label": "Mode1",
"value": "analyzer2/mode1",
"children": [
{
"label": "SubMode4",
"value": "analyzer2/mode1/submode4",
"children": [
{
"label": "Filename1",
"value": "analyzer2/mode1/submode4/filename1",
"children": [
{
"label": "Signal2",
"value": "analyzer2/mode1/submode4/filename1/signal2"
}
]
}
]
}
]
}
]
}
]
I've tried the following, but it is incomplete: I can't find a way to add the children to the parent node.
def adjust(d, res, parent, children, path):
    """Append react-checkbox-tree style nodes for ``d`` to ``res``.

    Each key of ``d`` becomes a node with a ``label`` (the key), a ``value``
    (the lower-cased, slash-joined path to the key) and a ``children`` list.
    A dict value is processed recursively; a list value is treated as the
    leaf level and each item becomes a node without ``children``.

    Args:
        d: Nested dict whose innermost values are lists of leaf names.
        res: List that receives the nodes built for the keys of ``d``.
        parent: Kept for backward compatibility with the original signature;
            not needed to build the tree.
        children: Kept for backward compatibility; not needed either.
        path: Prefix for ``value``; empty at the top level, otherwise it
            ends with ``'/'``.

    Returns:
        ``res``, so the caller can use the call as an expression.
    """
    for key, value in d.items():
        node_value = path + key.lower()
        node = {'label': key, 'value': node_value}
        if isinstance(value, list):
            # Leaf level: the list items are the final nodes.
            node['children'] = [
                {'label': leaf, 'value': node_value + '/' + leaf.lower()}
                for leaf in value
            ]
        else:
            node['children'] = []
            # Let the recursive call fill this node's own children list.
            adjust(value, node['children'], node, node['children'],
                   node_value + '/')
        res.append(node)
    return res
# Top-level call: empty result list, empty parent dict, empty children list and
# empty path prefix. The return value is not captured here.
adjust(d, [], {}, [], '')
Any suggestions or pointers would be appreciated, I'm not very skilled at doing recursion.

Your recursive function should not have that many arguments. Let it build its subtree autonomously, only needing the path as extra information from the caller. And make it functional, so that it returns the result for the subtree it deals with. That way the caller can inject the result in its own children attributes.
Here is how that could work:
def maketree(d, path=""):
    """Convert a nested dict (with lists at the leaf level) into tree nodes.

    Every dict key becomes a node ``{"label", "value", "children"}`` and
    every list item becomes a leaf node ``{"label", "value"}``. ``value`` is
    the lower-cased path of labels joined with ``/``.

    Args:
        d: Either a dict mapping labels to sub-structures, or a list of
            leaf labels.
        path: Prefix for ``value``; empty at the top level, otherwise it
            ends with ``/``.

    Returns:
        A list of node dicts for ``d``.
    """
    if isinstance(d, list):
        # str() keeps non-string leaves (e.g. numeric signal names) from
        # raising AttributeError on .lower(); the label is left untouched.
        return [{"label": k, "value": path + str(k).lower()} for k in d]

    nodes = []
    for k, v in d.items():
        # Compute the node's value once and reuse it as the child prefix.
        value = path + str(k).lower()
        nodes.append({
            "label": k,
            "value": value,
            "children": maketree(v, value + "/"),
        })
    return nodes
Call it as:
# `d` is the nested dict to convert; `path` is left at its default of "".
tree = maketree(d)

Related

Change structure of python dictionary using recursion

I have the following dictionary:
{
"Land": {
"2018": {
"VALUE:Avg": 49.0,
"VALUE:Sum": 49.0
},
"2008": {
"VALUE:Avg": 27.24,
"VALUE:Sum": 27.24
}
},
"Air": {
"2010": {
"VALUE:Avg": 57.4,
"VALUE:Sum": 57.4
},
"2017": {
"VALUE:Avg": 30.72,
"VALUE:Sum": 61.44
}
}
}
I have to change it to following format with parent keys as labels and the values as children:
[
{
"label": "Land",
"children": [
{
"label": "2018",
"children": [
{
"label": "VALUE:Avg"
},
{
"label": "VALUE:Sum"
}
]
},
{
"label": "2008",
"children": [
{
"label": "VALUE:Avg"
},
{
"label": "VALUE:Sum"
}
]
}
]
},
]
I tried to achieve this with recursion, but it is not working.
Recursion should work:
def transfer(mydict):
    """Turn a nested dict into a list of ``{"label", "children"}`` nodes.

    Each key becomes a node labelled with that key. When the key's value is
    itself a dict, the node gets a ``"children"`` list built the same way;
    any other value is dropped and the node has no ``"children"`` entry.
    """
    def as_node(label, payload):
        # Only dict payloads contribute a "children" entry.
        node = {"label": label}
        if isinstance(payload, dict):
            node["children"] = transfer(payload)
        return node

    return [as_node(label, payload) for label, payload in mydict.items()]

Writing resilient recursive code that will return results from a big json file

I have written some recursive code. I would like more experienced people to tell me how resilient and fail-safe my code is:
I have a json file (Json file can be as big as 300MB):
[
{
"modules": {
"webpages": []
},
"webpages": {
"ip_addr": {
"value": "127.0.0.1",
"tags": []
},
"http": {
"status": {
"value": "Unavailable",
"tags": []
},
"title": {
"value": "403 Forbidden",
"tags": [
{
"category": "Server Code",
"match": "403"
},
{
"category": "Interesting Words",
"match": "Forbidden"
}
]
},
"server": {
"value": "Apache",
"tags": [
{
"category": "Apache Server",
"match": "Apache"
}
]
}
},
"redirects": [],
"robottxt": null
}
},
{
"modules": {
"webpages": []
}
}
]
I want to return value keys where tags are populated.
So I want to ignore:
"status": {
"value": "Unavailable",
"tags": []
},
But I want to return the title and server values. I also want to return ip_addr.value
I have written this code:
def getAllValues(nestedDictionary, firstArray, firstObj, firstUseful):
    """Collect every nested entry whose ``tags`` list is non-empty.

    The dictionary is walked depth-first. For each nested dict a record
    ``{"Key": <key>, "useful": <ip info>}`` is prepared. While that dict is
    visited, its ``"value"`` entry is copied into the record, and a non-empty
    ``"tags"`` list is copied in and causes the record to be appended to the
    result.

    Args:
        nestedDictionary: The dict to walk.
        firstArray: List that receives the matching records (mutated).
        firstObj: Record for ``nestedDictionary`` itself (mutated).
        firstUseful: IP information inherited from the enclosing level.

    Returns:
        ``firstArray`` with the matching records appended.
    """
    returnedArray = firstArray
    tempValue = firstObj
    useful = firstUseful

    # The lookup does not depend on the loop variable, so do it once. Only a
    # dict-shaped "ip_addr" is used; any other shape keeps the inherited value
    # instead of raising AttributeError on .get().
    ipEntry = nestedDictionary.get("ip_addr")
    if isinstance(ipEntry, dict):
        # The key was previously misspelled "ip_add"; the documented output
        # uses "ip_addr".
        useful = {"ip_addr": ipEntry.get("value")}

    for key, value in nestedDictionary.items():
        if isinstance(value, dict):
            record = {"Key": key, "useful": useful}
            getAllValues(value, returnedArray, record, useful)
        elif key == "value":
            tempValue["value"] = value
        elif key == "tags" and isinstance(value, list) and value:
            tempValue["tags"] = value
            # The same dict object is appended, so a "value" key seen after
            # "tags" still ends up in the stored record.
            returnedArray.append(tempValue)
    return returnedArray
The above code should return:
[
{
"Key": "title",
"value": "403 Forbidden",
"useful": { "ip_addr": "127.0.0.1" },
"tags": [
{
"category": "Server Code",
"match": "403"
},
{
"category": "Interesting Words",
"match": "Forbidden"
}
]
},
{
"Key": "server",
"value": "Apache",
"useful": { "ip_addr": "127.0.0.1" },
"tags": [
{
"category": "Apache Server",
"match": "Apache"
}
]
}
]
It's a long post, but hopefully someone can give me some assurance :)

JSON dump appending random characters to end of file

I am writing a parser that goes through a list of data that is roughly formatted:
{
"teachers": [
{
"fullName": "Testing",
"class": [
{
"className": "Counselor",
"school": {
"id": "2b6671cb-617d-48d6-b0b5-3d44ce4da21c"
}
}
]
},
...
}
The parser is supposed to check for duplicate names within this json object, and when it stumbles upon said duplicate name, append the class to the class array.
So for example:
{
"teachers": [
{
"fullName": "Testing",
"class": [
{
"className": "Counselor",
"school": {
"id": "2b6671cb-617d-48d6-b0b5-3d44ce4da21c"
}
}
]
},
{
"fullName": "Testing",
"class": [
{
"className": "Math 8",
"school": {
"id": "2b6671cb-617d-48d6-b0b5-3d44ce4da21c"
}
}
]
},
...
}
Would return
{
"teachers": [
{
"fullName": "Testing",
"class": [
{
"className": "Counselor",
"school": {
"id": "2b6671cb-617d-48d6-b0b5-3d44ce4da21c"
}
},
{
"className": "Math 8",
"school": {
"id": "2b6671cb-617d-48d6-b0b5-3d44ce4da21c"
}
},
]
},
...
}
My current parser works just fine for most objects; however, for some reason it doesn't catch some of the duplicates despite the names being exactly the same, and it is also appending the string
}7d-48d6-b0b5-3d44ce4da21c"
}
}
]
}
]
to the end of the json document. I am not sure why it would do this considering I am just dumping the modified json (which only is modified within the array).
My parser code is:
i_duplicates = []
name_duplicates = []


def converter():
    """Merge teachers that share a ``fullName`` inside ``final2.json``.

    The first teacher seen with a given name is kept; the ``class`` entries
    of every later teacher with the same name are appended to it and the
    later teacher is dropped. The file is rewritten in place only when at
    least one duplicate was merged.

    Side effects:
        ``i_duplicates`` and ``name_duplicates`` are reset and refilled with
        one entry per distinct name. ``index`` is the teacher's position in
        the merged list.
    """
    # Reset in place so repeated calls do not treat every teacher as a
    # duplicate left over from the previous run.
    del i_duplicates[:]
    del name_duplicates[:]

    with open("final2.json", "r+") as file:
        infinite = json.load(file)

        # Build a new list instead of popping from the list being iterated;
        # popping during enumeration skips the element after each removal,
        # which is why some duplicates were missed.
        merged = []
        index_by_name = {}
        for teacher in infinite["teachers"]:
            full_name = teacher["fullName"]
            if full_name in index_by_name:
                # Carry over the duplicate's own class entries rather than
                # rebuilding one entry with a hard-coded school id.
                merged[index_by_name[full_name]]["class"].extend(teacher["class"])
            else:
                index_by_name[full_name] = len(merged)
                i_duplicates.append({"fullName": full_name, "index": len(merged)})
                name_duplicates.append({"fullName": full_name})
                merged.append(teacher)

        if len(merged) != len(infinite["teachers"]):
            infinite["teachers"] = merged
            file.seek(0)
            json.dump(infinite, file, indent=4)
            # The merged document is shorter than the original; without
            # truncate() the old tail stays at the end of the file.
            file.truncate()
def search(a, t):
    """Return the ``index`` of the first entry in ``a`` whose ``fullName`` is ``t``.

    When no entry matches, a red "not found" message is printed and the
    function falls through, returning ``None``.
    """
    # A private sentinel tells "no match" apart from a stored index of None.
    missing = object()
    position = next(
        (entry["index"] for entry in a if entry["fullName"] == t),
        missing,
    )
    if position is not missing:
        return position
    print(Fore.RED + "not found" + Fore.RESET)
I know I am going about this inefficiently, but I am not sure how to fix the issues the current algorithm is having. Any feedback appreciated.

Search for key in Nested Json in Python

I have below Json.
My requirement is to search this Json and get 'id' value if 'name' is equal to 'Latisha Chase'
[
{
"_id": "5d3121cd001453772160a791",
"friends": [
{
"id": 6,
"name": "Mcknight Tran"
},
{
"id": 7,
"name": "Helena Bowers"
},
{
"id": 8,
"name": "Dorsey Ayala"
}
]
},
{
"_id": "5d3121cd838efa513e7dda96",
"friends": [ {
"friends": [
{
"id": 90,
"name": "w Stark"
},
{
"id": 91,
"name": "w Jacobs"
},
{
"id": 93,
"name": "w Garner"
}
]},
{
"id": 10,
"name": "Amalia Stark"
},
{
"id": 11,
"name": "Myra Jacobs"
},
{
"id": 12,
"name": "Norton Garner"
}
]
}
]
This is sample code that I have. Could anyone help me with this.?
I tried recursive codes online but didn't work with my example here.
Update:
It's not necessary that 'friends' has a single depth; it can have friends inside friends, e.g. friends [{ friends[ {}]}]
A more general approach using recursion:
def recursive_function(name, l):
    """Print the ``id`` of every dict under ``l`` whose ``name`` equals ``name``.

    ``l`` may be a list or a dict, nested to any depth. Lists are searched
    item by item. For a dict, its own ``name`` is checked first and then
    every list or dict among its values is searched. Anything else is
    ignored.
    """
    if isinstance(l, list):
        nested = l
    elif isinstance(l, dict):
        if l.get("name") == name:
            print(l.get("id"))
        # Only containers can hold further matches.
        nested = [v for v in l.values() if isinstance(v, (list, dict))]
    else:
        return
    for item in nested:
        recursive_function(name, item)
# NOTE(review): json_obj is not defined in this snippet; presumably it is the
# parsed JSON document being searched -- confirm.
recursive_function("Latisha Chase",json_obj)
Result:
3
Try this
# Sample data: a list of records, each with a flat "friends" list.
j = [
    {
        "_id": "5d3121cd001453772160a791",
        "friends": [
            {"id": 6, "name": "Mcknight Tran"},
            {"id": 7, "name": "Helena Bowers"},
            {"id": 8, "name": "Dorsey Ayala"},
        ],
    },
    {
        "_id": "5d3121cded44d8ba6ad96b78",
        "friends": [
            {"id": 2, "name": "June Gilbert"},
            {"id": 3, "name": "Latisha Chase"},
            {"id": 4, "name": "Franco Carlson"},
        ],
    },
    {
        "_id": "5d3121cd838efa513e7dda96",
        "friends": [
            {"id": 10, "name": "Amalia Stark"},
            {"id": 11, "name": "Myra Jacobs"},
            {"id": 12, "name": "Norton Garner"},
        ],
    },
]

# Collect the ids of every direct friend named 'Latisha Chase'. This only
# looks one level deep; friends nested inside friends are not searched.
# `or []` keeps a record without a "friends" key from raising TypeError.
matching_ids = [
    friend.get('id')
    for record in j
    for friend in record.get('friends') or []
    if friend.get('name') == 'Latisha Chase'
]

# print() with parentheses works on Python 3 as well as Python 2; the bare
# print statement used before is a SyntaxError on Python 3.
for friend_id in matching_ids:
    print(friend_id)

Flatten nested JSON arrays with inherits properties in Python

I have a big json/dictionary with different levels of nested json arrays, I would like to flatten it, and also capture the relationship of the structure,
Part of my json looks like:
{
"name": "root",
"type": "all",
"children": [
{
"name": "properties",
"type": "feature",
"children": [
{
"name": "print",
"type": "feature",
"children": [
{
"name": "graphic print",
"type": "feature",
"inherits": true
},
{
"name": "striped print",
"type": "feature",
"inherits": true,
"children": [
{
"name": "pinstriped",
"type": "feature",
"inherits": true
},
{
"name": "light stripe",
"type": "feature",
"inherits": true
},
{
"name": "wide stripe",
"type": "feature",
"inherits": true
}
]
}
]
}
]
},
{
"name": "colours",
"type": "colour",
"children": [
{
"name": "main colours",
"type": "colour",
"children": [
{
"name": "black",
"type": "colour",
"children": [
{
"name": "light black",
"type": "colour",
"inherits": true
},
{
"name": "blue black",
"type": "colour",
"inherits": true
}
]
},
{
"name": "red",
"type": "colour",
"children": [
{
"name": "bright red",
"type": "colour",
"inherits": true
},
{
"name": "light red",
"type": "colour"
}
]
}
]
}
]
},
{
"name": "genders",
"type": "gender",
"children": [
{
"name": "female",
"type": "gender"
},
{
"name": "male",
"type": "gender"
}
]
}
]
}
The depth of nesting is not the same everywhere. I
- want all the nodes (values of "name")
- also want all of a node's parents if the node has an "inherits" key with a true value.
Something like:
But if there are better ideas on how to store this data, will be happy to accept as well!
Many Thanks!
I think this should do what you need:
def parse_dict_of_dict(_dict, _parent='', ret_dict=None):
    """Map every node name in a ``children`` tree to its inherited parents.

    A node with a true ``inherits`` value maps to the ``_parent`` string it
    received (stripped of surrounding spaces and commas); any other node maps
    to ``None``. Children of an inheriting node receive
    ``"<name>, <parent string>"``; children of a non-inheriting node receive
    just ``"<name>"``.

    Args:
        _dict: Node dict with a ``name`` and optional ``children`` (list of
            node dicts) and ``inherits`` entries.
        _parent: Parent string handed down by the caller.
        ret_dict: Accumulator shared through the recursion. A fresh dict is
            created when omitted, so separate top-level calls no longer
            share results through a mutable default argument.

    Returns:
        The accumulator dict, mapping node name to parent string or ``None``.
    """
    if ret_dict is None:
        ret_dict = {}

    _name = _dict["name"]
    _children = _dict.get('children', None)
    _inherit = _dict.get('inherits', False)

    # A missing "children" entry (None) is not a list, so it is skipped too.
    if isinstance(_children, list):
        # The same for every child, so compute it once.
        _child_parent = _name + ', ' + _parent if _inherit else _name
        for _child in _children:
            parse_dict_of_dict(_child, _child_parent, ret_dict)

    ret_dict[_name] = _parent.strip(' ').strip(',') if _inherit else None
    return ret_dict
Can you elaborate more on your output?
OR you can use this function to flatten a nested JSON to a simple JSON.
def parse_dict_of_dict(_dict, _str=''):
    """Flatten a nested dict into a single-level dict with joined keys.

    Nested dict keys are joined with ``_``. List items get their position
    appended (``key_0``, ``key_1``, ...). Scalar values outside lists are
    stored as ``str(value)``; scalar list items are stored unchanged.

    Args:
        _dict: The dict to flatten.
        _str: Key prefix accumulated from the enclosing levels.

    Returns:
        A new flat dict.
    """
    ret_dict = {}
    # .items() works on Python 2 and 3; .iteritems() exists only on Python 2.
    for k, v in _dict.items():
        if isinstance(v, dict):
            ret_dict.update(parse_dict_of_dict(v, _str=_str + k + '_'))
        elif isinstance(v, list):
            for index, item in enumerate(v):
                if isinstance(item, dict):
                    ret_dict.update(
                        parse_dict_of_dict(item, _str=_str + k + '_%d_' % index)
                    )
                else:
                    # Keep the prefix here as well; without it, scalar list
                    # items from different branches collide on the same key.
                    ret_dict[_str + k + '_%d' % index] = item
        else:
            try:
                ret_dict[_str + k] = str(v)
            except UnicodeError:
                # Python 2 only: str() of a non-ASCII unicode value fails.
                # NOTE(review): assumes the default encoding was ASCII, as in
                # the original unicode.encode(v, errors='ignore') -- confirm.
                ret_dict[_str + k] = v.encode('ascii', errors='ignore')
    return ret_dict

Categories

Resources