I have links in hierarchical form, like this:
root/Arts/
root/Arts/Literature/
root/Arts/Literature/Novels/
root/Arts/Literature/Comics/
root/Sports/
root/Sports/Football/
...
I want to plot them and visualize the tree, but the tree goes very deep and has too many links; with pydot/graphviz I cannot usefully view more than 3 levels.
I want to convert the paths to a nested dictionary of key/value pairs with children, like this:
[
    {
        "name": "root",
        "parent": "null",
        "children": [
            {
                "name": "Arts",
                "parent": "root",
                "children": [
                    {
                        "name": "Literature",
                        "parent": "Arts",
                        "children": [
                            {
                                "name": "Novels",
                                "parent": "Literature"
                            },
                            {
                                "name": "Comics",
                                "parent": "Literature"
                            }
                        ]
                    }
                ]
            },
            {
                "name": "Sports",
                "parent": "root",
                "children": [
                    {
                        "name": "Football",
                        "parent": "Sports"
                    }
                ]
            }
        ]
    }
]
so that I can plot it as an interactive d3.js tree.
EDIT
This worked for me:
def add_to_tree(name, parent, start_tree):
    # Walk the tree until we find the parent, then append the new node.
    for x in start_tree:
        if x["name"] == parent:
            x["children"].append({"name": name, "parent": parent, "children": []})
        else:
            add_to_tree(name, parent, x["children"])

def dic_converter_single_root(sorted_list):
    start_tree = [{"name": "root", "parent": "null", "children": []}]
    for x in sorted_list:
        # For "root/Arts/Literature/", name is "Literature" and parent is "Arts".
        name = x.split('/')[-2]
        parent = x.split('/')[-3]
        add_to_tree(name, parent, start_tree)
    return start_tree
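For illustration, a small usage sketch (hedged; the input must be sorted so every parent path appears before its children):

import json

links = [
    "root/Arts/",
    "root/Arts/Literature/",
    "root/Arts/Literature/Novels/",
    "root/Arts/Literature/Comics/",
    "root/Sports/",
    "root/Sports/Football/",
]
tree = dic_converter_single_root(sorted(links))
print(json.dumps(tree, indent=4))  # prints the structure shown above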
I am trying to build a Python script that converts a list of paths into a JSON object structured as below.
json_out is a list of dictionaries, each with four elements: (1) type, (2) name, (3) path, and (4) children.
json_out = [
    {
        "type": "folder",
        "name": "dir1",
        "path": "/dir1",
        "children": [
            {
                "type": "folder",
                "name": "photos",
                "path": "/dir1/photos",
                "children": [
                    {
                        "type": "file",
                        "name": "mydir1.pdf",
                        "path": "/dir1/photos/mydir1.pdf"
                    },
                    {
                        "type": "file",
                        "name": "yourdir1.pdf",
                        "path": "/dir1/photos/yourdir1.pdf"
                    }
                ]
            }
        ]
    },
    {
        "type": "folder",
        "name": "dir2",
        "path": "/dir2",
        "children": [
            {
                "type": "folder",
                "name": "photos",
                "path": "/dir2/photos",
                "children": [
                    {
                        "type": "file",
                        "name": "mydir2.pdf",
                        "path": "/dir2/photos/mydir2.pdf"
                    },
                    {
                        "type": "file",
                        "name": "yourdir2.pdf",
                        "path": "/dir2/photos/yourdir2.pdf"
                    }
                ]
            }
        ]
    }
]
This is what I have so far, but it does not return the correct output structure:
import json

def my_fx(paths):
    for path in paths:
        file_path = path
        l = file_path.split('/')[1:]
        def gen_json(l=l, d=dict()):
            tmp = {}
            if not d:
                d["name"] = l.pop(-1)
            tmp["children"] = d
            tmp["name"] = l.pop(-1)
            return gen_json(l, tmp) if l else tmp
        print(json.dumps(gen_json(l), ensure_ascii=False))
My Input
list_of_paths = [
    "dir1/photos/mydir1.pdf",
    "dir1/photos/yourdir1.pdf",
    "dir2/photos/mydir2.pdf",
    "dir2/photos/yourdir2.pdf"
]
My Output
{"children": {"name": "mydir1.pdf"}, "name": "photos"}
{"children": {"name": "yourdir1.pdf"}, "name": "photos"}
{"children": {"name": "mydir2.pdf"}, "name": "photos"}
{"children": {"name": "yourdir2.pdf"}, "name": "photos"}
Thanks in advance
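For reference, here is a minimal sketch of one way to produce the desired structure (build_tree is a hypothetical helper name, not from the original script):

import json

def build_tree(paths):
    # Hypothetical helper: folds "a/b/c.ext" paths into folder/file dicts.
    root = {"children": []}
    for path in paths:
        parts = path.split("/")
        node, cur = root, ""
        for part in parts[:-1]:  # every prefix segment is a folder
            cur += "/" + part
            child = next((c for c in node["children"] if c["name"] == part), None)
            if child is None:
                child = {"type": "folder", "name": part, "path": cur, "children": []}
                node["children"].append(child)
            node = child
        # the last segment is the file itself
        node["children"].append(
            {"type": "file", "name": parts[-1], "path": cur + "/" + parts[-1]})
    return root["children"]

json_out = build_tree(list_of_paths)
print(json.dumps(json_out, indent=4))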
I have a nested JSON file, and I would like to remove duplicates based on two keys.
JSON example:
"books": [
    {
        "id": "1",
        "story": {
            "title": "Lonely lion"
        },
        "description": [
            {
                "release": false,
                "author": [
                    {
                        "name": "John",
                        "main": 1
                    },
                    {
                        "name": "Jeroge",
                        "main": 0
                    },
                    {
                        "name": "Peter",
                        "main": 0
                    }
                ]
            }
        ]
    },
    {
        "id": "2",
        "story": {
            "title": "Lonely lion"
        },
        "description": [
            {
                "release": false,
                "author": [
                    {
                        "name": "Jeroge",
                        "main": 1
                    },
                    {
                        "name": "Peter",
                        "main": 0
                    },
                    {
                        "name": "John",
                        "main": 0
                    }
                ]
            }
        ]
    },
    {
        "id": "3",
        "story": {
            "title": "Lonely lion"
        },
        "description": [
            {
                "release": false,
                "author": [
                    {
                        "name": "John",
                        "main": 1
                    },
                    {
                        "name": "Jeroge",
                        "main": 0
                    }
                ]
            }
        ]
    }
]
Here I try to match on the title and the author names. For example, id 1 and id 2 are duplicates, since the title is the same and the author names are the same (the order of the authors doesn't matter, and the main attribute can be ignored). So in the output JSON only one of id 1 or id 2 should remain, along with id 3. In the final output I need two files.
Output_JSON:
"books": [
    {
        "id": "1",
        "story": {
            "title": "Lonely lion"
        },
        "description": [
            {
                "release": false,
                "author": [
                    {
                        "name": "John",
                        "main": 1
                    },
                    {
                        "name": "Jeroge",
                        "main": 0
                    },
                    {
                        "name": "Peter",
                        "main": 0
                    }
                ]
            }
        ]
    },
    {
        "id": "3",
        "story": {
            "title": "Lonely lion"
        },
        "description": [
            {
                "release": false,
                "author": [
                    {
                        "name": "John",
                        "main": 1
                    },
                    {
                        "name": "Jeroge",
                        "main": 0
                    }
                ]
            }
        ]
    }
]
duplicatedID.csv:
1-2
I tried the following method, but it does not give correct results:
list = []
duplicate_Id = []
for data in (json_data['books'])[:]:
    elements = []
    id = data['id']
    title = data['story']['title']
    elements.append(title)
    for i in (data['description'][0]['author']):
        name = (i['name'])
        elements.append(name)
    if not list:
        list.append(elements)
    else:
        for j in list:
            if set(elements) == set(j):
                duplicate_Id.append(id)
        elements = []
The general idea is to:
Get the groups identified by some key function that collects duplicates.
Then return the first entry of each group, ensuring no duplicates.
Define the key function as the sorted list of author names: the authors are by definition the unique key, but they may appear in any order.
import json
from itertools import groupby

# `books` is assumed to be an open file handle for the JSON shown above,
# enclosed in an object (see the note below).
j = json.load(books)

def transform(books):
    # groupby only merges consecutive runs, so sort by the same key first.
    ordered = sorted(books, key=getAuthors)
    groups = [list(group) for _, group in groupby(ordered, key=getAuthors)]
    return [group[0] for group in groups]

def getAuthors(book):
    # In the sample data every title is identical; include the title in the
    # key as well if titles can differ.
    authors = book['description'][0]['author']
    return sorted([author['name'] for author in authors])

print(transform(j['books']))
If we wanted to get the duplicates instead, we do the same computation but return any sublist with length > 1, since by our definition that is duplicated data.
def transform(books):
    ordered = sorted(books, key=getAuthors)
    groups = [list(group) for _, group in groupby(ordered, key=getAuthors)]
    return [group for group in groups if len(group) > 1]
Where j['books'] is the JSON you gave enclosed in an object.
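To also produce the duplicatedID.csv from the question, a small hedged follow-up using the len(group) > 1 variant of transform above:

with open("duplicatedID.csv", "w") as f:
    # one line per duplicate group, ids joined with '-', e.g. "1-2"
    for group in transform(j['books']):
        f.write("-".join(book["id"] for book in group) + "\n")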
I need help improving my code.
I've got a nested dict with many levels:
{
    "11": {
        "FacLC": {
            "immty": [
                "in_mm",
                "in_mm"
            ],
            "moood": [
                "in_oo",
                "in_oo"
            ]
        }
    },
    "22": {
        "FacLC": {
            "immty": [
                "in_mm",
                "in_mm",
                "in_mm"
            ]
        }
    }
}
And I want to add additional fields on every level, so my output looks like this:
[
    {
        "id": "",
        "name": "11",
        "general": [
            {
                "id": "",
                "name": "FacLC",
                "specifics": [
                    {
                        "id": "",
                        "name": "immty",
                        "characteristics": [
                            {
                                "id": "",
                                "name": "in_mm"
                            },
                            {
                                "id": "",
                                "name": "in_mm"
                            }
                        ]
                    },
                    {
                        "id": "",
                        "name": "moood",
                        "characteristics": [
                            {
                                "id": "",
                                "name": "in_oo"
                            },
                            {
                                "id": "",
                                "name": "in_oo"
                            }
                        ]
                    }
                ]
            }
        ]
    },
    {
        "id": "",
        "name": "22",
        "general": [
            {
                "id": "",
                "name": "FacLC",
                "specifics": [
                    {
                        "id": "",
                        "name": "immty",
                        "characteristics": [
                            {
                                "id": "",
                                "name": "in_mm"
                            },
                            {
                                "id": "",
                                "name": "in_mm"
                            },
                            {
                                "id": "",
                                "name": "in_mm"
                            }
                        ]
                    }
                ]
            }
        ]
    }
]
I managed to write a four-times-nested for loop, which I find inefficient and inelegant:
my_new_dict = []
for main_name, general in my_dict.items():
    generals = []
    for general_name, specific in general.items():
        specifics = []
        for specific_name, characteristics in specific.items():
            characteristics_dicts = []
            for characteristic in characteristics:
                characteristics_dicts.append({
                    "id": "",
                    "name": characteristic,
                })
            specifics.append({
                "id": "",
                "name": specific_name,
                "characteristics": characteristics_dicts,
            })
        generals.append({
            "id": "",
            "name": general_name,
            "specifics": specifics,
        })
    my_new_dict.append({
        "id": "",
        "name": main_name,
        "general": generals,
    })
I am wondering if there is a more compact and efficient solution.
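For what it's worth, the same transformation can also be written as a single nested comprehension (a sketch equivalent to the loops above; whether it is clearer is a matter of taste):

my_new_dict = [
    {"id": "", "name": main_name, "general": [
        {"id": "", "name": general_name, "specifics": [
            {"id": "", "name": specific_name, "characteristics": [
                {"id": "", "name": c} for c in characteristics]}
            for specific_name, characteristics in specific.items()]}
        for general_name, specific in general.items()]}
    for main_name, general in my_dict.items()
]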
In the past I created a function for this. You call it every time you need to add a new field to a nested dict, regardless of how many levels the dict has. You only have to supply the 'full path', which I called the 'keys map', e.g. ['node1', 'node1a', 'node1apart3'].
def insert_value_using_map(_nodes_list_to_be_appended, _keys_map, _value_to_be_inserted):
    # Walk (creating, if needed) each intermediate level...
    for _key in _keys_map[:-1]:
        _nodes_list_to_be_appended = _nodes_list_to_be_appended.setdefault(_key, {})
    # ...then set the value at the final key.
    _nodes_list_to_be_appended[_keys_map[-1]] = _value_to_be_inserted
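A quick usage sketch of that helper:

tree = {}
insert_value_using_map(tree, ['node1', 'node1a', 'node1apart3'], 'some value')
# tree is now {'node1': {'node1a': {'node1apart3': 'some value'}}}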
I'm trying to export a DataFrame into a nested (hierarchical) JSON for D3.js, starting from a solution that only handles one level (parent, children). Any help would be appreciated; I'm new to Python.
My DataFrame contains 7 levels. Here is the expected output.
JSON Example:
{
    "name": "World",
    "children": [
        {
            "name": "Europe",
            "children": [
                {
                    "name": "France",
                    "children": [
                        {
                            "name": "Paris",
                            "population": 1000000
                        }
                    ]
                }
            ]
        }
    ]
}
And here is the Python method:
def to_flare_json(df, filename):
    """Convert dataframe into nested JSON as in flare files used for D3.js"""
    flare = dict()
    d = {"name": "World", "children": []}
    for index, row in df.iterrows():
        parent = row[0]
        child = row[1]
        child1 = row[2]
        child2 = row[3]
        child3 = row[4]
        child4 = row[5]
        child5 = row[6]
        child_value = row[7]
        # Make a list of keys
        key_list = []
        for item in d['children']:
            key_list.append(item['name'])
        # if 'parent' is NOT a key in flare.json, append it
        if parent not in key_list:
            d['children'].append({"name": parent, "children": [{"value": child_value, "name1": child}]})
        # if parent IS a key in flare.json, add a new child to it
        else:
            d['children'][key_list.index(parent)]['children'].append({"value": child_value, "name11": child})
    flare = d
    # export the final result to a json file
    with open(filename + '.json', 'w') as outfile:
        json.dump(flare, outfile, indent=4, ensure_ascii=False)
    return "Done"
[EDIT]
Here is a sample of my df
World Continent Region Country State City Boroughs Population
1 Europe Western Europe France Ile de France Paris 17 821964
1 Europe Western Europe France Ile de France Paris 19 821964
1 Europe Western Europe France Ile de France Paris 20 821964
The structure you want is clearly recursive, so I made a recursive function to fill it:
def create_entries(df):
    entries = []
    # Stopping case
    if df.shape[1] == 2:  # only 2 columns left
        for i in range(df.shape[0]):  # iterating on rows
            entries.append(
                {"Name": df.iloc[i, 0],
                 df.columns[-1]: df.iloc[i, 1]}
            )
    # Iterating case
    else:
        values = set(df.iloc[:, 0])  # Getting the set of unique values
        for v in values:
            entries.append(
                {"Name": v,
                 # reiterating the process but without the first column
                 # and only the rows with the current value
                 "Children": create_entries(
                     df.loc[df.iloc[:, 0] == v].iloc[:, 1:]
                 )}
            )
    return entries
All that's left is to create the dictionary and call the function:
mydict = {"Name": "World",
          "Children": create_entries(data.iloc[:, 1:])}
Then you just write your dict to a JSON file.
I hope my comments are explicit enough; the idea is to recursively use the first column of the dataset as the "Name" and the rest as the "Children".
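For completeness, a minimal sketch of that last step (the file name is arbitrary):

import json

with open("flare.json", "w") as f:
    json.dump(mydict, f, indent=4, ensure_ascii=False)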
Thank you Syncrossus for the answer, but this results in a separate branch for each borough or city.
The result is this:
"Name": "World",
"Children": [
    {
        "Name": "Western Europe",
        "Children": [
            {
                "Name": "France",
                "Children": [
                    {
                        "Name": "Ile de France",
                        "Children": [
                            {
                                "Name": "Paris",
                                "Children": [
                                    {
                                        "Name": "17ème",
                                        "Population": 821964
                                    }
                                ]
                            }
                        ]
                    }
                ]
            }
        ]
    },
    {
        "Name": "Western Europe",
        "Children": [
            {
                "Name": "France",
                "Children": [
                    {
                        "Name": "Ile de France",
                        "Children": [
                            {
                                "Name": "Paris",
                                "Children": [
                                    {
                                        "Name": "10ème",
                                        "Population": 154623
                                    }
                                ]
                            }
                        ]
                    }
                ]
            }
        ]
    }
]
But the desired result is this:
"Name": "World",
"Children": [
    {
        "Continent": "Europe",
        "Children": [
            {
                "Region": "Western Europe",
                "Children": [
                    {
                        "Country": "France",
                        "Children": [
                            {
                                "State": "Ile De France",
                                "Children": [
                                    {
                                        "City": "Paris",
                                        "Children": [
                                            {
                                                "Boroughs": "17ème",
                                                "Population": 82194
                                            },
                                            {
                                                "Boroughs": "16ème",
                                                "Population": 99194
                                            }
                                        ]
                                    },
                                    {
                                        "City": "Saint-Denis",
                                        "Children": [
                                            {
                                                "Boroughs": "10ème",
                                                "Population": 1294
                                            },
                                            {
                                                "Boroughs": "11ème",
                                                "Population": 45367
                                            }
                                        ]
                                    }
                                ]
                            }
                        ]
                    },
                    {
                        "Country": "Belgium",
                        "Children": [
                            {
                                "State": "Oost-Vlaanderen",
                                "Children": [
                                    {
                                        "City": "Gent",
                                        "Children": [
                                            {
                                                "Boroughs": "2ème",
                                                "Population": 1234
                                            },
                                            {
                                                "Boroughs": "4ème",
                                                "Population": 7456
                                            }
                                        ]
                                    }
                                ]
                            }
                        ]
                    }
                ]
            }
        ]
    }
]
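For the level-named keys and merged branches, here is one possible sketch (not the original answer's code; it assumes pandas and that the sample df above is loaded as `data` with columns World, Continent, Region, Country, State, City, Boroughs, Population):

import pandas as pd

def create_named_entries(df):
    # Group rows on the first remaining column so each value yields a
    # single branch, labelled with that column's name.
    level = df.columns[0]
    if df.shape[1] == 2:  # e.g. Boroughs + Population: emit the leaves
        return [{level: row.iloc[0], df.columns[1]: row.iloc[1]}
                for _, row in df.iterrows()]
    return [{level: value, "Children": create_named_entries(sub.iloc[:, 1:])}
            for value, sub in df.groupby(level, sort=False)]

flare = {"Name": "World", "Children": create_named_entries(data.iloc[:, 1:])}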
I have two dictionaries in Python (d1, d2) where I need to copy the missing "id" items from d2 into d1, ignoring any other differences (such as the extra child in d2). Effectively, the result dictionary should be d1 with the "id" items added. I have tried merging, but it did not work, since either way I lose data.
d1 = {
    "parent": {
        "name": "Axl",
        "surname": "Doe",
        "children": [
            {
                "name": "John",
                "surname": "Doe"
            },
            {
                "name": "Jane",
                "surname": "Doe",
                "children": [
                    {
                        "name": "Jim",
                        "surname": "Doe"
                    },
                    {
                        "name": "Kim",
                        "surname": "Doe"
                    }
                ]
            }
        ]
    }
}
d2 = {
    "parent": {
        "id": 1,
        "name": "Axl",
        "surname": "Doe",
        "children": [
            {
                "id": 2,
                "name": "John",
                "surname": "Doe"
            },
            {
                "id": 3,
                "name": "Jane",
                "surname": "Doe",
                "children": [
                    {
                        "id": 4,
                        "name": "Jim",
                        "surname": "Doe"
                    },
                    {
                        "id": 5,
                        "name": "Kim",
                        "surname": "Doe"
                    },
                    {
                        "id": 6,
                        "name": "Bill",
                        "surname": "Doe"
                    }
                ]
            }
        ]
    }
}
result = {
    "parent": {
        "id": 1,
        "name": "Axl",
        "surname": "Doe",
        "children": [
            {
                "id": 2,
                "name": "John",
                "surname": "Doe"
            },
            {
                "id": 3,
                "name": "Jane",
                "surname": "Doe",
                "children": [
                    {
                        "id": 4,
                        "name": "Jim",
                        "surname": "Doe"
                    },
                    {
                        "id": 5,
                        "name": "Kim",
                        "surname": "Doe"
                    }
                ]
            }
        ]
    }
}
Any ideas?
I match children according to a key function, in this case the "name" and "surname" attributes.
I then go over the id_lookup dict (d2 in your example) and try to match each child with main_dict's children. If I find a match, I recurse into it.
In the end, main_dict (d1 in your example) is filled with IDs :-)
import operator

# main_dict is d1 and id_lookup_dict is d2 from the question.
root = main_dict["parent"]
lookup_root = id_lookup_dict["parent"]
keyfunc = operator.itemgetter("name", "surname")

def _recursive_fill_id(root, lookup_root, keyfunc):
    """Recursively fill root node with IDs.

    Matches nodes according to keyfunc.
    """
    root["id"] = lookup_root["id"]
    # Fetch children
    root_children = root.get("children")
    # There are no children
    if root_children is None:
        return
    children_left = len(root_children)
    # Create a dict mapping the key identifying a child to the child.
    # This avoids a hefty lookup cost and requires a single iteration.
    children_dict = dict(zip(map(keyfunc, root_children), root_children))
    for lookup_child in lookup_root["children"]:
        lookup_key = keyfunc(lookup_child)
        matching_child = children_dict.get(lookup_key)
        if matching_child is not None:
            _recursive_fill_id(matching_child, lookup_child, keyfunc)
            # Short circuit in case all children were filled
            children_left -= 1
            if not children_left:
                break

_recursive_fill_id(root, lookup_root, keyfunc)
I wanted to add an iterative answer alongside the recursive one, as it will probably prove more efficient: it cannot hit the recursion limit and should be a bit faster.
import operator

# As above, main_dict is d1 and id_lookup_dict is d2 from the question.
root = main_dict["parent"]
lookup_root = id_lookup_dict["parent"]
keyfunc = operator.itemgetter("name", "surname")

def _iterative_fill_id(root, lookup_root, keyfunc):
    """Iteratively fill root node with IDs.

    Matches nodes according to keyfunc.
    """
    matching_nodes = [(root, lookup_root)]
    while matching_nodes:
        root, lookup_root = matching_nodes.pop()
        root["id"] = lookup_root["id"]
        # Fetch children
        root_children = root.get("children")
        # There are no children
        if root_children is None:
            continue
        children_left = len(root_children)
        # Create a dict mapping the key identifying a child to the child.
        # This avoids a hefty lookup cost and requires a single iteration.
        children_dict = dict(zip(map(keyfunc, root_children), root_children))
        for lookup_child in lookup_root["children"]:
            lookup_key = keyfunc(lookup_child)
            matching_child = children_dict.get(lookup_key)
            if matching_child is not None:
                matching_nodes.append((matching_child, lookup_child))
                # Short circuit in case all children were filled
                children_left -= 1
                if not children_left:
                    break

_iterative_fill_id(root, lookup_root, keyfunc)
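As a hedged sanity check against the question's data (assuming d1, d2, and result as defined above):

main_dict, id_lookup_dict = d1, d2
_iterative_fill_id(main_dict["parent"], id_lookup_dict["parent"],
                   operator.itemgetter("name", "surname"))
assert main_dict == result  # Bill (id 6) has no match in d1 and is skipped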