I've a problem I have been struggling on for some time now. What I need to do is to check things for a large amount of data inside many folders. To keep track of what has been done I wanted to create a yaml file containing the tree organization of my data structure. Thus, the objective is to create nested dictionaries of the folders containing data.
The script I made is working, but it duplicates each folder and I don't know how to call recursively the function to avoid this. Here is the code :
def load_tree_structure_as_dictionnary(current_dict):
for dir_name in current_dict.keys():
lst_sub_dir = [f.path for f in os.scandir(dir_name) if f.is_dir()]
if lst_sub_dir == []:
current_dict[dir_name]['correct_calibration'] = None
else:
for sub_dir in lst_sub_dir:
current_dict[dir_name][sub_dir] = load_tree_structure_as_dictionnary( {sub_dir: {}} )
return current_dict
init_dict = {data_path : {} }
full_dict = load_tree_structure_as_dictionnary(init_dict)
I know the error is in the recursive call, but I can't create a new 'sub_dir' key if there isnt a dictionnary initialized ( hence the {sub_dir : {}} )
Also I am new to writing stackoverflow questions, lmk if something needs to be improved in the syntax.
After changing current_dict[dir_name][sub_dir] = load_tree_structure_as_dictionnary( {sub_dir: {}} ) to current_dict[dir_name].update(load_tree_structure_as_dictionnary( {sub_dir: {}} )) your code will not duplicate the sub_dir.
def load_tree_structure_as_dictionnary(current_dict):
for dir_name in current_dict.keys():
lst_sub_dir = [f.path for f in os.scandir(dir_name) if f.is_dir()]
if lst_sub_dir == []:
current_dict[dir_name]['correct_calibration'] = None
else:
for sub_dir in lst_sub_dir:
current_dict[dir_name].update(load_tree_structure_as_dictionnary( {sub_dir: {}} ))
return current_dict
init_dict = {"venv" : {} }
full_dict = load_tree_structure_as_dictionnary(init_dict)
Related
I am messing around with a script in Flask I have this portion here
def get_interfaces_list2(device):
output_interfaces = device.send_command('show interfaces switchport')
current_dir = os.getcwd()
template_file = open(current_dir + "/scripts/textfsm/show_interface_switchport.template", "r")
template = TextFSM(template_file)
parsed_interfaces = template.ParseText(output_interfaces)
interface_list = []
for interface_data in parsed_interfaces:
resultDict = {}
resultDict["interface"] = interface_data[0]
resultDict["admin_mode"] = interface_data[5]
resultDict["access_vlan"] = interface_data[6]
resultDict["voice_vlan"] = interface_data[8]
resultDict["trunking_vlans"] = interface_data[9]
interface_list.append(resultDict)
Return interface_list
I would like to add another command to add more info from the switch
output_interfaces1 = device.send_command('show interfaces description')
current_dir = os.getcwd()
template_file = open(current_dir + "/scripts/textfsm/show_interface_description.template", "r")
template = TextFSM(template_file)
parsed_interfaces1 = template.ParseText(output_interfaces1)
interface_list1 = []
for interface_data1 in parsed_interfaces1:
resultDict["descrip"] = interface_data1
interface_list.append(interface_list1)
return interface_list
I would like to combine this into a single list and return that info in an HTML
If I understood correctly, you are currently saving information about an interface in a dictionary and storing that dict in a list. You then want to add more information about the interface. I think there are two approaches you can take here:
Run a single for loop on both parsed_interfaces and parsed_interfaces1 and store all of the info in one shot.
Store the info from your first loop in another dictionary instead of a list where the key is the interface name. Then in the second loop use that key to access the nested dict and store the new info.
I'm making a program in Python where I need to interact with "hypothetical" paths, (aka paths that don't and won't exist on the actual filesystem) and I need to be able to listdir them like normal (path['directory'] would return every item inside the directory like os.listdir()).
The solution I came up with was to convert a list of string paths to a dictionary of dictionaries. I came up with this recursive function (it's inside a class):
def DoMagic(self,paths):
structure = {}
if not type(paths) == list:
raise ValueError('Expected list Value, not '+str(type(paths)))
for i in paths:
print(i)
if i[0] == '/': #Sanity check
print('trailing?',i) #Inform user that there *might* be an issue with the input.
i[0] = ''
i = i.split('/') #Split it, so that we can test against different parts.
if len(i[1:]) > 1: #Hang-a-bout, there's more content!
structure = {**structure, **self.DoMagic(['/'.join(i[1:])])}
else:
structure[i[1]] = i[1]
But when I go to run it with ['foo/e.txt','foo/bar/a.txt','foo/bar/b.cfg','foo/bar/c/d.txt'] as input, I get:
{'e.txt': 'e.txt', 'a.txt': 'a.txt', 'b.cfg': 'b.cfg', 'd.txt': 'd.txt'}
I want to be able to just path['foo']['bar'] to get everything in the foo/bar/ directory.
Edit:
A more desirable output would be:
{'foo':{'e.txt':'e.txt','bar':{'a.txt':'a.txt','c':{'d.txt':'d.txt'}}}}
Edit 10-14-22 My first answer matches what the OP asks but isn't really the ideal approach nor the cleanest output. Since this question appears to be used more often, see a cleaner approach below that is more resilient to Unix/Windows paths and the output dictionary makes more sense.
from pathlib import Path
import json
def get_path_dict(paths: list[str | Path]) -> dict:
"""Builds a tree like structure out of a list of paths"""
def _recurse(dic: dict, chain: tuple[str, ...] | list[str]):
if len(chain) == 0:
return
if len(chain) == 1:
dic[chain[0]] = None
return
key, *new_chain = chain
if key not in dic:
dic[key] = {}
_recurse(dic[key], new_chain)
return
new_path_dict = {}
for path in paths:
_recurse(new_path_dict, Path(path).parts)
return new_path_dict
l1 = ['foo/e.txt', 'foo/bar/a.txt', 'foo/bar/b.cfg', Path('foo/bar/c/d.txt'), 'test.txt']
result = get_path_dict(l1)
print(json.dumps(result, indent=2))
Output:
{
"foo": {
"e.txt": null,
"bar": {
"a.txt": null,
"b.cfg": null,
"c": {
"d.txt": null
}
}
},
"test.txt": null
}
Older approach
How about this. It gets your desired output, however a tree structure may be cleaner.
from collections import defaultdict
import json
def nested_dict():
"""
Creates a default dictionary where each value is an other default dictionary.
"""
return defaultdict(nested_dict)
def default_to_regular(d):
"""
Converts defaultdicts of defaultdicts to dict of dicts.
"""
if isinstance(d, defaultdict):
d = {k: default_to_regular(v) for k, v in d.items()}
return d
def get_path_dict(paths):
new_path_dict = nested_dict()
for path in paths:
parts = path.split('/')
if parts:
marcher = new_path_dict
for key in parts[:-1]:
marcher = marcher[key]
marcher[parts[-1]] = parts[-1]
return default_to_regular(new_path_dict)
l1 = ['foo/e.txt','foo/bar/a.txt','foo/bar/b.cfg','foo/bar/c/d.txt', 'test.txt']
result = get_path_dict(l1)
print(json.dumps(result, indent=2))
Output:
{
"foo": {
"e.txt": "e.txt",
"bar": {
"a.txt": "a.txt",
"b.cfg": "b.cfg",
"c": {
"d.txt": "d.txt"
}
}
},
"test.txt": "test.txt"
}
Wouldn't simple tree, implemented via dictionaries, suffice?
Your implementation seems a bit redundant. It's hard to tell easily to which folder a file belongs.
https://en.wikipedia.org/wiki/Tree_(data_structure)
There's a lot of libs on pypi, if you need something extra.
treelib
There're also Pure paths in pathlib.
My code is as follows:
import json
def reformat(importscompanies):
#print importscompanies
container={}
child=[]
item_dict={}
for name, imports in importscompanies.iteritems():
item_dict['name'] = imports
item_dict['size'] = '500'
child.append(dict(item_dict))
container['name'] = name
container['children'] = child
if __name__ == '__main__':
raw_data = json.load(open('data/bricsinvestorsfirst.json'))
run(raw_data)
def run(raw_data):
raw_data2 = raw_data[0]
the_output = reformat(raw_data2)
My issue is, the code isn't going through the whole file. It's only outputting one entry. Why is this? Am I rewriting something and do I need another dict that appends with every loop?
Also, it seems as though the for loop is going through the iteritems for each dict key. Is there a way to make it pass only once?
The issue is indeed
raw_data2 = raw_data[0]
I ended up creating an iterator to access the dict values.
Thanks.
Lastly, I'm hoping my final Json file looks this way, using the data I provided above:
{'name': u'name', 'children': [{'name': u'500 Startups', 'size': '500'}, {'name': u'AffinityChina', 'size': '500'}]}
Try this. Though your sample input and output data don't really give many clues as to where the "name" fields should come from. I've assumed you wanted the name of the original item in your list.
original_json = json.load(open('data/bricsinvestorsfirst.json'),'r')
response_json = {}
response_json["name"] = "analytics"
# where your children list will go
children = []
size = 500 # or whatever else you want
# For each item in your original list
for item in original_json:
children.append({"name" : item["name"],
"size" : size})
response_json["children"] = children
print json.dumps(response_json,indent=2)
"It's only outputting one entry" because you only select the first dictionary in the JSON file when you say raw_data2 = raw_data[0]
Try something like this as a starting point (I haven't tested/ran it):
import json
def run():
with open('data/bricsinvestorsfirst.json') as input_file:
raw_data = json.load(input_file)
children = []
for item in raw_data:
children.append({
'name': item['name'],
'size': '500'
})
container = {}
container['name'] = 'name'
container['children'] = children
return json.dumps(container)
if __name__ == '__main__':
print run()
I want to find the shortest path from goal to root working backwards
My input for root is {'4345092': ['6570646', '40586', '484']}
My input for goal is {'886619': ['GOAL']}
My input for path_holder is an input but it gets converted to dct and is used for this function. I am getting stuck regarding the while loop as it creates the path for me backwards. Right now I can't get q to print because that part of the code isn't being ran. dct is basically a directed graph representation that contains cycles. I can't seem to figure out how to start from GOAL and end up at the root node. I was wondering if someone could help me figure this out thanks!
dct:
dct =
{ '612803266': ['12408765', '46589', '5880', '31848'],
'8140983': ['7922972', '56008'],
'7496838': ['612803266'],
'1558536111': ['7496838'],
'31848': ['DEADEND'],
'1910530': ['8140983'],
'242010': ['58644', '886619'],
'727315568': ['DEADEND'],
'12408765': ['DEADEND'],
'56008': ['DEADEND'],
'58644': ['DEADEND'],
'886619': ['GOAL'],
'40586': ['931', '727315568', '242010', '1910530'],
'5880': ['1558536111'],
'46589': ['DEADEND'],
'6570646': ['2549003','43045', '13830'],
'931': ['299159122'],
'484': ['1311310', '612803266'],
'1311310': ['DEADEND'],
'7922972': ['DEADEND']
}
my function:
def backtrace(path_holder, root, goal):
dct = {}
for d in path_holder:
dct.update(d)
rootnode = root.keys()[0]
goal = goal.keys()[0]
path = []
path.append(goal)
q = 0
while goal != rootnode:
# find key that contains goal in list
for i in dct: #iterate keys
if i in dct: # prevent repeat of path
continue
for j in dct[i]: #iterate though children
if j == goal:
path.append(i)
goal = i # look for new goal
q += 1
print q
#print goal
# append key that has goal in the list
# set goal to be the key that was appended
# repeat
return path
Just find the paths and then invert them.
UPDATED: Added "[]" to "DEADEND" and "GOAL" in end conditions.
import copy as cp
DCT = {...} # You already know what goes here.
FOUND_PATHS = [] # In case of more than one path to GOAL.
FOUND_REVERSE_PATHS = []
COUNTER = len(DCT)
def back_track(root, target_path = [], counter=COUNTER):
"""
#param root: DCT key.
#type root: str.
#param target_path: Reference to the path we are constructing.
#type target_path: list.
"""
global FOUND_PATHS
# Avoiding cycles.
if counter == 0:
return
# Some nodes aren't full generated.
try:
DCT[root]
except KeyError:
return
# End condition.
if DCT[root] == ['DEADEND']:
return
# Path found.
if DCT[root] == ['GOAL']:
FOUND_PATHS.append(target_path) # The normal path.
reverse_path = cp.copy(target_path)
reverse_path.reverse()
FOUND_REVERSE_PATHS.append(reverse_path) # The path you want.
return
for node in DCT[root]:
# Makes copy of target parh and add the node.
path_copy = cp.copy(target_path)
path_copy.append(node)
# Call back_track with current node and the copy
# of target_path.
back_track(node, path_copy, counter=(counter - 1))
if __name__ == '__main__':
back_track('4345092')
print(FOUND_PATHS)
print(FOUND_REVERSE_PATHS)
I got two functions that return a list of dictionary and i'm trying to get json to encode it, it works when i try doing it with my first function, but now i'm appending second function with a syntax error of ": expected". I will eventually be appending total of 7 functions that each output a list of dict. Is there a better way of accomplishing this?
import dmidecode
import simplejson as json
def get_bios_specs():
BIOSdict = {}
BIOSlist = []
for v in dmidecode.bios().values():
if type(v) == dict and v['dmi_type'] == 0:
BIOSdict["Name"] = str((v['data']['Vendor']))
BIOSdict["Description"] = str((v['data']['Vendor']))
BIOSdict["BuildNumber"] = str((v['data']['Version']))
BIOSdict["SoftwareElementID"] = str((v['data']['BIOS Revision']))
BIOSdict["primaryBIOS"] = "True"
BIOSlist.append(BIOSdict)
return BIOSlist
def get_board_specs():
MOBOdict = {}
MOBOlist = []
for v in dmidecode.baseboard().values():
if type(v) == dict and v['dmi_type'] == 2:
MOBOdict["Manufacturer"] = str(v['data']['Manufacturer'])
MOBOdict["Model"] = str(v['data']['Product Name'])
MOBOlist.append(MOBOdict)
return MOBOlist
def get_json_dumps():
jsonOBJ = json
#Syntax error is here, i can't use comma to continue adding more, nor + to append.
return jsonOBJ.dumps({'HardwareSpec':{'BIOS': get_bios_specs()},{'Motherboard': get_board_specs()}})
Use multiple items within your nested dictionary.
jsonOBJ.dumps({
'HardwareSpec': {
'BIOS': get_bios_specs(),
'Motherboard': get_board_specs()
}
})
And if you want multiple BIOS items or Motherboard items, just use a list.
...
'HardwareSpec': {
'BIOS': [
get_bios_specs(),
get_uefi_specs()
]
...
}
If you want a more convenient lookup of specs, you can just embed a dict:
jsonOBJ.dumps({'HardwareSpec':{'BIOS': get_bios_specs(),
'Motherboard': get_board_specs()
}
})