Identify root folder in a file system in Python

Identify root folder in a file system in Python - python

I have a recursive method that traverses a file system structure and creates a dictionary from it.
This is the code:
def path_to_dict(path):
d = {'name': os.path.basename(path)}
if os.path.isdir(path):
d['type'] = "directory"
d['path'] = os.path.relpath(path).strip('..\\').replace('\\','/')
d['children'] = [path_to_dict(os.path.join(path, x)) for x in os.listdir\
(path)]
else:
d['type'] = "file"
d['path'] = os.path.relpath(path).strip('..\\').replace('\\','/')
with open(path, 'r', encoding="utf-8", errors='ignore') as myfile:
content = myfile.read().splitlines()
d['content'] = content
At the moment, it checks if it is a folder then puts the keys name, type, path and children where children is an array which can contain further folders or files. If it is a file it has the keys name, type, path and content.
After converting it to JSON, the final structure is like this.
{
"name": "nw",
"type": "directory",
"path": "Parsing/nw",
"children": [{
"name": "New folder",
"type": "directory",
"path": "Parsing/nw/New folder",
"children": [{
"name": "abc",
"type": "directory",
"path": "Parsing/nw/New folder/abc",
"children": [{
"name": "text2.txt",
"type": "file",
"path": "Parsing/nw/New folder/abc/text2.txt",
"content": ["abc", "def", "dfg"]
}]
}, {
"name": "text2.txt",
"type": "file",
"path": "Parsing/nw/New folder/text2.txt",
"content": ["abc", "def", "dfg"]
}]
}, {
"name": "text1.txt",
"type": "file",
"path": "Parsing/nw/text1.txt",
"content": ["aaa "]
}, {
"name": "text2.txt",
"type": "file",
"path": "Parsing/nw/text2.txt",
"content": []
}]
}
Now I want the script to always set the type in only the root folder to the value root. How can I do this?

I think you want something similar than the following implementation. The directories and files in root folder will contain the "type": "root" and the child elements won't contain this key-value pair.
def path_to_dict(path, child=False):
d = {'name': os.path.basename(path)}
if os.path.isdir(path):
if not child:
d['type'] = "root"
d['path'] = os.path.relpath(path).strip('..\\').replace('\\','/')
d['children'] = [path_to_dict(os.path.join(path, x), child=True) for x in os.listdir\
(path)]
else:
if not child:
d['type'] = "root"
d['path'] = os.path.relpath(path).strip('..\\').replace('\\','/')
with open(path, 'r', encoding="utf-8", errors='ignore') as myfile:
content = myfile.read().splitlines()
d['content'] = content

Related

Compare and append to json file in python

I have 2 files like these:
file1.json
[
{
"name": "john",
"version": "0.0"
},
{
"name": "peter",
"version": "1.0"
},
{
"name": "bob",
"version": "2.0",
"single": "true"
}
]
file2.json
[
{
"name": "jane",
"version": "0.0"
},
{
"name": "peter",
"version": "1.0"
},
{
"name": "bob",
"version": "2.0",
"single": "true"
}
]
I want to compare the "name" values in file1.json with the "name" values in file2.json. If file2.json has some "name" values not in file1.json, I want to append that json object to file1.json.
For the above 2 files, I want file1 to be appended with the "name": "jane" object since that is not present in file1. So file1 should be updated to:
[
{
"name": "john",
"version": "0.0"
},
{
"name": "peter",
"version": "1.0"
},
{
"name": "bob",
"version": "2.0",
"single": "true"
},
{
"name": "jane",
"version": "0.0"
}
]
What I have tried:
with open('file1.json', 'r') as file1, open('file2.json', 'r') as file2:
file1_json = json.load(file1)
file2_json = json.load(file2)
for object in file2_json:
if object['name'] not in file1_json.values():
file1_json.update(object)
with open('file1.json', 'w') as file1:
json.dump(file1_json, file1, indent=4)

Collect the names in file1_json, find the missing names in file2_json, then add them to file1_json:
import json
with open('file1.json', 'r') as file1, open('file2.json', 'r') as file2:
file1_json = json.load(file1)
file2_json = json.load(file2)
names = [o['name'] for o in file1_json]
missing = [o for o in file2_json if o['name'] not in names]
file1_json.extend(missing)
with open('file1.json', 'w') as file1:
json.dump(file1_json, file1, indent=2)
Output:
[
{
"name": "john",
"version": "0.0"
},
{
"name": "peter",
"version": "1.0"
},
{
"name": "bob",
"version": "2.0",
"single": "true"
},
{
"name": "jane",
"version": "0.0"
}
]

This should work if all dictionary values are immutable (which they are):
with open('file1.json', 'r') as file1, open('file2.json', 'r') as file2:
file1_json = json.load(file1)
file2_json = json.load(file2)
print([dict(s) for s in set(frozenset(d.items()) for d in file1_json+file2_json)])

You could create an index into json2 to speed lookups. Then take the set of names from each and subtract. The result is the missing names. Use the index to add the missing values.
with open('file1.json', 'r') as file1, open('file2.json', 'r') as file2:
file1_json = json.load(file1)
file2_json = json.load(file2)
idx2 = {d["name"]:d for d in file2_json}
missing = set(idx2) - set(d["name"] for d in file1_json)
file1_json.extend(idx2[name] for name in missing)

I believe what you want is to merge dictionaries essentially. The fact it comes from a json does not really matter here.
Take a look at this.
https://stackoverflow.com/a/62820532/4944708
Here's the full solution, assuming you have read the jsons in:
def to_dict(json_):
return {item['name']: item for item in json_}
list({**to_dict(json1), **to_dict(json2)}.values())

convert list of dir paths into a json object

I am trying to build a py script that converts a list of paths into a json object as below; the output of the script should be structured as below.
json_out is a list of dictionaries that have four elements (1)type (2)name (3)path and (4)children
json_out = [
{
"type": "folder",
"name": "dir1",
"path": "/dir1",
"children": [
{
"type": "folder",
"name": "photos",
"path": "/dir1/photos",
"children": [
{
"type": "file",
"name": "mydir1.pdf",
"path": "/dir1/photos/mydir1.pdf"
},
{
"type": "file",
"name": "yourdir1.pdf",
"path": "/dir1/photos/yourdir1.pdf"
}
]
}
]
},
{
"type": "folder",
"name": "dir2",
"path": "/dir2",
"children": [
{
"type": "folder",
"name": "photos",
"path": "/dir2/photos",
"children": [
{
"type": "file",
"name": "mydir2.pdf",
"path": "/dir2/photos/mydir2.pdf"
},
{
"type": "file",
"name": "yourdir2.pdf",
"path": "/dir2/photos/yourdir2.pdf"
}
]
}
]
}
]
This is what I have so far, but this does not return the correct output structure
def my_fx(paths):
for path in paths:
file_path=path
l=file_path.split('/')[1:]
def gen_json(l=l, d=dict()):
tmp = {}
if not d:
d["name"] = l.pop(-1)
tmp["children"]=d
tmp["name"]=l.pop(-1)
return gen_json(l,tmp) if l else tmp
print(json.dumps(gen_json(l), ensure_ascii=False))
My Input
list_of_paths = [
"dir1/photos/mydir1.pdf",
"dir1/photos/yourdir1.pdf",
"dir2/photos/mydir2.pdf",
"dir2/photos/yourdir2.pdf"
]
My Output
{"children": {"name": "mydir1.pdf"}, "name": "photos"}
{"children": {"name": "yourdir1.pdf"}, "name": "photos"}
{"children": {"name": "mydir2.pdf"}, "name": "photos"}
{"children": {"name": "yourdir2.pdf"}, "name": "photos"}
Thanks in advance

Search a Json directory using python

I have a Json file created that holds a specific directory. I am trying to write something that will allow the user to go in and out of each "folder" creating almost a command line "file explorer"
my json file is formatted as such:
{
"children": [
{
"children": [
{
"name": "somefile.cmd",
"path": "C:\\some\\directory\\somefile.cmd",
"type": "file"
},
{
"name": "otherfile.ps1",
"path": "C:\\some\\directory\\somefile.ps1",
"type": "file"
},
{
"name": "somefile.exe",
"path": "C:\\some\\directory\\somefile.exe",
"type": "file"
}
],
"name": "somefile",
"path": "C:\\some\\directory",
"type": "folder"
},
{
"children": [
.
.
.
My function I am using
def search_json(filename):
json_file = open(filename)
data = json.load(json_file)
subsyst_count = 1
subsyst_list = []
#list of subsystems
for i in data['children']:
print(subsyst_count, i['name'])
subsyst_list.append(i['name'])
subsyst_count = subsyst_count + 1
user = int(input('Which Subsystem?'))
#search json for children of subsyst_list[user]
print(subsyst_list[user])
for i in data['children']:
if i['name'] == subsyst_list[user]:
print(i['name'])
for j in i['name']:
print(j[0])
I expect it to go into the first children folder count all the folders under it and prompt user to select a number as to which subsystem to go into. I then wanted it to search that new "children" subsystem directory and again number which file to go into or file to select. instead it throws a keyerror when I have
print(j['name'])
and then the function just spells the name of the subsystem when I have:
print(j['0'])

Adding entries to Python dict in variable locations

I have a json file that looks something like this (I've left a lot out to keep it shorter so ignore missing brackets in it):
{
"id": "79cb20b0-02be-42c7-9b45-96407c888dc2",
"tenantId": "00000000-0000-0000-0000-000000000000",
"name": "2-stufiges Stirnradgetriebe",
"description": null,
"visibility": "None",
"method": "IDM_CALCULATE_GEAR_COUPLED",
"created": "2018-10-16T10:25:20.874Z",
"createdBy": "00000000-0000-0000-0000-000000000000",
"lastModified": "2018-10-16T10:25:28.226Z",
"lastModifiedBy": "00000000-0000-0000-0000-000000000000",
"client": "STRING_BEARINX_ONLINE",
"project": {
"id": "10c37dcc-0e4e-4c4d-a6d6-12cf65cceaf9",
"name": "proj 2",
"isBookmarked": false
},
"rootObject": {
"id": "6ff0010c-00fe-485b-b695-4ddd6aca4dcd",
"type": "IDO_GEAR",
"children": [
{
"id": "1dd94d1a-e52d-40b3-a82b-6db02a8fbbab",
"type": "IDO_SYSTEM_LOADCASE",
"children": [],
"childList": "SYSTEMLOADCASE",
"properties": [
{
"name": "IDCO_IDENTIFICATION",
"value": "1dd94d1a-e52d-40b3-a82b-6db02a8fbbab"
},
{
"name": "IDCO_DESIGNATION",
"value": "Lastfall 1"
},
{
"name": "IDSLC_TIME_PORTION",
"value": 100
},
{
"name": "IDSLC_DISTANCE_PORTION",
"value": 100
},
{
"name": "IDSLC_OPERATING_TIME_IN_HOURS",
"value": 1
},
{
"name": "IDSLC_OPERATING_TIME_IN_SECONDS",
"value": 3600
},
{
"name": "IDSLC_OPERATING_REVOLUTIONS",
"value": 1
},
{
"name": "IDSLC_OPERATING_DISTANCE",
"value": 1
},
{
"name": "IDSLC_ACCELERATION",
"value": 9.81
},
{
"name": "IDSLC_EPSILON_X",
"value": 0
},
{
"name": "IDSLC_EPSILON_Y",
"value": 0
},
{
"name": "IDSLC_EPSILON_Z",
"value": 0
},
{
"name": "IDSLC_CALCULATION_WITH_OWN_WEIGHT",
"value": "CO_CALCULATION_WITHOUT_OWN_WEIGHT"
},
{
"name": "IDSLC_CALCULATION_WITH_TEMPERATURE",
"value": "CO_CALCULATION_WITH_TEMPERATURE"
},
{
"name": "IDSLC_FLAG_FOR_LOADCASE_CALCULATION",
"value": "LB_CALCULATE_LOADCASE"
},
{
"name": "IDSLC_STATUS_OF_LOADCASE_CALCULATION",
"value": false
}
],
"position": 1,
"order": 1,
"support_vector": {
"x": 0,
"y": 0,
"z": 0
},
"u_axis_vector": {
"x": 1,
"y": 0,
"z": 0
},
"w_axis_vector": {
"x": 0,
"y": 0,
"z": 1
},
"role": "_none_"
},
{
"id": "ab7fbf37-17bb-4e60-a543-634571a0fd73",
"type": "IDO_SHAFT_SYSTEM",
"children": [
{
"id": "7f034e5c-24df-4145-bab8-601f49b43b50",
"type": "IDO_RADIAL_ROLLER_BEARING",
"children": [
{
"id": "0b3e695b-6028-43af-874d-4826ab60dd3f",
"type": "IDO_RADIAL_BEARING_INNER_RING",
"children": [
{
"id": "330aa09d-60fb-40d7-a190-64264b3d44b7",
"type": "IDO_LOADCONTAINER",
"children": [
{
"id": "03036040-fc1a-4e52-8a69-d658e18a8d4a",
"type": "IDO_DISPLACEMENT",
"children": [],
"childList": "DISPLACEMENT",
"properties": [
{
"name": "IDCO_IDENTIFICATION",
"value": "03036040-fc1a-4e52-8a69-d658e18a8d4a"
},
{
"name": "IDCO_DESIGNATION",
"value": "Displacement 1"
}
]
I want to add entries to it but the problem is the location I want to add it isn't uniform. The id key will change, for example sometimes I may want to add the entries to "id": "ab7fbf37-17bb-4e60-a543-634571a0fd73" and another I might want to add the entries to "id": "0b3e695b-6028-43af-874d-4826ab60dd3f".
The code I have runs through another file in a loop and every time it finds a id and a property name it stores the id, name, and value.
The code I'm currently using is:
import os
import json
import shutil
import re
import fileinput
#Finds and lists the folders that have been provided
d='.'
folders = list(filter (lambda x: os.path.isdir(os.path.join(d, x)), os.listdir(d)))
print("Folders found: ")
print(folders)
print("\n")
def processModelFolder(inFolder):
#Creating the file names
fileName = os.path.join(d, inFolder, inFolder + ".mdl")
fileNameTwo = os.path.join(d, inFolder, inFolder + ".vg2.json")
fileNameThree = os.path.join(d, inFolder, inFolder + "APPENDED.vg2.json")
#copying the json file so the new copy can be appended
shutil.copyfile(fileNameTwo, fileNameThree)
#assigning IDs and properties to search for in the mdl file
IDs = ["7f034e5c-24df-4145-bab8-601f49b43b50"]
Properties = ["IDSU_FX[0]","IDSU_FY[0]","IDSU_FZ[0]"]
#Basic check to see if IDs and Properties are valid
for i in IDs:
if len(i) != 36:
print("ID may not have been valid and might not return the results you expect, check to ensure the characters are correct: ")
print(i)
print("\n")
if len(IDs) == 0:
print("No IDs were given!")
elif len(Properties) == 0:
print("No Properties were given!")
#Reads code untill an ID is found
else:
with open(fileName , "r") as in_file:
IDCO = None
for n, line in enumerate(in_file, 1):
if line.startswith('IDCO_IDENTIFICATION'):
#Checks if the second part of each line is a ID tag in IDs
if line.split('"')[1] in IDs:
#If ID found it is stored as IDCO
IDCO = line.split('"')[1]
else:
if IDCO:
pass
IDCO = None
#Checks if the first part of each line is a Prop in Propterties
elif IDCO and line.split(' ')[0] in Properties:
print('Found! ID:{} Prop:{} Value: {}'.format(IDCO, line.split('=')[0][:-1], line.split('=')[1][:-1]))
print("\n")
#Stores the property name and value
name = str(line.split(' ')[0])
value = str(line.split(' ')[2])
print(name)
print(value)
#json file editing
with open(fileNameThree , "r+") as json_data:
python_obj = json.load(json_data)
new_element = [{"name": name, "value":value}]
python_obj['"id": "485f5bf4-fb97-415b-8b42-b46e9be080da"']
#foreach in new_elements:
#data['rootObject']['children'][0]['properties'].append(each)
print('Processed {} lines in file {}'.format(n , fileName))
for modelFolder in folders:
processModelFolder(modelFolder)
Is there any obvious way to dynamically change where it places the new entries? I'm having a hard time because the indent level it may be changes.
**Updated code:
import os
import json
import shutil
import re
import fileinput
#Finds and lists the folders that have been provided
d='.'
folders = list(filter (lambda x: os.path.isdir(os.path.join(d, x)), os.listdir(d)))
print("Folders found: ")
print(folders)
print("\n")
def processModelFolder(inFolder):
#Creating the file names
fileName = os.path.join(d, inFolder, inFolder + ".mdl")
fileNameTwo = os.path.join(d, inFolder, inFolder + ".vg2.json")
fileNameThree = os.path.join(d, inFolder, inFolder + "APPENDED.vg2.json")
#copying the json file so the new copy can be appended
shutil.copyfile(fileNameTwo, fileNameThree)
#assigning IDs and properties to search for in the mdl file
IDs = ["7f034e5c-24df-4145-bab8-601f49b43b50"]
Properties = ["IDSU_FX[0]","IDSU_FY[0]","IDSU_FZ[0]"]
#Basic check to see if IDs and Properties are valid
for i in IDs:
if len(i) != 36:
print("ID may not have been valid and might not return the results you expect, check to ensure the characters are correct: ")
print(i)
print("\n")
if len(IDs) == 0:
print("No IDs were given!")
elif len(Properties) == 0:
print("No Properties were given!")
#Reads code untill an ID is found
else:
with open(fileName , "r") as in_file:
IDCO = None
for n, line in enumerate(in_file, 1):
if line.startswith('IDCO_IDENTIFICATION'):
#Checks if the second part of each line is a ID tag in IDs
if line.split('"')[1] in IDs:
#If ID found it is stored as IDCO
IDCO = line.split('"')[1]
else:
if IDCO:
pass
IDCO = None
#Checks if the first part of each line is a Prop in Propterties
elif IDCO and line.split(' ')[0] in Properties:
print('Found! ID:{} Prop:{} Value: {}'.format(IDCO, line.split('=')[0][:-1], line.split('=')[1][:-1]))
print("\n")
#Stores the property name and value
name = str(line.split(' ')[0])
value = str(line.split(' ')[2])
key = os.path.join('"id": "'+IDCO+'"')
print(key)
print(name)
print(value)
#json file editing
with open(fileNameThree , "r+") as json_data:
python_obj = json.load(json_data)
new_element = [{"name": name, "value":value}]
print("NEW ELEMENT:")
print(new_element)
for each in new_element:
children = {x['id']: x for x in python_obj['rootObject']['children']}
children[IDs]['properties'].append(each)
python_obj['rootObject']['children'] = [x for _, x in children.items()]
with open(fileNameThree , "w") as json_data:
json.dump(python_obj, json_data, indent = 3)
print('Processed {} lines in file {}'.format(n , fileName))
for modelFolder in folders:
processModelFolder(modelFolder)

You can convert the 'children' list into an dict.
children = {x['id']: x for x in data['rootObject']['children']}
Or if order matters to you do:
od = OrderedDict()
for child in children:
od[child['id']] = child
Then do the insertion based on id as
children[<<id here>>]['properties'].append(each)
Then convert dict back to list
data['rootObject']['children'] = [x for _, x in children.items()]

Create a JSON type nested dictionary from a list in Python

I wish to create a JSON type nested dictionary from a list of lists. The lists contained a full directory path, but I broke them into their individual components as I thought it would make the creation of the nested dictionaries easier.
An example list:
["root", "dir1", "file.txt"]
The expected result:
{
"type": "directory",
"name": "root",
"children": [
{
"type": "directory",
"name": "dir1",
"children": [
{
"type": "file",
"name": "file.txt",
}
]
}
]
}
I've tried using a recursive method but couldn't quite get there (new to recursive methods and my head continually spun out). Also tried an iterative method from an idea I found here (stack overflow) which inverted the list and build the dict backwards, which I kind of got to work, but was unable to solve one of the solution requirements, which is that the code can deal with duplication in parts of the directory paths as it iterates over the list of lists.
For example following on from the last example, the next inputted list is this:-
["root", "dir1", "dir2", "file2.txt"]
and it need to build onto the JSON dictionary to produce this:-
{
"type": "directory",
"name": "root",
"children": [
{
"type": "directory",
"name": "dir1",
"children": [
{
"type": "file",
"name": "file.txt",
}
{
"type": "directory",
"name": "dir2",
"children": [
{
"type": "file",
"name": "file2.txt"
}
]
}
]
}
]
}
and so on with an unknown number of lists containing directory paths.
Thanks.

A recursive solution with itertools.groupby is as follows (assuming all paths are absolute paths). The idea is to group paths by the first element in the path list. This groups similar directory roots together, allowing us to call the function recursively on that group.
Also note that file names cannot be duplicated in a directory, so all files will be grouped as single element lists by groupby:
from itertools import groupby
from operator import itemgetter
def build_dict(paths):
if len(paths) == 1 and len(paths[0]) == 1:
return {"type": "file", "name": paths[0][0]}
dirname = paths[0][0]
d = {"type": "directory", "name": dirname, "children": []}
for k, g in groupby(sorted([p[1:] for p in paths], key=itemgetter(0)),
key=itemgetter(0)):
d["children"].append(build_dict(list(g)))
return d
paths = [["root", "dir1", "file.txt"], ["root", "dir1", "dir2", "file2.txt"]]
print(build_dict(paths))
Output
{
"type": "directory",
"name": "root",
"children": [
{
"type": "directory",
"name": "dir1",
"children": [
{
"type": "directory",
"name": "dir2",
"children": [
{
"type": "file",
"name": "file2.txt"
}
]
},
{
"type": "file",
"name": "file.txt"
}
]
}
]
}

Here's a naive recursive solution that simply walks through the tree structure, adding children as necessary, until the last element of path is reached (assumed to be a file).
import json
def path_to_json(path, root):
if path:
curr = path.pop(0)
if not root:
root["type"] = "file"
root["name"] = curr
if path:
root["children"] = [{}]
root["type"] = "directory"
path_to_json(path, root["children"][0])
elif path:
try:
i = [x["name"] for x in root["children"]].index(path[0])
path_to_json(path, root["children"][i])
except ValueError:
root["children"].append({})
path_to_json(path, root["children"][-1])
return root
if __name__ == "__main__":
paths = [["root", "dir1", "file.txt"],
["root", "dir1", "dir2", "file2.txt"]]
result = {}
print(json.dumps([path_to_json(x, result) for x in paths][0], indent=4))
Output:
{
"type": "directory",
"name": "root",
"children": [
{
"type": "directory",
"name": "dir1",
"children": [
{
"type": "file",
"name": "file.txt"
},
{
"type": "directory",
"name": "dir2",
"children": [
{
"type": "file",
"name": "file2.txt"
}
]
}
]
}
]
}
Try it!

Given not much detail has been provided, here is a solution that uses a reference to enter each nested dict
In [537]: structure = ["root", "dir1", "dir2", "file2.txt"]
In [538]: d = {}
# Create a reference to the current dict
In [541]: curr = d
In [542]: for i, s in enumerate(structure):
...: curr['name'] = s
...: if i != len(structure) - 1:
...: curr['type'] = 'directory'
...: curr['children'] = {}
...: curr = curr['children'] # New reference is the child dict
...: else:
...: curr['type'] = 'file'
...:
In [544]: from pprint import pprint
In [545]: pprint(d)
{'children': {'children': {'children': {'name': 'file2.txt', 'type': 'file'},
'name': 'dir2',
'type': 'directory'},
'name': 'dir1',
'type': 'directory'},
'name': 'root',
'type': 'directory'}
I don't know if this will work for all of your questions as the spec isn't very detailed

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Identify root folder in a file system in Python - python

Related

Compare and append to json file in python

convert list of dir paths into a json object

Search a Json directory using python

Adding entries to Python dict in variable locations

Create a JSON type nested dictionary from a list in Python

Categories

Resources