convert list of dir paths into a json object - python

I am trying to build a Python script that converts a list of paths into a JSON object; the script's output should be structured as shown below.
json_out is a list of dictionaries that have four elements (1)type (2)name (3)path and (4)children
json_out = [
{
"type": "folder",
"name": "dir1",
"path": "/dir1",
"children": [
{
"type": "folder",
"name": "photos",
"path": "/dir1/photos",
"children": [
{
"type": "file",
"name": "mydir1.pdf",
"path": "/dir1/photos/mydir1.pdf"
},
{
"type": "file",
"name": "yourdir1.pdf",
"path": "/dir1/photos/yourdir1.pdf"
}
]
}
]
},
{
"type": "folder",
"name": "dir2",
"path": "/dir2",
"children": [
{
"type": "folder",
"name": "photos",
"path": "/dir2/photos",
"children": [
{
"type": "file",
"name": "mydir2.pdf",
"path": "/dir2/photos/mydir2.pdf"
},
{
"type": "file",
"name": "yourdir2.pdf",
"path": "/dir2/photos/yourdir2.pdf"
}
]
}
]
}
]
This is what I have so far, but this does not return the correct output structure
def my_fx(paths):
    """Print one nested ``{"children": ..., "name": ...}`` JSON line per path.

    Each path is processed independently: it is split on ``/``, the first
    component is discarded, and the remaining components are wrapped from
    the innermost (file) outwards into single-child dictionaries.

    NOTE(review): this is why the result is not one merged tree -- the
    leading component (e.g. ``dir1``) is dropped by the ``[1:]`` slice,
    paths sharing a folder are never combined, ``children`` is a dict
    rather than a list, and no ``type``/``path`` keys are produced.

    Args:
        paths: iterable of ``/``-separated path strings.

    Raises:
        IndexError: if a path has fewer than three components, because two
            names are popped after the first component is removed.
    """
    # Imported locally so the function also works when pasted on its own.
    import json

    for path in paths:
        file_path = path
        # Dropping element 0 removes "dir1" from "dir1/photos/x.pdf".
        l = file_path.split('/')[1:]

        def gen_json(l=l, d=None):
            """Wrap the last remaining name around ``d``; recurse until ``l`` is empty."""
            tmp = {}
            if not d:
                # First call: the innermost node is the last path component.
                d = {"name": l.pop(-1)}
            tmp["children"] = d
            tmp["name"] = l.pop(-1)
            return gen_json(l, tmp) if l else tmp

        print(json.dumps(gen_json(l), ensure_ascii=False))
My Input
list_of_paths = [
"dir1/photos/mydir1.pdf",
"dir1/photos/yourdir1.pdf",
"dir2/photos/mydir2.pdf",
"dir2/photos/yourdir2.pdf"
]
My Output
{"children": {"name": "mydir1.pdf"}, "name": "photos"}
{"children": {"name": "yourdir1.pdf"}, "name": "photos"}
{"children": {"name": "mydir2.pdf"}, "name": "photos"}
{"children": {"name": "yourdir2.pdf"}, "name": "photos"}
Thanks in advance

Related

Python Script to convert multiple json files in to single csv

{
"type": "Data",
"version": "1.0",
"box": {
"identifier": "abcdef",
"serial": "12345678"
},
"payload": {
"Type": "EL",
"Version": "1",
"Result": "Successful",
"Reference": null,
"Box": {
"Identifier": "abcdef",
"Serial": "12345678"
},
"Configuration": {
"EL": "1"
},
"vent": [
{
"ventType": "Arm",
"Timestamp": "2020-03-18T12:17:04+10:00",
"Parameters": [
{
"Name": "Arm",
"Value": "LT"
},
{
"Name": "Status",
"Value": "LD"
}
]
},
{
"ventType": "Arm",
"Timestamp": "2020-03-18T12:17:24+10:00",
"Parameters": [
{
"Name": "Arm",
"Value": "LT"
},
{
"Name": "Status",
"Value": "LD"
}
]
},
{
"EventType": "TimeUpdateCompleted",
"Timestamp": "2020-03-18T02:23:21.2979668Z",
"Parameters": [
{
"Name": "ActualAdjustment",
"Value": "PT0S"
},
{
"Name": "CorrectionOffset",
"Value": "PT0S"
},
{
"Name": "Latency",
"Value": "PT0.2423996S"
}
]
}
]
}
}
If you're looking to transfer information from a JSON file to a CSV, then you can use the following code to read in a JSON file into a dictionary in Python:
import json
# Read the JSON document into a Python object; the file is closed when the
# `with` block exits. JSON is decoded as UTF-8 explicitly instead of relying
# on the platform's default text encoding.
with open('data.txt', encoding='utf-8') as json_file:
    data_dict = json.load(json_file)
You could then convert this dictionary into a list with either list(data_dict.items()) or list(data_dict.values()).
Then you just need to write this list to a CSV file which you can easily do by just looping through the list.

Fast way of adding fields to a nested dict

I need help improving my code.
I've got a nested dict with many levels:
{
"11": {
"FacLC": {
"immty": [
"in_mm",
"in_mm"
],
"moood": [
"in_oo",
"in_oo"
]
}
},
"22": {
"FacLC": {
"immty": [
"in_mm",
"in_mm",
"in_mm"
]
}
}
}
And I want to add additional fields on every level, so my output looks like this:
[
{
"id": "",
"name": "11",
"general": [
{
"id": "",
"name": "FacLC",
"specifics": [
{
"id": "",
"name": "immty",
"characteristics": [
{
"id": "",
"name": "in_mm"
},
{
"id": "",
"name": "in_mm"
}
]
},
{
"id": "",
"name": "moood",
"characteristics": [
{
"id": "",
"name": "in_oo"
},
{
"id": "",
"name": "in_oo"
}
]
}
]
}
]
},
{
"id": "",
"name": "22",
"general": [
{
"id": "",
"name": "FacLC",
"specifics": [
{
"id": "",
"name": "immty",
"characteristics": [
{
"id": "",
"name": "in_mm"
},
{
"id": "",
"name": "in_mm"
},
{
"id": "",
"name": "in_mm"
}
]
}
]
}
]
}
]
I managed to write a four-level nested for loop, which I find inefficient and inelegant:
# Rebuild the nested mapping as a list of {"id", "name", <children>} records,
# one loop per nesting level (main -> general -> specific -> characteristic).
# Relies on two names defined before this snippet runs: `my_dict` (the nested
# input) and `my_new_dict` (an object with .append() that collects the
# top-level records).
for main_name, general in my_dict.items():
    generals = []
    for general_name, specific in general.items():
        specifics = []
        for specific_name, characteristics in specific.items():
            # Innermost level: each characteristic is a plain value that
            # becomes a leaf record.
            characteristics_dicts = []
            for characteristic in characteristics:
                characteristics_dicts.append({
                    "id": "",
                    "name": characteristic,
                })
            specifics.append({
                "id": "",
                "name": specific_name,
                "characteristics": characteristics_dicts,
            })
        generals.append({
            "id": "",
            "name": general_name,
            "specifics": specifics,
        })
    my_new_dict.append({
        "id": "",
        "name": main_name,
        "general": generals,
    })
I am wondering if there is a more compact and efficient solution.
In the past I created a function to do it. Basically, you call this function every time you need to add a new field to a nested dict, regardless of how many levels the nested dict has. You only have to provide the 'full path', which I called the 'key_map'.
Like ['node1','node1a','node1apart3']
def insert_value_using_map(_nodes_list_to_be_appended, _keys_map, _value_to_be_inserted):
    """Set a value deep inside a nested dict, creating missing levels.

    Walks ``_nodes_list_to_be_appended`` along every key of ``_keys_map``
    except the last, inserting an empty dict wherever a key is missing, then
    assigns ``_value_to_be_inserted`` under the last key. The nested dict is
    modified in place; nothing is returned.

    Args:
        _nodes_list_to_be_appended: the (possibly nested) dict to update.
        _keys_map: sequence of keys forming the full path to the target,
            e.g. ``['node1', 'node1a', 'node1apart3']``.
        _value_to_be_inserted: value stored under the final key.

    Raises:
        IndexError: if ``_keys_map`` is empty (there is no final key).
    """
    for _key in _keys_map[:-1]:
        # setdefault returns the existing child or the freshly inserted {}.
        _nodes_list_to_be_appended = _nodes_list_to_be_appended.setdefault(_key, {})
    _nodes_list_to_be_appended[_keys_map[-1]] = _value_to_be_inserted

Flatten nested JSON arrays with inherits properties in Python

I have a big json/dictionary with different levels of nested json arrays, I would like to flatten it, and also capture the relationship of the structure,
Part of my json looks like:
{
"name": "root",
"type": "all",
"children": [
{
"name": "properties",
"type": "feature",
"children": [
{
"name": "print",
"type": "feature",
"children": [
{
"name": "graphic print",
"type": "feature",
"inherits": true
},
{
"name": "striped print",
"type": "feature",
"inherits": true,
"children": [
{
"name": "pinstriped",
"type": "feature",
"inherits": true
},
{
"name": "light stripe",
"type": "feature",
"inherits": true
},
{
"name": "wide stripe",
"type": "feature",
"inherits": true
}
]
}
]
}
]
},
{
"name": "colours",
"type": "colour",
"children": [
{
"name": "main colours",
"type": "colour",
"children": [
{
"name": "black",
"type": "colour",
"children": [
{
"name": "light black",
"type": "colour",
"inherits": true
},
{
"name": "blue black",
"type": "colour",
"inherits": true
}
]
},
{
"name": "red",
"type": "colour",
"children": [
{
"name": "bright red",
"type": "colour",
"inherits": true
},
{
"name": "light red",
"type": "colour"
}
]
}
]
}
]
},
{
"name": "genders",
"type": "gender",
"children": [
{
"name": "female",
"type": "gender"
},
{
"name": "male",
"type": "gender"
}
]
}
]
}
The nesting depth is not the same everywhere. I
- want all the nodes (values of "name")
- also want all of a node's parents if the node has the "inherits" key set to true.
Something like:
But if there are better ideas on how to store this data, will be happy to accept as well!
Many Thanks!
I think this should do what you need:
def parse_dict_of_dict(_dict, _parent='', ret_dict=None):
    """Flatten a ``name``/``children`` tree into ``{name: parents-or-None}``.

    Every node's ``name`` becomes a key. The value is the comma-separated
    chain of ancestor names handed down to the node when the node has a
    truthy ``inherits`` entry, otherwise ``None``. A node passes
    ``"<its name>, <its own chain>"`` to its children when it inherits,
    and just its own name when it does not.

    Args:
        _dict: node dict with a ``name`` key and optional ``children``
            (list of node dicts) and ``inherits`` keys.
        _parent: ancestor chain handed down by the caller.
        ret_dict: dict to accumulate into; a new one is created when omitted
            so that separate top-level calls do not share results.

    Returns:
        The accumulator dict (children are inserted before their parent).
    """
    if ret_dict is None:
        ret_dict = {}
    _name = _dict["name"]
    _children = _dict.get('children')
    _inherit = _dict.get('inherits', False)
    if isinstance(_children, list):
        child_parent = _name + ', ' + _parent if _inherit else _name
        for _child in _children:
            parse_dict_of_dict(_child, child_parent, ret_dict)
    # The strips remove the trailing ", " left when the chain above is empty.
    ret_dict[_name] = _parent.strip(' ').strip(',') if _inherit else None
    return ret_dict
Can you elaborate more on your output?
OR you can use this function to flatten a nested JSON to a simple JSON.
def parse_dict_of_dict(_dict, _str=''):
    """Flatten a nested dict into a single-level dict with joined keys.

    Nested dict keys are joined with ``_``; list elements add their index
    (``key_0``, ``key_1_...``). Scalar values outside lists are converted
    with ``str``; scalar list elements are stored unchanged.

    Args:
        _dict: the (possibly nested) dict to flatten; keys must be strings.
        _str: key prefix accumulated from the enclosing levels.

    Returns:
        A new flat dict.
    """
    ret_dict = {}
    for k, v in _dict.items():
        if isinstance(v, dict):
            ret_dict.update(parse_dict_of_dict(v, _str=_str + k + '_'))
        elif isinstance(v, list):
            for index, item in enumerate(v):
                if isinstance(item, dict):
                    ret_dict.update(parse_dict_of_dict(item, _str=_str + k + '_%d_' % index))
                else:
                    # Keep the accumulated prefix so list items nested under
                    # different parents do not collide on the same key.
                    ret_dict[_str + k + '_%d' % index] = item
        else:
            try:
                ret_dict[_str + k] = str(v)
            except UnicodeError:
                # Python 2 only: str() of non-ASCII text raises; drop the
                # characters that cannot be encoded.
                ret_dict[_str + k] = v.encode('ascii', errors='ignore')
    return ret_dict

folder structure to json with python(unicode)

This program creates a folder/file structure in json
#!/usr/bin/env python
import os
import errno
import json
import sys
def path_hierarchy(path):
    """Return a nested dict describing ``path`` and everything below it.

    Every node has ``type``, ``name`` (the basename of ``path``) and ``path``.
    A directory has type ``'folder'`` and a ``children`` list built by
    recursing into each entry of ``os.listdir``. Anything that cannot be
    listed because it is not a directory has no ``children`` key, and its
    type is its file extension (``"Unknown"`` when there is none).

    Raises:
        OSError: any listing error other than ``ENOTDIR`` is re-raised.
    """
    hierarchy = {
        'type': 'folder',
        'name': os.path.basename(path),
        'path': path,
    }

    try:
        hierarchy['children'] = [
            path_hierarchy(os.path.join(path, contents))
            for contents in os.listdir(path)
        ]
    except OSError as e:
        # listdir on a non-directory fails with ENOTDIR: treat it as a file.
        if e.errno != errno.ENOTDIR:
            raise
        hierarchy['type'] = os.path.splitext(path)[1] or "Unknown"

    return hierarchy
if __name__ == '__main__':
    # Describe the directory given on the command line, or the current
    # working directory when no argument is supplied.
    try:
        directory = sys.argv[1]
    except IndexError:
        directory = os.getcwd()

    # NOTE: json.dumps defaults to ensure_ascii=True, so non-ASCII characters
    # in names are written as \uXXXX escapes (still valid JSON).
    # The context manager closes the file even if serialisation fails.
    with open("output.json", "w") as fo:
        fo.write(json.dumps(path_hierarchy(directory), indent=2, sort_keys=False,))
It actually works, but I can't make it work with Greek and Japanese characters. This is the output file. Is it an encoding issue, or is it something with the json module in Python? I use Python 3, so everything should be Unicode, but still...
{
"children": [
{
"children": [
{
"name": ".name",
"type": "Unknown",
"path": "I:\\Development\\Python\\JsonFileStruct\\.idea\\.name"
},
{
"name": "encodings.xml",
"type": ".xml",
"path": "I:\\Development\\Python\\JsonFileStruct\\.idea\\encodings.xml"
},
{
"name": "JsonFileStruct.iml",
"type": ".iml",
"path": "I:\\Development\\Python\\JsonFileStruct\\.idea\\JsonFileStruct.iml"
},
{
"name": "misc.xml",
"type": ".xml",
"path": "I:\\Development\\Python\\JsonFileStruct\\.idea\\misc.xml"
},
{
"name": "modules.xml",
"type": ".xml",
"path": "I:\\Development\\Python\\JsonFileStruct\\.idea\\modules.xml"
},
{
"name": "workspace.xml",
"type": ".xml",
"path": "I:\\Development\\Python\\JsonFileStruct\\.idea\\workspace.xml"
}
],
"name": ".idea",
"type": "folder",
"path": "I:\\Development\\Python\\JsonFileStruct\\.idea"
},
{
"name": "main.py",
"type": ".py",
"path": "I:\\Development\\Python\\JsonFileStruct\\main.py"
},
{
"name": "output.json",
"type": ".json",
"path": "I:\\Development\\Python\\JsonFileStruct\\output.json"
},
{
"name": "second.py",
"type": ".py",
"path": "I:\\Development\\Python\\JsonFileStruct\\second.py"
},
{
"children": [
{
"name": "\u03ba\u03b1\u03bb\u03cc\u03c0\u03b1\u03b9\u03b4\u03bf.json",
"type": ".json",
"path": "I:\\Development\\Python\\JsonFileStruct\\\u03ba\u03b1\u03ba\u03cc\u03c0\u03b1\u03b9\u03b4\u03bf\\\u03ba\u03b1\u03bb\u03cc\u03c0\u03b1\u03b9\u03b4\u03bf.json"
}
],
"name": "\u03ba\u03b1\u03ba\u03cc\u03c0\u03b1\u03b9\u03b4\u03bf",
"type": "folder",
"path": "I:\\Development\\Python\\JsonFileStruct\\\u03ba\u03b1\u03ba\u03cc\u03c0\u03b1\u03b9\u03b4\u03bf"
},
{
"name": "\u03ba\u03b1\u03bb\u03cc\u03c0\u03b1\u03b9\u03b4\u03bf.json",
"type": ".json",
"path": "I:\\Development\\Python\\JsonFileStruct\\\u03ba\u03b1\u03bb\u03cc\u03c0\u03b1\u03b9\u03b4\u03bf.json"
}
],
"name": "JsonFileStruct",
"type": "folder",
"path": "I:\\Development\\Python\\JsonFileStruct"
}

Json to CSV using python and blender 2.74

I have a project in which I have to convert a JSON file into a CSV file.
The Json sample :
{
"P_Portfolio Group": {
"depth": 1,
"dataType": "PortfolioOverview",
"levelId": "P_Portfolio Group",
"path": [
{
"label": "Portfolio Group",
"levelId": "P_Portfolio Group"
}
],
"label": "Portfolio Group",
"header": [
{
"id": "Label",
"label": "Security name",
"type": "text",
"contentType": "text"
},
{
"id": "SecurityValue",
"label": "MioCHF",
"type": "text",
"contentType": "number"
},
{
"id": "SecurityValuePct",
"label": "%",
"type": "text",
"contentType": "pct"
}
],
"data": [
{
"dataValues": [
{
"value": "Client1",
"type": "text"
},
{
"value": 2068.73,
"type": "number"
},
{
"value": 14.0584,
"type": "pct"
}
]
},
{
"dataValues": [
{
"value": "Client2",
"type": "text"
},
{
"value": 1511.9,
"type": "number"
},
{
"value": 10.2744,
"type": "pct"
}
]
},
{
"dataValues": [
{
"value": "Client3",
"type": "text"
},
{
"value": 1354.74,
"type": "number"
},
{
"value": 9.2064,
"type": "pct"
}
]
},
{
"dataValues": [
{
"value": "Client4",
"type": "text"
},
{
"value": 1225.78,
"type": "number"
},
{
"value": 8.33,
"type": "pct"
}
]
}
],
"summary": [
{
"value": "Total",
"type": "text"
},
{
"value": 11954.07,
"type": "number"
},
{
"value": 81.236,
"type": "pct"
}
]
}
}
And I want to obtain something like:
Client1,2068.73,14.0584
Client2,1511.9,10.2744
Client3,871.15,5.92
Client4,11954.07,81.236
Can you please give me a hint.
import csv
import json
# Asker's non-working attempt, kept verbatim.
# NOTE(review): the block indentation under `with` and `for` appears to have
# been lost when this snippet was extracted.
# NOTE(review): the path literals are not raw strings, so "\t" in "\test" is a
# tab character, and on Python 3 "\U" begins a unicode escape.
with open("C:\Users\SVC\Desktop\test.json") as file:
x = json.load(file)
# NOTE(review): "wb+" opens the file in binary mode and the handle is never
# closed; on Python 3 csv.writer expects a text-mode file.
f = csv.writer(open("C:\Users\SVC\Desktop\test.csv", "wb+"))
# Reuses the name `x` as the loop variable, so the loaded document is rebound
# on the first iteration. For the sample above the document is a dict, so the
# loop yields its key strings, not row objects.
for x in x:
# In the sample, "data" is a list of objects and "dataValues" is a list too,
# so neither can be indexed with a string key as done here.
f.writerow(x["P_Portfolio Group"]["data"]["dataValues"]["value"])
but it doesn't work.
Can you please give me a hint.
import csv
import json
# Raw string literals keep the Windows backslashes literal: in a normal
# literal "\t" is a tab and, on Python 3, "\U" starts a unicode escape.
with open(r'C:\Users\SVC\Desktop\test.json') as json_file:
    portfolio_group = json.load(json_file)

# newline='' lets the csv module control line endings itself, which avoids
# blank lines between rows on Windows.
with open(r'C:\Users\SVC\Desktop\test.csv', 'w', newline='') as csv_file:
    csv_obj = csv.writer(csv_file)
    # One CSV row per entry of "data": the "value" of each of its dataValues.
    for data in portfolio_group['P_Portfolio Group']['data']:
        csv_obj.writerow([d['value'] for d in data['dataValues']])
This results in the following C:\Users\SVC\Desktop\test.csv content:
Client1,2068.73,14.0584
Client2,1511.9,10.2744
Client3,1354.74,9.2064
Client4,1225.78,8.33
Use the pandas library:
import json

import pandas as pd

# The input is nested JSON, not CSV, so load it with the json module and pull
# out the rows before handing them to pandas.
with open(r"C:\Users\SVC\Desktop\test.json") as json_file:
    portfolio_group = json.load(json_file)

# One row per entry of "data": the "value" of each of its dataValues.
rows = [
    [cell["value"] for cell in entry["dataValues"]]
    for entry in portfolio_group["P_Portfolio Group"]["data"]
]
data = pd.DataFrame(rows)
# No header or index column, to match the requested "Client1,2068.73,..." rows.
data.to_csv('test.csv', header=False, index=False)
done

Categories

Resources