folder structure to json with python(unicode) - python

This program creates a folder/file structure in json
#!/usr/bin/env python
import os
import errno
import json
import sys
def path_hierarchy(path):
hierarchy = {
'type': 'folder',
'name': os.path.basename(path),
'path': path,
}
try:
hierarchy['children'] = [
path_hierarchy(os.path.join(path, contents))
for contents in os.listdir(path)
]
except OSError as e:
if e.errno != errno.ENOTDIR:
raise
hierarchy['type'] = os.path.splitext(path)[1]
if hierarchy['type'] == "":
hierarchy['type'] = "Unknown"
return hierarchy
if __name__ == '__main__':
try:
directory = sys.argv[1]
except IndexError:
directory = os.getcwd()
fo = open("output.json", "w")
fo.write(json.dumps(path_hierarchy(directory), indent=2, sort_keys=False,))
fo.close()
It is actually working but a can't make it work with Greek and Japanesse characters. This is the output of the file.Is it an encoding issue or is it something with the json class in python. I use python3 so everything should be unicode but still...
{
"children": [
{
"children": [
{
"name": ".name",
"type": "Unknown",
"path": "I:\\Development\\Python\\JsonFileStruct\\.idea\\.name"
},
{
"name": "encodings.xml",
"type": ".xml",
"path": "I:\\Development\\Python\\JsonFileStruct\\.idea\\encodings.xml"
},
{
"name": "JsonFileStruct.iml",
"type": ".iml",
"path": "I:\\Development\\Python\\JsonFileStruct\\.idea\\JsonFileStruct.iml"
},
{
"name": "misc.xml",
"type": ".xml",
"path": "I:\\Development\\Python\\JsonFileStruct\\.idea\\misc.xml"
},
{
"name": "modules.xml",
"type": ".xml",
"path": "I:\\Development\\Python\\JsonFileStruct\\.idea\\modules.xml"
},
{
"name": "workspace.xml",
"type": ".xml",
"path": "I:\\Development\\Python\\JsonFileStruct\\.idea\\workspace.xml"
}
],
"name": ".idea",
"type": "folder",
"path": "I:\\Development\\Python\\JsonFileStruct\\.idea"
},
{
"name": "main.py",
"type": ".py",
"path": "I:\\Development\\Python\\JsonFileStruct\\main.py"
},
{
"name": "output.json",
"type": ".json",
"path": "I:\\Development\\Python\\JsonFileStruct\\output.json"
},
{
"name": "second.py",
"type": ".py",
"path": "I:\\Development\\Python\\JsonFileStruct\\second.py"
},
{
"children": [
{
"name": "\u03ba\u03b1\u03bb\u03cc\u03c0\u03b1\u03b9\u03b4\u03bf.json",
"type": ".json",
"path": "I:\\Development\\Python\\JsonFileStruct\\\u03ba\u03b1\u03ba\u03cc\u03c0\u03b1\u03b9\u03b4\u03bf\\\u03ba\u03b1\u03bb\u03cc\u03c0\u03b1\u03b9\u03b4\u03bf.json"
}
],
"name": "\u03ba\u03b1\u03ba\u03cc\u03c0\u03b1\u03b9\u03b4\u03bf",
"type": "folder",
"path": "I:\\Development\\Python\\JsonFileStruct\\\u03ba\u03b1\u03ba\u03cc\u03c0\u03b1\u03b9\u03b4\u03bf"
},
{
"name": "\u03ba\u03b1\u03bb\u03cc\u03c0\u03b1\u03b9\u03b4\u03bf.json",
"type": ".json",
"path": "I:\\Development\\Python\\JsonFileStruct\\\u03ba\u03b1\u03bb\u03cc\u03c0\u03b1\u03b9\u03b4\u03bf.json"
}
],
"name": "JsonFileStruct",
"type": "folder",
"path": "I:\\Development\\Python\\JsonFileStruct"
}

Related

convert list of dir paths into a json object

I am trying to build a py script that converts a list of paths into a json object as below; the output of the script should be structured as below.
json_out is a list of dictionaries that have four elements (1)type (2)name (3)path and (4)children
json_out = [
{
"type": "folder",
"name": "dir1",
"path": "/dir1",
"children": [
{
"type": "folder",
"name": "photos",
"path": "/dir1/photos",
"children": [
{
"type": "file",
"name": "mydir1.pdf",
"path": "/dir1/photos/mydir1.pdf"
},
{
"type": "file",
"name": "yourdir1.pdf",
"path": "/dir1/photos/yourdir1.pdf"
}
]
}
]
},
{
"type": "folder",
"name": "dir2",
"path": "/dir2",
"children": [
{
"type": "folder",
"name": "photos",
"path": "/dir2/photos",
"children": [
{
"type": "file",
"name": "mydir2.pdf",
"path": "/dir2/photos/mydir2.pdf"
},
{
"type": "file",
"name": "yourdir2.pdf",
"path": "/dir2/photos/yourdir2.pdf"
}
]
}
]
}
]
This is what I have so far, but this does not return the correct output structure
def my_fx(paths):
for path in paths:
file_path=path
l=file_path.split('/')[1:]
def gen_json(l=l, d=dict()):
tmp = {}
if not d:
d["name"] = l.pop(-1)
tmp["children"]=d
tmp["name"]=l.pop(-1)
return gen_json(l,tmp) if l else tmp
print(json.dumps(gen_json(l), ensure_ascii=False))
My Input
list_of_paths = [
"dir1/photos/mydir1.pdf",
"dir1/photos/yourdir1.pdf",
"dir2/photos/mydir2.pdf",
"dir2/photos/yourdir2.pdf"
]
My Output
{"children": {"name": "mydir1.pdf"}, "name": "photos"}
{"children": {"name": "yourdir1.pdf"}, "name": "photos"}
{"children": {"name": "mydir2.pdf"}, "name": "photos"}
{"children": {"name": "yourdir2.pdf"}, "name": "photos"}
Thanks in advance

Python Script to convert multiple json files in to single csv

{
"type": "Data",
"version": "1.0",
"box": {
"identifier": "abcdef",
"serial": "12345678"
},
"payload": {
"Type": "EL",
"Version": "1",
"Result": "Successful",
"Reference": null,
"Box": {
"Identifier": "abcdef",
"Serial": "12345678"
},
"Configuration": {
"EL": "1"
},
"vent": [
{
"ventType": "Arm",
"Timestamp": "2020-03-18T12:17:04+10:00",
"Parameters": [
{
"Name": "Arm",
"Value": "LT"
},
{
"Name": "Status",
"Value": "LD"
}
]
},
{
"ventType": "Arm",
"Timestamp": "2020-03-18T12:17:24+10:00",
"Parameters": [
{
"Name": "Arm",
"Value": "LT"
},
{
"Name": "Status",
"Value": "LD"
}
]
},
{
"EventType": "TimeUpdateCompleted",
"Timestamp": "2020-03-18T02:23:21.2979668Z",
"Parameters": [
{
"Name": "ActualAdjustment",
"Value": "PT0S"
},
{
"Name": "CorrectionOffset",
"Value": "PT0S"
},
{
"Name": "Latency",
"Value": "PT0.2423996S"
}
]
}
]
}
}
If you're looking to transfer information from a JSON file to a CSV, then you can use the following code to read in a JSON file into a dictionary in Python:
import json
with open('data.txt') as json_file:
data_dict = json.load(json_file)
You could then convert this dictionary into a list with either data_dict.items() or data_dict.values().
Then you just need to write this list to a CSV file which you can easily do by just looping through the list.

How do you deploy a python azure function with an arm template?

The following deploys a azure function that run the specified C#. How do I do the same for a function that should run python?
I tried just changing the name to __init__.py as is generated when you use the azure-function-core-tools func command with the --python switch, but couldn't even find error messages as to why things weren't working.
{
"$schema": "http://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
"contentVersion": "1.0.0.0",
"parameters": {
"appName": {
"type": "string",
"metadata": {
"description": "The name of the function app that you wish to create."
}
},
"storageAccountType": {
"type": "string",
"defaultValue": "Standard_LRS",
"allowedValues": [
"Standard_LRS",
"Standard_GRS",
"Standard_ZRS",
"Premium_LRS"
],
"metadata": {
"description": "Storage Account type"
}
}
},
"variables": {
"functionAppName": "[parameters('appName')]",
"hostingPlanName": "[parameters('appName')]",
"storageAccountName": "[concat(uniquestring(resourceGroup().id), 'azfunctions')]"
},
"resources": [
{
"type": "Microsoft.Storage/storageAccounts",
"name": "[variables('storageAccountName')]",
"apiVersion": "2015-06-15",
"location": "[resourceGroup().location]",
"properties": {
"accountType": "[parameters('storageAccountType')]"
}
},
{
"type": "Microsoft.Web/serverfarms",
"apiVersion": "2015-04-01",
"name": "[variables('hostingPlanName')]",
"location": "[resourceGroup().location]",
"properties": {
"name": "[variables('hostingPlanName')]",
"computeMode": "Dynamic",
"sku": "Dynamic"
}
},
{
"apiVersion": "2015-08-01",
"type": "Microsoft.Web/sites",
"name": "[variables('functionAppName')]",
"location": "[resourceGroup().location]",
"kind": "functionapp",
"properties": {
"name": "[variables('functionAppName')]",
"serverFarmId": "[resourceId('Microsoft.Web/serverfarms', variables('hostingPlanName'))]"
},
"dependsOn": [
"[resourceId('Microsoft.Web/serverfarms', variables('hostingPlanName'))]",
"[resourceId('Microsoft.Storage/storageAccounts', variables('storageAccountName'))]"
],
"resources": [
{
"apiVersion": "2016-03-01",
"name": "appsettings",
"type": "config",
"dependsOn": [
"[resourceId('Microsoft.Web/sites', variables('functionAppName'))]",
"[resourceId('Microsoft.Storage/storageAccounts', variables('storageAccountName'))]"
],
"properties": {
"AzureWebJobsStorage": "[concat('DefaultEndpointsProtocol=https;AccountName=',variables('storageAccountName'),';AccountKey=',listkeys(resourceId('Microsoft.Storage/storageAccounts', variables('storageAccountName')), '2015-05-01-preview').key1,';')]",
"AzureWebJobsDashboard": "[concat('DefaultEndpointsProtocol=https;AccountName=',variables('storageAccountName'),';AccountKey=',listkeys(resourceId('Microsoft.Storage/storageAccounts', variables('storageAccountName')), '2015-05-01-preview').key1,';')]",
"FUNCTIONS_EXTENSION_VERSION": "latest"
}
},
{
"apiVersion": "2015-08-01",
"name": "TestFunctionCM",
"type": "functions",
"dependsOn": [
"[resourceId('Microsoft.Web/sites', variables('functionAppName'))]"
],
"properties": {
"config": {
"bindings": [
{
"authLevel": "anonymous",
"name": "req",
"type": "httpTrigger",
"direction": "in"
},
{
"name": "res",
"type": "http",
"direction": "out"
}
]
},
"files": {
"run.csx": "using System.Net;\r\n\r\n public static HttpResponseMessage Run(HttpRequestMessage req, TraceWriter log)\r\n\r\n {\r\n\r\nreturn req.CreateResponse(\"Hello from MyFunction\", HttpStatusCode.OK);\r\n\r\n }"
}
}
}
]
}
]
}
Thank you.
You will probably need the following:
Runtime under appsettings
"FUNCTIONS_WORKER_RUNTIME": "python"
My template looks bit different but does deploy a python function, here is the resource from the same:
{
"type": "Microsoft.Web/sites",
"apiVersion": "2018-11-01",
"name": "[parameters('name')]",
"location": "[parameters('location')]",
"dependsOn": [
"microsoft.insights/components/mycoolfunction",
"[concat('Microsoft.Web/serverfarms/', parameters('hostingPlanName'))]",
"[concat('Microsoft.Storage/storageAccounts/', parameters('storageAccountName'))]"
],
"tags": {},
"kind": "functionapp,linux",
"properties": {
"name": "[parameters('name')]",
"siteConfig": {
"appSettings": [
{
"name": "FUNCTIONS_WORKER_RUNTIME",
"value": "python"
},
{
"name": "FUNCTIONS_EXTENSION_VERSION",
"value": "~2"
},
{
"name": "APPINSIGHTS_INSTRUMENTATIONKEY",
"value": "[reference('microsoft.insights/components/mycoolfunction', '2015-05-01').InstrumentationKey]"
},
{
"name": "AzureWebJobsStorage",
"value": "[concat('DefaultEndpointsProtocol=https;AccountName=',parameters('storageAccountName'),';AccountKey=',listKeys(resourceId('Microsoft.Storage/storageAccounts', parameters('storageAccountName')), '2019-06-01').keys[0].value,';EndpointSuffix=','core.windows.net')]"
}
]
},
"serverFarmId": "[concat('/subscriptions/', parameters('subscriptionId'),'/resourcegroups/', parameters('serverFarmResourceGroup'), '/providers/Microsoft.Web/serverfarms/', parameters('hostingPlanName'))]",
"hostingEnvironment": "[parameters('hostingEnvironment')]",
"clientAffinityEnabled": false
}
}

Confluent Python Avro Producer: The datum {'..'} is not an example of the schema

I'm unable to produce data for a specific schema and I'm unable to understand why. The Example data inlcuded as dictionary in the code was created directly from using the confluent "avro-random-generator", so the example data must be correct, since it's directly derived from the schema. Both, Schema Registry, and Avro Random Generator are Confluent Tools, so it can't be that there their tools produces example data that does not work with the their schema registry.
This is the Schema:
{
"type": "record",
"name": "schemaV1",
"namespace": "net.avro.schemaV1",
"doc": "",
"fields": [
{
"name": "orderId",
"type": {
"type": "string",
"avro.java.string": "String"
},
"doc": ""
},
{
"name": "offerId",
"type": {
"type": "string",
"avro.java.string": "String"
},
"doc": ""
},
{
"name": "redeemerId",
"type": [
"null",
{
"type": "string",
"avro.java.string": "String"
}
],
"doc": "",
"default": null
},
{
"name": "eventCancellationType",
"type": "int",
"doc": ""
},
{
"name": "ruleIds",
"type": {
"type": "array",
"items": {
"type": "string",
"avro.java.string": "String"
},
"doc": ""
}
},
{
"name": "eventOriginator",
"type": {
"type": "record",
"name": "AvroEventPartnerV1",
"doc": "",
"fields": [
{
"name": "partnerShortName",
"type": {
"type": "string",
"avro.java.string": "String"
},
"doc": ""
},
{
"name": "businessUnitShortName",
"type": [
"null",
{
"type": "string",
"avro.java.string": "String"
}
],
"doc": "",
"default": null
},
{
"name": "branchShortName",
"type": [
"null",
{
"type": "string",
"avro.java.string": "String"
}
],
"doc": "",
"default": null
}
]
}
},
{
"name": "roundedDelta",
"doc": "",
"type": {
"type": "record",
"name": "AvroAmountV1",
"doc": "Amount with a currency",
"fields": [
{
"name": "amount",
"type": {
"type": "bytes",
"logicalType": "decimal",
"precision": 21,
"scale": 3
},
"doc": "The amount as a decimal number"
},
{
"name": "currency",
"type": {
"type": "string",
"avro.java.string": "String"
},
"doc": ""
}
]
}
},
{
"name": "rewardableLegalDelta",
"type": [
"null",
"AvroAmountV1"
],
"doc": "",
"default": null
},
{
"name": "receiptNumber",
"type": {
"type": "string",
"avro.java.string": "String"
},
"doc": ""
},
{
"name": "referenceReceiptNumber",
"type": [
"null",
{
"type": "string",
"avro.java.string": "String"
}
],
"doc": "",
"default": null
},
{
"name": "eventEffectiveTime",
"type": {
"type": "long"
},
"doc": ""
}
]
}
This is my script:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from confluent_kafka import avro
from confluent_kafka.avro import AvroProducer, ClientError, ValueSerializerError
BOOTSTRAP_SERVER = 'localhost:9092'
SCHEMA_REGISTRY = 'http://localhost:8081'
TOPIC = 'topicV1'
SCHEMA_PATH = 'schemas/schemaV1.avsc'
def schemaReader(SCHEMA_PATH):
with open(SCHEMA_PATH, 'r') as file:
data = file.read()
return data
def main():
kafka_config = {
'bootstrap.servers': BOOTSTRAP_SERVER,
'schema.registry.url': SCHEMA_REGISTRY
}
value_schema = avro.loads( schemaReader(SCHEMA_PATH) )
null = None
value = {
"orderId": "a9bcc55f-e2c0-43d6-b793-ff5f295d051d",
"offerId": "119475017578242889",
"redeemerId": "1176a01b-b2dc-45a9-91cc-232361e14f99",
"eventCancellationType": 0,
"ruleIds": ["ID-IPM00001"],
"eventOriginator": {"partnerShortName":
"partner","businessUnitShortName": null,"branchShortName": null},
"roundedDelta": {"amount": "\u8463","currency": "PTS"},
"rewardableLegalDelta": {"amount": "\u8463","currency": "EUR"},
"receiptNumber": "19b2ff68-ed06-48f0-9ce9-d697c0eadc19",
"referenceReceiptNumber": null,
"eventEffectiveTime": 1569494696656
}
avroProducer = AvroProducer(kafka_config, default_value_schema=value_schema )
avroProducer.produce(topic=TOPIC, value=value, value_schema=value_schema)
avroProducer.flush()
if __name__== "__main__":
main()
This is the traceback I'm receiving:
File "producer.py", line 64, in <module>
main()
File "producer.py", line 60, in main
avroProducer.produce(topic=TOPIC, value=value, value_schema=value_schema)
File "/apps/python/python2.7/lib/python2.7/site-packages/confluent_kafka/avro/__init__.py", line 80, in produce
value = self._serializer.encode_record_with_schema(topic, value_schema, value)
File "/apps/python/python2.7/lib/python2.7/site-packages/confluent_kafka/avro/serializer/message_serializer.py", line 115, in encode_record_with_schema
return self.encode_record_with_schema_id(schema_id, record, is_key=is_key)
File "/apps/python/python2.7/lib/python2.7/site-packages/confluent_kafka/avro/serializer/message_serializer.py", line 149, in encode_record_with_schema_id
writer(record, outf)
File "/apps/python/python2.7/lib/python2.7/site-packages/confluent_kafka/avro/serializer/message_serializer.py", line 86, in <lambda>
return lambda record, fp: writer.write(record, avro.io.BinaryEncoder(fp))
File "/apps/python/python2.7/lib/python2.7/site-packages/avro/io.py", line 1042, in write
raise AvroTypeException(self.writers_schema, datum)
avro.io.AvroTypeException: The datum {'..'} is not an example of the schema { ..}
It seems that the problem is that the amount should be a bytes type but you have a normal string of \u8463. The library you mentioned that you used to generate the random data creates a byte string by using the java default charset: https://github.com/confluentinc/avro-random-generator/blob/master/src/main/java/io/confluent/avro/random/generator/Generator.java#L373
However, perhaps that default isn't iso-8859-1 which is what the java implementation (the reference implementation) uses: https://github.com/apache/avro/blob/bf47ec97e0b7f5701042fac067b73b421a9177b7/lang/java/avro/src/main/java/org/apache/avro/io/JsonEncoder.java#L220

Json to CSV using python and blender 2.74

I have a project in which i have to convert a json file into a CSV file.
The Json sample :
{
"P_Portfolio Group": {
"depth": 1,
"dataType": "PortfolioOverview",
"levelId": "P_Portfolio Group",
"path": [
{
"label": "Portfolio Group",
"levelId": "P_Portfolio Group"
}
],
"label": "Portfolio Group",
"header": [
{
"id": "Label",
"label": "Security name",
"type": "text",
"contentType": "text"
},
{
"id": "SecurityValue",
"label": "MioCHF",
"type": "text",
"contentType": "number"
},
{
"id": "SecurityValuePct",
"label": "%",
"type": "text",
"contentType": "pct"
}
],
"data": [
{
"dataValues": [
{
"value": "Client1",
"type": "text"
},
{
"value": 2068.73,
"type": "number"
},
{
"value": 14.0584,
"type": "pct"
}
]
},
{
"dataValues": [
{
"value": "Client2",
"type": "text"
},
{
"value": 1511.9,
"type": "number"
},
{
"value": 10.2744,
"type": "pct"
}
]
},
{
"dataValues": [
{
"value": "Client3",
"type": "text"
},
{
"value": 1354.74,
"type": "number"
},
{
"value": 9.2064,
"type": "pct"
}
]
},
{
"dataValues": [
{
"value": "Client4",
"type": "text"
},
{
"value": 1225.78,
"type": "number"
},
{
"value": 8.33,
"type": "pct"
}
]
}
],
"summary": [
{
"value": "Total",
"type": "text"
},
{
"value": 11954.07,
"type": "number"
},
{
"value": 81.236,
"type": "pct"
}
]
}
}
And i want o obtain something like:
Client1,2068.73,14.0584
Client2,1511.9,10.2744
Client3,871.15,5.92
Client4,11954.07,81.236
Can you please give me a hint.
import csv
import json
with open("C:\Users\SVC\Desktop\test.json") as file:
x = json.load(file)
f = csv.writer(open("C:\Users\SVC\Desktop\test.csv", "wb+"))
for x in x:
f.writerow(x["P_Portfolio Group"]["data"]["dataValues"]["value"])
but it doesn't work.
Can you please give me a hint.
import csv
import json
with open('C:\Users\SVC\Desktop\test.json') as json_file:
portfolio_group = json.load(json_file)
with open('C:\Users\SVC\Desktop\test.csv', 'w') as csv_file:
csv_obj = csv.writer(csv_file)
for data in portfolio_group['P_Portfolio Group']['data']:
csv_obj.writerow([d['value'] for d in data['dataValues']])
This results in the following C:\Users\SVC\Desktop\test.csv content:
Client1,2068.73,14.0584
Client2,1511.9,10.2744
Client3,1354.74,9.2064
Client4,1225.78,8.33
Use the pandas library:
import pandas as pd
data = pd.read_csv("C:\Users\SVC\Desktop\test.json")
data.to_csv('test.csv')
done

Categories

Resources