Parse JSON array in Python

I have a JSON file:
{
  "cis" : [ {
    "ucmdbId" : "835cfedfaabc32a1358b322ff3bae056",
    "type" : "running_software",
    "properties" : {
      "display_label" : "jboss (site1.ru)"
    }
  }, {
    "ucmdbId" : "7ef9f21c132c12b3d8d2af0964cc5970",
    "type" : "node",
    "properties" : {
      "display_label" : "site2.ru"
    }
  } ],
  "relations" : [ {
    "ucmdbId" : "80c42edbe32fbb4c25621756ec9e09d2",
    "type" : "compound_f",
    "properties" : null,
    "end1Id" : "23e30baf2320a3274d0aa1e7f56cdaef",
    "end2Id" : "15af0ba134327d32a0c5c72450e63fcd"
  }, {
    "ucmdbId" : "7fe9fb15d4462d1212aeee4aef2f32b4",
    "type" : "compound_f",
    "properties" : null,
    "end1Id" : "23e30baf2320a3274d0aa327f56cdaef",
    "end2Id" : "9232dd2621b814da632932e8cd33ffc8"
  } ]
}
I only need the cis array. So this is what I need to parse:
[{
  "ucmdbId" : "835cfedfaabc32a1358b322ff3bae056",
  "type" : "running_software",
  "display_label" : "jboss (site1.ru)"
}, {
  "ucmdbId" : "7ef9f21c132c12b3d8d2af0964cc5970",
  "type" : "node",
  "display_label" : "site2.ru"
}]
Python script:
#!/usr/bin/python
import sys
import os
import tablib
import pandas as pd
import json
from pandas.io.json import json_normalize
f = open('/home/nik/test.json', 'rw')
jsonArray = f.read()
f.close
data = json.dumps(json.loads(jsonArray)['cis'])
jsonResult = pd.read_json(data)
array = json.loads(jsonArray)
print jsonArray
jsonResult.to_excel('/home/nik/output.xlsx', sheet_name='Sheet1')
But how can I get the key parameters? I tried to use:
print data['type'].keys()
print data['type']
But it gives me an error:
AttributeError: 'str' object has no attribute 'keys'
How can I get the proper JSON format?
Update. Solution:
Thanks, it works. My complete code to export the JSON into an xlsx file:
#!/usr/bin/python
import subprocess
import sys
import os
import tablib
import pandas as pd
import json
import glob
import string

path = '/home/nik/json'
for jsonfile in glob.glob(os.path.join(path, '*.json')):
    #jsonfile = '/home/nik/test.json'
    with open(jsonfile) as data_file:
        data = json.load(data_file)
    JSON = '[{ \n'
    for index, item in enumerate(data['cis']):
        ucmdbId = item['ucmdbId']
        type = item['type']
        display_label = item['properties']['display_label']
        Text1 = ' "ucmdbId" : "%s",' % ucmdbId
        Text2 = ' "type" : "%s",' % type
        Text3 = ' "display_label" : "%s",' % display_label
        if index == (len(data['cis']) - 1):
            End = '}]'
        else:
            End = '}, {'
        JSON += Text3 + '\n' + Text2 + '\n' + Text1 + '\n' + End + '\n'
    JSON = JSON.translate({ord(c): None for c in '\/'})
    jsonResult = pd.read_json(JSON)
    jsonResult = jsonResult.sort_values(by='type')
    jsonResult.to_excel(jsonfile + '.xlsx', sheet_name='Object monitoring', index=False)
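For comparison, here is a sketch of the same export without building the JSON string by hand, assuming the same directory layout and cis structure as above; the flat rows are built as plain dicts and pandas handles the serialization:

#!/usr/bin/python
import glob
import json
import os

import pandas as pd

path = '/home/nik/json'
for jsonfile in glob.glob(os.path.join(path, '*.json')):
    with open(jsonfile) as data_file:
        data = json.load(data_file)
    # Build the flat rows directly instead of concatenating a JSON string
    rows = [
        {
            'ucmdbId': item['ucmdbId'],
            'type': item['type'],
            'display_label': item['properties']['display_label'],
        }
        for item in data['cis']
    ]
    df = pd.DataFrame(rows).sort_values(by='type')
    df.to_excel(jsonfile + '.xlsx', sheet_name='Object monitoring', index=False)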

import json
from pprint import pprint

jsonfile = 'C:\\temp\\temp.json' # path to your json file
with open(jsonfile) as data_file:
    data = json.load(data_file)
pprint(data['cis'])
The above will give you just the cis array. (The error in the question comes from calling json.dumps, which returns a plain string rather than a dict; index into the parsed object instead.)
Below is a more granular output:
for item in data['cis']:
    ucmdbId = item['ucmdbId']
    type = item['type']
    display_label = item['properties']['display_label']
    print(ucmdbId)
    print(type)
    print(display_label)
If you want it with key labels then use
for item in data['cis']:
    ucmdbId = item['ucmdbId']
    type = item['type']
    display_label = item['properties']['display_label']
    print('ucmdbId:{}'.format(ucmdbId))
    print('type:{}'.format(type))
    print('display_label:{}'.format(display_label))
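Since the question also asks how to get the proper JSON format, a minimal sketch that produces exactly the flat array shown in the question (assuming data has already been loaded as above):

flat = [
    {
        'ucmdbId': item['ucmdbId'],
        'type': item['type'],
        'display_label': item['properties']['display_label'],
    }
    for item in data['cis']
]
print(json.dumps(flat, indent=2))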

Related

JSONDecodeError: Expecting value: line 2 column 13 (char 15)

I have a nested JSON file (an export from MongoDB) which I am trying to convert to CSV with Python code.
I tried every approach I could find, including the solutions from previous questions, but couldn't succeed.
My JSON format is:
{
  "d1" : ("value1"),
  "d2" : (value2-int),
  "d3" : [
    {
      "sub-d1" : sub-value1(int),
      "sub-d2" : sub-value2(int),
      "sub-d3" : sub-value3(int),
      "sub-d4" : [
        {
          "sub-sub-d1" : "sub-sub-value3",
          "sub-sub-d2" : sub-value3(int)
        },
        {
          "sub-sub-d1" : sub-sub-value3(int),
          "sub-sub-d2" : "sub-sub-value3"
        }
      ],
      "sub-d5" : "sub-value4",
      "sub-d6" : "sub-value5"
    }
  ],
  "d4" : "value3",
  "d5" : "value4",
  "d6" : "value5",
  "d7" : "value6"
}
{ another entry with the same pattern... and so on }
Some of the values and sub-values are integers, and some mix strings and integers.
What I tried:
import json
import csv
import requests

with open('./data/inverter.json', 'r') as myfile:
    json_data = myfile.read()

def get_leaves(item, key=None):
    if isinstance(item, dict):
        leaves = {}
        for i in item.keys():
            leaves.update(get_leaves(item[i], i))
        return leaves
    elif isinstance(item, list):
        leaves = {}
        for i in item:
            leaves.update(get_leaves(i, key))
        return leaves
    else:
        return {key: item}

# First parse all entries to get the complete fieldname list
fieldnames = set()
for entry in json_data:
    fieldnames.update(get_leaves(entry).keys())

with open('output.csv', 'w', newline='') as f_output:
    csv_output = csv.DictWriter(f_output, fieldnames=sorted(fieldnames))
    csv_output.writeheader()
    csv_output.writerows(get_leaves(entry) for entry in json_data)
This one saves all my data in a single column with split values.
I tried to use:
https://github.com/vinay20045/json-to-csv.git
but this also didn't work.
I also tried to parse it with a simple trick using the following code:
with open("./data/inverter.json") as data_file:
    data = data_file.read()
    #print(data)
    data_content = json.loads(data)
    print(data_content)
but it throws an error: 'JSONDecodeError: Expecting value: line 2 column 13 (char 15)'
Can anyone help me convert my nested JSON to CSV?
It would be appreciated. Thank you.
It looks like the NumberInt(234234) issue you describe was a bug in MongoDB: how to export mongodb without any wrapping with NumberInt(...)?
If you cannot fix it by upgrading MongoDB, I can recommend preprocessing the data with regular expressions and parsing it as regular JSON after that.
For the sake of example, let's say you've got "test.json" that looks like this, which is valid except for the NumberInt(...) stuff:
{
  "d1" : "value1",
  "d2" : NumberInt(1234),
  "d3" : [
    {
      "sub-d1" : 123,
      "sub-d2" : 123,
      "sub-d3" : 123,
      "sub-d4" : [
        {
          "sub-sub-d1" : "sub-sub-value3",
          "sub-sub-d2" : NumberInt(123)
        },
        {
          "sub-sub-d1" : 43242,
          "sub-sub-d2" : "sub-sub-value3"
        }
      ]
    }
  ],
  "d4" : "value3",
  "d5" : "value4",
  "d6" : "value5",
  "d7" : "value6"
}
You could import this into Python as follows:
import re
import json

with open("test.json") as f:
    data = f.read()

# This regular expression finds/replaces the NumberInt bits with just the contents
fixed_data = re.sub(r"NumberInt\((\d+)\)", r"\1", data)
loaded_data = json.loads(fixed_data)
print(json.dumps(loaded_data, indent=4))
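The pattern above only matches non-negative integers. If the export can also contain negative numbers or NumberLong(...) wrappers (an assumption about the data, not something shown in the question), the same idea extends to a slightly wider expression:

# Also strip NumberLong(...) wrappers and allow a leading minus sign (assumed cases)
fixed_data = re.sub(r"Number(?:Int|Long)\((-?\d+)\)", r"\1", data)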

How to get an array of first elements from a json array

I have a config.json file, which contains an array of organisations:
config.json
{
  "organisations": [
    { "displayName" : "org1", "bucketName" : "org1_bucket" },
    { "displayName" : "org2", "bucketName" : "org2_bucket" },
    { "displayName" : "org3", "bucketName" : "org3_bucket" }
  ]
}
How can I get an array of all organisation names?
This is what I have tried:
from python_json_config import ConfigBuilder

def read_config():
    builder = ConfigBuilder()
    org_array = builder.parse_config('config.json')
    # return all firstNames in org_array
import json

def read_config():
    display_names = []
    with open('yourfilename.json', 'r', encoding="utf-8") as file:
        orgs = json.load(file)
        # note: the key is "organisations", matching the config in the question
        display_names = [o["displayName"] for o in orgs["organisations"]]
    return display_names
Also, we have no way of knowing what ConfigBuilder or builder.parse_config does, since we don't have access to that code, so apologies for not taking your example into account.
a = {
    "organisations": [
        { "displayName" : "org1", "bucketName" : "org1_bucket" },
        { "displayName" : "org2", "bucketName" : "org2_bucket" },
        { "displayName" : "org3", "bucketName" : "org3_bucket" }
    ]
}
print([i["displayName"] for i in a["organisations"]])
Output:
['org1', 'org2', 'org3']
Use a list comprehension; it's very easy. To read a JSON file:
import json
data = json.load(open("config.json"))
Use lambda with map to get an array of just the organisation names:
>>> list(map(lambda i: i['displayName'], data['organisations']))
['org1', 'org2', 'org3']
If you want to read the JSON data from a file into a dictionary, you can do it as follows.
import json

with open('config.json') as json_file:
    data = json.load(json_file)
org_array = list(map(lambda i: i['displayName'], data['organisations']))
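If some entries could be missing the displayName key (an assumption about the config, not something the question states), a .get()-based variant fails soft instead of raising a KeyError:

# Fall back to an empty string for entries without a displayName (hypothetical case)
org_array = [o.get('displayName', '') for o in data.get('organisations', [])]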

Python - appending json file

I am trying to append the JSON files from a folder into a variable so I can parse them out later. Here is the code I have:
# Importing dependencies
import os
import shutil
import glob
from zipfile import ZipFile
from datetime import datetime
import zipfile
import json
from pandas.io.json import json_normalize
import urllib
import sqlalchemy as sa

# Define the folder sources and destinations
MainDir = 'C:/Test/'
LoadingDir = 'C:/Test/Loading/'
ArchiveDir = 'C:/Test/Archive/'
glob_data = []
# Look for all json files in directory
for file in glob.glob(LoadingDir + '*.json'):
    with open(file) as json_file:
        # Load each json file and append it
        data = json.load(json_file)
        i = 0
        while i < len(data):
            glob_data.append(data[i])
            i += 1
with open(LoadingDir + 'Combined.json', 'w') as f:
    json.dump(glob_data, f, indent=4)
# Load Json file for parsing
file = open(LoadingDir + 'Combined.json')
data = json.load(file)
# Parsing of data
df = json_normalize(data, meta=['timestamp'])
df.to_csv(LoadingDir + "Combined.csv", sep=',', encoding='utf-8')
try:
    df.to_csv(LoadingDir + "Combined.csv", sep=',', encoding='utf-8')
except:
    pass
When I try running it I get this message below:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-1-ea0f48aa463e> in <module>
24 i = 0
25 while i < len(data):
---> 26 glob_data.append(data[i])
27 i += 1
28 with open(LoadingDir + 'Combined.json', 'w') as f:
KeyError: 0
Here is a sample of my JSON file:
{
  "sensor-time" : {
    "timezone" : "America/Los_Angeles",
    "time" : "2019-11-05T14:18:36-08:00"
  },
  "status" : {
    "code" : "OK"
  },
  "content" : {
    "element" : [ {
      "element-id" : 0,
      "element-name" : "Line 0",
      "sensor-type" : "SINGLE_SENSOR",
      "data-type" : "LINE",
      "from" : "2019-11-01T00:00:00-07:00",
      "to" : "2019-11-05T15:00:00-08:00",
      "resolution" : "ONE_HOUR",
      "measurement" : [ {
        "from" : "2019-11-01T00:00:00-07:00",
        "to" : "2019-11-01T01:00:00-07:00",
        "value" : [ {
          "value" : 0,
          "label" : "fw"
        }, {
          "value" : 0,
          "label" : "bw"
        } ]
      }, {
        "from" : "2019-11-01T01:00:00-07:00",
        "to" : "2019-11-01T02:00:00-07:00",
        "value" : [ {
          "value" : 0,
          "label" : "fw"
        }, {
          "value" : 0,
          "label" : "bw"
        } ]
      }, {
        "from" : "2019-11-01T02:00:00-07:00",
        "to" : "2019-11-01T03:00:00-07:00",
        "value" : [ {
          "value" : 0,
          "label" : "fw"
        }, {
          "value" : 0,
          "label" : "bw"
        } ]
      },
So what I noticed is that this JSON file does not start with [, which means it's not a list of dictionaries. But when I have JSON that does start with [, my code does work.
How do I adjust my code to work for this sample of JSON?
Change your code to:
import os
import shutil
import glob
from zipfile import ZipFile
from datetime import datetime
import zipfile
import json
from pandas.io.json import json_normalize
import urllib
import sqlalchemy as sa

# Define the folder sources and destinations
MainDir = 'C:/Test/'
LoadingDir = 'C:/Test/Loading/'
ArchiveDir = 'C:/Test/Archive/'
glob_data = []
# Look for all json files in directory
for file in glob.glob(LoadingDir + '*.json'):
    with open(file) as json_file:
        # Load each json file and append it
        data = json.load(json_file)
        glob_data.append(data)
with open(LoadingDir + 'Combined.json', 'w') as f:
    json.dump(glob_data, f, indent=4)
# Load Json file for parsing
file = open(LoadingDir + 'Combined.json')
data = json.load(file)
# Parsing of data
df = json_normalize(data, meta=['timestamp'])
df.to_csv(LoadingDir + "Combined.csv", sep=',', encoding='utf-8')
try:
    df.to_csv(LoadingDir + "Combined.csv", sep=',', encoding='utf-8')
except:
    pass
You don't need to iterate over the value returned by json.load(); it has already been parsed into a dict, so just append it directly.
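As a follow-up, json_normalize can also flatten the nested element records rather than the top-level dicts. A sketch, assuming every file shares the sensor-time/content/element layout shown in the question:

from pandas.io.json import json_normalize  # pd.json_normalize in newer pandas

# One row per element; carry the sensor timestamp along as metadata
df = json_normalize(
    glob_data,
    record_path=['content', 'element'],
    meta=[['sensor-time', 'time'], ['sensor-time', 'timezone']],
)

The measurement lists stay nested in their own column and can be flattened further in a second pass if needed.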

How to create a filename with the current date and time in Python when a query is run

When I run my query below, it creates a file called 'mycsvfile'. However, is there a way to add the current date and timestamp when the CSV file is created? For example, if I run this query now, the file should be named mycsvfile20171012 – 10:00:00 (something like that).
Could someone edit my code and show me how to do this, please?
My code:
from elasticsearch import Elasticsearch
import csv

es = Elasticsearch(["9200"])
# Replace the following Query with your own Elastic Search Query
res = es.search(index="search", body=
{
    "_source": ["DTDT", "TRDT", "SPLE", "RPLE"],
    "query": {
        "bool": {
            "should": [
                {"wildcard": {"CN": "TEST1"}}
            ]
        }
    }
}, size=10)

header_names = { 'DTDT': 'DATE', 'TRDT': 'TIME', ...}
with open('mycsvfile.csv', 'w') as f:  # Just use 'w' mode in 3.x
    header_present = False
    for doc in res['hits']['hits']:
        my_dict = doc['_source']
        if not header_present:
            w = csv.DictWriter(f, my_dict.keys())
            w.writerow(header_names)  # will write DATE, TIME, ... in correct place
            header_present = True
        w.writerow(my_dict)
Thank you in advance!
It is better to use underscores in filenames than other special characters, since they are widely accepted.
Therefore, construct the file name as below:
csv_file = 'myfile_' + str(datetime.now().strftime('%Y_%m_%d_%H_%M_%S')) + '.csv'
Use datetime as below:
from elasticsearch import Elasticsearch
import csv
import os
from datetime import datetime

es = Elasticsearch(["9200"])
# Replace the following Query with your own Elastic Search Query
res = es.search(index="search", body=
{
    "_source": ["DTDT", "TRDT", "SPLE", "RPLE"],
    "query": {
        "bool": {
            "should": [
                {"wildcard": {"CN": "TEST1"}}
            ]
        }
    }
}, size=10)

file_path = <PASS YOUR FILE HERE>
csv_file = 'myfile_' + str(datetime.now().strftime('%Y_%m_%d_%H_%M_%S')) + '.csv'
csv_file_full = os.path.join(file_path, csv_file)  # os.path.join inserts the separator itself
header_names = { 'DTDT': 'DATE', 'TRDT': 'TIME', ...}
with open(csv_file_full, 'w') as f:  # Just use 'w' mode in 3.x
    header_present = False
    for doc in res['hits']['hits']:
        my_dict = doc['_source']
        if not header_present:
            w = csv.DictWriter(f, my_dict.keys())
            w.writerow(header_names)  # will write DATE, TIME, ... in correct place
            header_present = True
        w.writerow(my_dict)
Yes, you can do it like this.
Note that ":" is not supported in filenames, so the format below uses "." separators instead, e.g. 20171012-17.24.59:
>>> import time
>>> fname = lambda: "mycsvfile{}.csv".format(time.strftime("%Y%m%d-%H.%M.%S"))
>>>
>>> fname()
'mycsvfile20171012-17.24.59.csv'
>>> with open(fname(), "w") as f:
...     pass
Have a variable for the file name, such as file_name, and use datetime.now():
from datetime import datetime
file_name = 'mycsvfile' + str(datetime.now()) + '.csv'
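Note that str(datetime.now()) includes colons and a space (e.g. 2017-10-12 10:00:00.123456), which Windows filenames do not allow, so on Windows prefer a strftime-based variant such as:

from datetime import datetime

# Timestamp without characters that are invalid in Windows filenames
file_name = 'mycsvfile' + datetime.now().strftime('%Y%m%d-%H%M%S') + '.csv'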

Parsing text file after certain word

I would like to parse the following file and get the values after the "ID" and "Label":
{"data" : [{
"id" : "3743",
"fgColor" : "#000000",
"Comment" : [ "GLIO" ],
"Group" : "0",
"Shape" : "roundrectangle",
"GraphicsName" : "TITLE:Glioma",
"Matching_Attribute" : [ "TITLE:Glioma" ],
"Entry_id" : "78",
"Label" : "TITLE:Glioma",
"EntrezIDs" : "05214, ",
"shared_name" : "path:hsa05214",
"Type" : "map",
"kegg_x" : "86.0",
"kegg_y" : "58.0",
"bgColor" : "#FFFFFF",
"name" : "path:hsa05214",
"SUID" : 3743,
"Height" : "25",
"Width" : "92",
"Link" : "http://www.kegg.jp/dbget-bin/www_bget?hsa05214",
"selected" : false
}]}
I'm using the following code, but nothing is being written to the specified file:
import re

cyjs = open("/users/skylake/desktop/cyjs-example.txt", "r")
jsonfile = open("/users/skylake/desktop/jsonfile.txt", "w")
for line in cyjs:
    if line.startswith('"id"'):
        print(line)
        jsonfile.write(line)
jsonfile.close()
There is a tool better suited to the problem.
It is a JSON file, which can be parsed with the built-in json module:
In [1]: import json

In [2]: with open("data.txt", "r") as f:
   ...:     data = json.load(f)

In [3]: obj = data["data"][0]

In [4]: obj["id"]
Out[4]: u'3743'

In [5]: obj["Label"]
Out[5]: u'TITLE:Glioma'
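If the data array can hold more than one object (an assumption; the sample shows only one), the same approach extends to a loop:

# Print the id and Label of every object in the data array
for obj in data["data"]:
    print(obj["id"], obj["Label"])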
