Json to RDF/XML file in Python - python

I am trying to save this json object:
[
{
"patient_id": "59b70b0e-51cb-4215-b65a-db470067c8de"
},
[
{
"tel_1_preferred_p": true,
"adr_region": "OK",
"tel_2_number": "800-979-6786",
"__modelname__": "Demographics",
"adr_city": "Bixby",
"ethnicity": null,
"adr_postalcode": "74008",
"name_family": "John",
"name_middle": null,
"tel_1_type": "h",
"tel_2_type": "c",
"name_prefix": null,
"email": "william.robinson#example.com",
"name_given": "Smith",
"adr_street": "23 Church Rd",
"bday": "1965-08-09",
"__documentid__": "65cd1101-b047-4ce5-afc0-778d033229ca",
"tel_1_number": "800-870-3011",
"preferred_language": "EN",
"gender": "male",
"name_suffix": null,
"tel_2_preferred_p": true,
"race": null,
"adr_country": "USA"
}
],
[
{
"startDate": "2014-10-02T00:00:00Z",
"name_identifier": "195967001",
"name_system": null,
"notes": "None",
"name_title": "Asthma",
"__modelname__": "Problem",
"endDate": "2014-10-13T00:00:00Z",
"__documentid__": "eaba4e92-3aaf-4d8c-a7db-8cfd563290c6"
},
{
"startDate": "2014-12-02T00:00:00Z",
"name_identifier": "161155000",
"name_system": null,
"notes": "None",
"name_title": "School problem (finding)",
"__modelname__": "Problem",
"endDate": "2014-12-14T00:00:00Z",
"__documentid__": "3058b12d-434d-48da-8b14-05dd485946cb"
},
{
"startDate": "2008-07-29T00:00:00Z",
"name_identifier": "185903001",
"name_system": "http://purl.bioontology.org/ontology/SNOMEDCT/",
"notes": null,
"name_title": "Needs influenza immunization",
"__modelname__": "Problem",
"endDate": "2010-09-13T00:00:00Z",
"__documentid__": "d982c117-43c3-407f-bd9e-27dc59008938"
}
],
[
{
"name": "Amputation of the foot (procedure)",
"comments": null,
"provider_institution": null,
"date_performed": "2014-10-22T00:00:00Z",
"name_value": "None",
"__modelname__": "Procedure",
"__documentid__": "221df5bd-16a8-4763-9827-06fb305b91e5",
"provider_name": null,
"name_abbrev": "180030006",
"name_type": "http://purl.bioontology.org/ontology/SNOMEDCT/",
"location": null
}
]
]
to an XML file or RDF in python code. I didn't find posts about rdf so I tried for XML. I tried also to change the list to dict like in this post:
Python Json loads() returning string instead of dictionary?
but it is not working. In views.py I have:
fileroot = '[{"patient_id":"'+record_id+'"},'+content0+','+content1+','+content2+']'
jsonData = simplejson.loads(fileroot)
It returns 'list' object has no attribute 'items'. And if I change it to :
fileroot = '[{"patient_id":"'+record_id+'"},'+content0+','+content1+','+content2+']'
jsonData = simplejson.loads(fileroot)[0]
It returns nothing.
I also tried from this link:
https://www.safaribooksonline.com/library/view/python-cookbook-3rd/9781449357337/ch06s05.html
but it is not working again.

This will translate your JSON object to XML:
import simplejson
content = simplejson.loads(YourJsonObject)
from xml.etree.ElementTree import Element
from xml.etree.ElementTree import tostring
def dict_to_xml(tag, d):
    '''
    Turn a simple dict of key/value pairs into XML.

    A new element named ``tag`` is created; every key of ``d`` becomes a
    child element whose text is ``str(value)``.  A ``None`` value (JSON
    ``null``) produces an empty child element rather than the literal
    text "None".  Returns the new parent element.
    '''
    elem = Element(tag)
    for key, val in d.items():
        child = Element(key)
        # Leave .text unset for None so the element serializes as empty.
        if val is not None:
            child.text = str(val)
        elem.append(child)
    return elem
# Start at index 1, as before, and convert every record of each remaining
# group rather than only the first one.
# NOTE(review): assumes each content[1:] entry is a list of dicts, as in the
# JSON shown in the question — confirm for other inputs.
for group in content[1:]:
    for record in group:
        e = dict_to_xml('SomeNode', record)
        print(tostring(e))

Related

Python json.dumps using new dictionary not returning valid json format

I'm currently using a python module that helps with the tenable API to export asset data from tenable. The export function returns an "ExportIterator" type object to walk through the results of the export.
Essentially this returns too much data per asset, and I'm having difficulty figuring out how to filter out the data being returned so I can use it.
This returns thousands of json objects with hundreds of keys (I've removed and obfuscated several) like this:
{
"id": "1a2b3c",
"has_plugin_results": true,
"created_at": "xxx",
"terminated_at": null,
"terminated_by": null,
"updated_at": "xxx",
"deleted_at": null,
"deleted_by": null,
"first_seen": "",
"last_seen": "",
"first_scan_time": "xxx",
"last_scan_time": "xxx",
"last_authenticated_scan_date": "xxx",
"last_licensed_scan_date": "xxx",
"last_scan_id": "xxx",
"last_schedule_id": "xxx",
"azure_vm_id": null,
"azure_resource_id": null,
"gcp_project_id": null,
"gcp_zone": null,
"gcp_instance_id": null,
"aws_ec2_instance_ami_id": null,
"aws_ec2_instance_id": null,
"agent_uuid": "xxx",
"bios_uuid": "xxx",
"network_id": "xxx",
"network_name": "Default",
"aws_owner_id": null,
"aws_availability_zone": null,
"aws_region": null,
"aws_vpc_id": null,
"aws_ec2_instance_group_name": null,
"aws_ec2_instance_state_name": null,
"aws_ec2_instance_type": null,
"aws_subnet_id": null,
"aws_ec2_product_code": null,
"aws_ec2_name": null,
"mcafee_epo_guid": "{xxx}",
"mcafee_epo_agent_guid": "{xxx}",
"servicenow_sysid": null,
"agent_names": [
"aaabbbccc123"
],
"installed_software": [],
"ipv4s": [
"1.1.1.1",
"2.2.2.2"
],
"ipv6s": [],
"fqdns": [
"aaabbbbccc"
],
"mac_addresses": [
"aa:bb:cc"
],
"netbios_names": [
"aaabbbccc123"
],
"operating_systems": [
"foobar 10"
],
"system_types": [
"general-purpose"
],
"hostnames": [
"aaabbbccc123"
],
"sources": [
{
"name": "AGENT",
"first_seen": "xxx",
"last_seen": "xxx"
}
],
}
This module function for exporting doesn't support any arguments for filtering the json object itself.
To filter, I'm using this to map the "hostnames": value to a new key named "vmName" in a new dictionary:
from tenable.io import TenableIO
import json
tio = TenableIO()
wr = open('tioasset.json','w')
for asset in tio.exports.assets():
new_data = {'vmName' : asset['hostnames'],},
wr.write(json.dumps(new_data, indent = 2, separators=(',', ':')))
wr.close()
This drops all the unnecessary keys from the api response , but the formatting seems to be all wrong:
output from code:
][
{
"vmName":[
"aaabbbccc123"
]
}
][
{
"vmName":[
"dddeeefff123"
]
}
][
{
"vmName":[
"ggghhhiii123"
]
}
][
{
"vmName":[
"jjjkkklll123"
]
}
][
{
"vmName":[
"mmmnnooo123"
]
}
][
Any idea how to make the code return appropriately formatted json data dictionaries? something like this:
[
{
"vmName":"aaabbbccc123"
},
{
"vmName":"dddeeefff123"
},
{
"vmName":"ggghhhiii123"
},
{
"vmName":"jjjkkklll123"
}
]
That's because hostnames is an array.
If you just want the first element, replace the assignment with this:
new_data = {'vmName' : asset['hostnames'][0]}
or you can do this if you have many hostnames in each array :
for asset in tio.exports.assets():
    for a in asset['hostnames']:
        # No trailing comma after the dict: a trailing comma would make
        # new_data a one-element tuple, which json.dumps renders as a list.
        new_data = {'vmName': a}
        wr.write(json.dumps(new_data, indent=2, separators=(',', ':')))
from tenable.io import TenableIO
import json
tio = TenableIO()

# Build one {"vmName": ...} object per hostname across all exported assets.
result = [
    {'vmName': hostname}
    for asset in tio.exports.assets()
    for hostname in asset['hostnames']
]

# Serialize once so the file holds a single JSON array; the with-block
# closes the file even if the write raises.
with open('tioasset.json', 'w') as wr:
    wr.write(json.dumps(result))

Parsing list of dictionaries in a dictionary to retrieve a specific key's value from each dictionary

I got a JSON response and converted it to a python dictionary using json.loads(). So the dictionary looks like this:
{u'body': u'[{"id":"1","entity":"zone","status":"PROCESSING","url":null,"createdOn":"2019-10-11T05:49:11Z"},{"id":"2","entity":"floor","status":"FAILED","url":null,"createdOn":"2019-10-11T05:49:15Z"},{"id":"3","entityType":"apartment","status":"SUCCESS","url":null,"createdOn":"2019-10-11T05:49:18Z"}]',u'isBase64Encoded': False, u'statusCode': 200}
I named this as testStatusList. I want to retrieve the value of "status" key of every dictionary inside "body". I was able to retrieve the "body" by giving body = testStatusList['body']. Now, the dictionary looks like:
[
{
"id": "1",
"entityType": "zone",
"status": "PROCESSING",
"url": null,
"createdOn": "2019-03-07T12:47:10Z"
},
{
"id": "2",
"entityType": "floor",
"status": "FAILED",
"url": null,
"createdOn": "2019-08-19T16:46:13Z"
},
{
"id": "3",
"entityType": "apartment",
"status": "SUCCESS",
"url": null,
"createdOn": "2019-08-19T16:46:13Z"
}
]
I tried out the solution from "Parsing a dictionary to retrieve a key in Python 3.6":
testStatusList= json.loads(status_response['Payload'].read())
body = testStatusList['body']
status =[]
for b in body:
for k,v in b.items():
if k == 'status':
status.append(v)
but I keep getting AttributeError: 'unicode' object has no attribute 'items'. Is there a different method to get items for unicode objects?
So I basically want to retrieve all the statuses i.e., PROCESSING, FAILED AND SUCCESS so that I can put an 'if' condition to display appropriate messages when something failed for that particular "id". I am very unsure about my approach as I am totally new to Python. Any help would be much appreciated thanks!
body is still a (unicode) string in your top blob. Use json.loads again on that string:
import json

# At this point body is still a JSON-encoded string, not a list.
body = """[
{
"id": "1",
"entityType": "zone",
"status": "PROCESSING",
"url": null,
"createdOn": "2019-03-07T12:47:10Z"
},
{
"id": "2",
"entityType": "floor",
"status": "FAILED",
"url": null,
"createdOn": "2019-08-19T16:46:13Z"
},
{
"id": "3",
"entityType": "apartment",
"status": "SUCCESS",
"url": null,
"createdOn": "2019-08-19T16:46:13Z"
}
]"""

# Decode the string so body becomes a list of dicts.
body = json.loads(body)

# Look the key up directly instead of scanning every key/value pair;
# entries without a "status" key are skipped, as before.
status = [b['status'] for b in body if 'status' in b]
print(status)
Result:
['PROCESSING', 'FAILED', 'SUCCESS']

JSON or Python dict / list decoding problem

I have been using the Python script below to try and retrieve and extract some data from Flightradar24, it would appear that it extracts the data in JSON format and will print the data out ok fully using json.dumps, but when I attempt to select the data I want (the status text in this case) using get it gives the following error:
'list' object has no attribute 'get'
Is the Data in JSON or a List ? I'm totally confused now.
I'm fairly new to working with data in JSON format, any help would be appreciated!
Script:
import flightradar24
import json
flight_id = 'BA458'
fr = flightradar24.Api()
flight = fr.get_flight(flight_id)
y = flight.get("data")
print (json.dumps(flight, indent=4))
X= (flight.get('result').get('response').get('data').get('status').get('text'))
print (X)
Sample of output data:
{
"result": {
"request": {
"callback": null,
"device": null,
"fetchBy": "flight",
"filterBy": null,
"format": "json",
"limit": 25,
"page": 1,
"pk": null,
"query": "BA458",
"timestamp": null,
"token": null
},
"response": {
"item": {
"current": 16,
"total": null,
"limit": 25
},
"page": {
"current": 1,
"total": null
},
"timestamp": 1546241512,
"data": [
{
"identification": {
"id": null,
"row": 4852575431,
"number": {
"default": "BA458",
"alternative": null
},
"callsign": null,
"codeshare": null
},
"status": {
"live": false,
"text": "Scheduled",
"icon": null,
"estimated": null,
"ambiguous": false,
"generic": {
"status": {
"text": "scheduled",
"type": "departure",
"color": "gray",
"diverted": null
},
You can use print(type(variable_name)) to see what type it is. The .get(key[,default]) is not supported on lists - it is supported for dict's.
X = (flight.get('result').get('response').get('data').get('status').get('text'))
# ^^^^^^^^ does not work, data is a list of dicts
as data is a list of dicts:
"data": [ # <<<<<< this is a list
{
"identification": {
"id": null,
"row": 4852575431,
"number": {
"default": "BA458",
"alternative": null
},
"callsign": null,
"codeshare": null
},
"status": {
This should work:
# "data" is a list, so index its first entry before continuing with .get()
X = flight.get('result').get('response').get('data')[0].get('status').get('text')
The issue, as pointed out by @PatrickArtner, is that your data is actually a list rather than a dictionary. As an aside, you may find your code more readable if you use a helper function to apply dict.get repeatedly on a nested dictionary:
from functools import reduce
def ng(dataDict, mapList):
    """Nested Getter: walk dataDict along the keys in mapList.

    Returns the value at the end of the key path.  An empty mapList returns
    dataDict itself.  If a key on the path is missing, None is returned
    (mirroring dict.get) instead of failing on the next lookup.  A non-dict,
    non-None intermediate value (e.g. a list) still raises TypeError.
    """
    node = dataDict
    for key in mapList:
        # A previous lookup found nothing: stop instead of calling
        # dict.get on None, which would raise TypeError.
        if node is None:
            return None
        node = dict.get(node, key)
    return node
# The inner call walks down to the "data" list; [0] picks its first entry,
# and the outer call then reads status -> text from that entry.
X = ng(ng(flight, ['result', 'response', 'data'])[0], ['status', 'text'])

Json extraction of specific field via Python

I am trying to get the "externalCode" field from the incomplete JSON file below, but I am lost: my Python code only gets as far as the second element and then raises the error shown. I am not sure how to traverse a nested JSON structure like this one.
output.writerow([row['benefitCategories'], row['benefitValueSets']] + row['disabled'].values())
KeyError: 'benefitValueSets'
import csv, json, sys
input = open('C:/Users/kk/Downloads/foo.js', 'r')
data = json.load(input)
input.close()
output = csv.writer(sys.stdout)
output.writerow(data[0].keys()) # header row
for row in data:
output.writerow([row['benefitCategories'], row['benefitValueSets']] + row['disabled'].values())
Json file
[
{
"benefitCategories": [
{
"benefits": [
{
"benefitCode": "NutritionLabel",
"benefitCustomAttributeSets": [
],
"benefitValueSets": [
{
"benefitValues": [
null
],
"costDifferential": 0,
"default": false,
"disabled": false,
"displayValue": "$500",
"externalCode": null,
"id": null,
"internalCode": "$500",
"selected": false,
"sortOrder": 0
}
],
"configurable": false,
"displayName": "DEDUCTIBLE",
"displayType": null,
"externalCode": "IndividualInNetdeductibleAmount",
"id": null,
"key": "IndividualInNetdeductibleAmount",
"productBenefitRangeValue": null,
"sortOrder": 0,
"values": [
{
"code": null,
"description": null,
"id": null,
"numericValue": null,
"selected": false,
"value": "$500"
}
]
},
{
"benefitCode": "NutritionLabel",
"benefitCustomAttributeSets": [
],
"benefitValueSets": [
{
"benefitValues": [
null
],
"costDifferential": 0,
"default": false,
"disabled": false,
"displayValue": "100%",
"externalCode": null,
"id": null,
"internalCode": "100%",
"selected": false,
"sortOrder": 0
}
],
"configurable": false,
"displayName": "COINSURANCE",
"displayType": null,
"externalCode": "PhysicianOfficeInNetCoInsurancePct",
"id": null,
"key": "PhysicianOfficeInNetCoInsurancePct",
"productBenefitRangeValue": null,
"sortOrder": 0,
"values": [
{
"code": null,
"description": null,
"id": null,
"numericValue": null,
"selected": false,
"value": "100%"
}
]
},
{
Try this code:
import csv, json, sys
# Load the whole JSON document; the with-block closes the file even if
# parsing fails, and the handle no longer shadows the builtin input().
with open('C:/Users/spolireddy/Downloads/foo.js', 'r') as infile:
    data = json.load(infile)

output = csv.writer(sys.stdout)
output.writerow(data[0].keys())  # header row
for row in data:
    # benefitValueSets is nested under benefitCategories -> benefits; it is
    # not a key of row itself, which is what caused the KeyError.
    value_set = row['benefitCategories'][0]['benefits'][0]['benefitValueSets'][0]
    output.writerow([row['benefitCategories'], value_set, value_set['disabled']])
# for externalCode:
row['benefitCategories'][0]['benefits'][0]['benefitValueSets'][0]['externalCode']
I'm not quite sure I understand what you're looking to do with your code. There are multiple externalCode values for each element in the array, at least from the sample you've posted. But you can get the data you're looking for with this syntax:
data[0]["benefitCategories"][0]["benefits"][0]["externalCode"]
data[0]["benefitCategories"][0]["benefits"][1]["externalCode"]
The code below iterates through the data you're interested in (with a slightly modified JSON file so that it's complete) and works as desired:
import csv, json, sys
# The with-block closes the file even if parsing fails, and the handle no
# longer shadows the builtin input().
with open('junk.json', 'r') as infile:
    data = json.load(infile)

# Each entry under the first category's "benefits" has its own externalCode.
for x in data[0]["benefitCategories"][0]["benefits"]:
    print(x["externalCode"] + "\n\n")

Getting Deeper Level JSON Values in Python

I have a Python script that make an API call to retrieve data from Zendesk. (Using Python 3.x) The JSON object has a structure like this:
{
"id": 35436,
"url": "https://company.zendesk.com/api/v2/tickets/35436.json",
"external_id": "ahg35h3jh",
"created_at": "2009-07-20T22:55:29Z",
"updated_at": "2011-05-05T10:38:52Z",
"type": "incident",
"subject": "Help, my printer is on fire!",
"raw_subject": "{{dc.printer_on_fire}}",
"description": "The fire is very colorful.",
"priority": "high",
"status": "open",
"recipient": "support#company.com",
"requester_id": 20978392,
"submitter_id": 76872,
"assignee_id": 235323,
"organization_id": 509974,
"group_id": 98738,
"collaborator_ids": [35334, 234],
"forum_topic_id": 72648221,
"problem_id": 9873764,
"has_incidents": false,
"due_at": null,
"tags": ["enterprise", "other_tag"],
"via": {
"channel": "web"
},
"custom_fields": [
{
"id": 27642,
"value": "745"
},
{
"id": 27648,
"value": "yes"
}
],
"satisfaction_rating": {
"id": 1234,
"score": "good",
"comment": "Great support!"
},
"sharing_agreement_ids": [84432]
}
Where I am running into issues is in the "custom_fields" section specifically. I have a particular custom field inside of each ticket I need the value for, and I only want that particular value.
To spare you too many specifics of the Python code, I am reading through each value below for each ticket and adding it to an output variable before writing that output variable to a .csv. Here is the particular place the breakage is occurring:
output += str(ticket['custom_fields'][id:23825198]).replace(',', '')+','
All the replace nonsense is to make sure that since it is going into a comma delimited file, any commas inside of the values are removed. Anyway, here is the error I am getting:
output += str(ticket['custom_fields'][id:int(23825198)]).replace(',', '')+','
TypeError: slice indices must be integers or None or have an __index__ method
As you can see I have tried a couple different variations of this to try and resolve the issue, and have yet to find a fix. I could use some help!
Thanks...
Are you using json.loads()? If so you can then get the keys, and do an if statement against the keys. An example on how to get the keys and their respective values is shown below.
import json
some_json = """{
"id": 35436,
"url": "https://company.zendesk.com/api/v2/tickets/35436.json",
"external_id": "ahg35h3jh",
"created_at": "2009-07-20T22:55:29Z",
"updated_at": "2011-05-05T10:38:52Z",
"type": "incident",
"subject": "Help, my printer is on fire!",
"raw_subject": "{{dc.printer_on_fire}}",
"description": "The fire is very colorful.",
"priority": "high",
"status": "open",
"recipient": "support#company.com",
"requester_id": 20978392,
"submitter_id": 76872,
"assignee_id": 235323,
"organization_id": 509974,
"group_id": 98738,
"collaborator_ids": [35334, 234],
"forum_topic_id": 72648221,
"problem_id": 9873764,
"has_incidents": false,
"due_at": null,
"tags": ["enterprise", "other_tag"],
"via": {
"channel": "web"
},
"custom_fields": [
{
"sid": 27642,
"value": "745"
},
{
"id": 27648,
"value": "yes"
}
],
"satisfaction_rating": {
"id": 1234,
"score": "good",
"comment": "Great support!"
},
"sharing_agreement_ids": [84432]
}"""
# Decode the JSON text into Python objects
zenJSONObj = json.loads(some_json)

# Print the full list of custom fields
print("All the custom field data")
print(zenJSONObj['custom_fields'])
print("----")

# Print every key with its value, one custom field at a time
print("How keys and the values")
for custom_field in zenJSONObj['custom_fields']:
    print("----")
    for key, value in custom_field.items():
        print("key:", key, " value: ", value)
You can then modify the JSON object by doing something like
# Show the first custom field, overwrite its 'value' in the decoded object,
# then show it again to see the change.
print(zenJSONObj['custom_fields'][0])
zenJSONObj['custom_fields'][0]['value'] = 'something new'
print(zenJSONObj['custom_fields'][0])
Then re-encode it using the following:
# Serialize the object back to a JSON string with sorted keys and 4-space indentation.
newJSONObject = json.dumps(zenJSONObj, sort_keys=True, indent=4)
I hope this is of some help.

Categories

Resources