Json in python 3 get element - python

I build this json file
{
"systems-under-test": [{
"type": "url",
"sytems": [
"www.google.com",
"www.google.com",
"www.google.com"
]
},
{
"type": "api",
"sytems": [
"api.com",
"api.fr"
]
},
{
"type": "ip",
"sytems": [
"172.168.1.1",
"172.168.1.0"
]
}
],
"headers-default-configuration": {
"boolean": false
},
"headers-custom-configuration": {
"boolean": true,
"settings": {
"headers": {
"header-name": "x-frame-options",
"expected-value": ["deny", "sameorigin"]
}
}
},
"header-results": []
}
I want to add system under test to 3 different list, based on the type, for example type = URL to url_list and so on.
def loadConfigFile(self, urls_list=None):
path = self.validate_path()
with open(path) as f:
data = json.load(f)
pprint(data)
for key, value in data.items():
if key == "systems-under-test":
for x in value:
print(x.keys()[0])
if x.values[0] == "url":
url = x.get("systems")
print(url)
urls_list.add[url]
the output needs to be like:
all this :
"www.google.com"
"www.google.com"
"www.google.com"
needs to be added to url_list
when I try to access key value by using : x.values[0] == "URL", I keep getting this error
TypeError: 'dict_keys' object does not support indexing

the problem is solved by adding () as shown below:
def loadConfigFile(self, urls_list=None):
    """Load the JSON config file and collect every system of type "url".

    Args:
        urls_list: optional list the URLs are appended to; a fresh list is
            created when omitted (avoids a mutable default argument).

    Returns:
        The list containing every system whose entry has ``"type": "url"``.
    """
    if urls_list is None:
        urls_list = []
    path = self.validate_path()
    with open(path) as f:
        data = json.load(f)
    # Look entries up by key instead of relying on dict value ordering:
    # list(x.values())[0] silently breaks as soon as the key order changes.
    for entry in data.get("systems-under-test", []):
        if entry.get("type") == "url":
            # NOTE: the config file spells the key "sytems" (sic).
            for url in entry.get("sytems", []):
                print(url)
                urls_list.append(url)
    return urls_list
results will be
www.google.com
www.google.com
www.google.com

This seems like an easy way to do it:
from json import load

# Read the config and, if the first systems-under-test entry is the URL
# group, print its list of systems (key is spelled "sytems" in the file).
with open("data.json") as json_file:
    data = load(json_file)

first_entry = data["systems-under-test"][0]
if first_entry["type"] == "url":
    print(first_entry["sytems"])
Output:
['www.google.com', 'www.google.com', 'www.google.com']

Related

Parse complex JSON in Python

EDITED WITH LARGER JSON:
I have the following JSON and I need to get id element: 624ff9f71d847202039ec220
results": [
{
"id": "62503d2800c0d0004ee4636e",
"name": "2214524",
"settings": {
"dataFetch": "static",
"dataEntities": {
"variables": [
{
"id": "624ffa191d84720202e2ed4a",
"name": "temp1",
"device": {
"id": "624ff9f71d847202039ec220",
"name": "282c0240ea4c",
"label": "282c0240ea4c",
"createdAt": "2022-04-08T09:01:43.547702Z"
},
"chartType": "line",
"aggregationMethod": "last_value"
},
{
"id": "62540816330443111016e38b",
"device": {
"id": "624ff9f71d847202039ec220",
"name": "282c0240ea4c",
},
"chartType": "line",
}
]
}
...
Here is my code (EDITED)
url = "API_URL"
response = urllib.urlopen(url)
data = json.loads(response.read().decode("utf-8"))
print url
all_ids = []
for i in data['results']: # i is a dictionary
for variable in i['settings']['dataEntities']['variables']:
print(variable['id'])
all_ids.append(variable['id'])
But I have the following error:
for variable in i['settings']['dataEntities']['variables']:
KeyError: 'dataEntities'
Could you please help?
Thanks!!
What is it printing when you print(fetc)? If you format the json, it will be easier to read, the current nesting is very hard to comprehend.
fetc is a string, not a dict. If you want the dict, you have to use the key.
Try:
import json
import urllib.request

# Fetch the JSON document and show the nested "dataEntities" object of
# each result, so the structure can be inspected.
url = "API_URL"
response = urllib.request.urlopen(url)  # Python 3: urlopen lives in urllib.request
data = json.loads(response.read().decode("utf-8"))
print(url)  # Python 3 print() — the original mixed py2 `print url` with py3 calls
for i in data['results']:
    print(json.dumps(i['settings']))
    print(i['settings']['dataEntities'])  # fixed: closing ")" was missing
EDIT: To get to the id field, you'll need to dive further.
i['settings']['dataEntities']['variables'][0]['id']
So if you want all the ids you'll have to loop over the variables (assuming the list is more than one)`, and if you want them for all the settings, you'll need to loop over that too.
Full solution for you to try (EDITED after you uploaded the full JSON):
import json
import urllib.request

# Fetch the JSON document and collect both the variable ids and the nested
# device ids from every result entry.
url = "API_URL"
response = urllib.request.urlopen(url)  # Python 3: urlopen lives in urllib.request
data = json.loads(response.read().decode("utf-8"))
print(url)  # Python 3 print() — the original mixed py2 `print url` with py3 calls
all_ids = []
for i in data['results']:  # i is a dictionary
    for variable in i['settings']['dataEntities']['variables']:
        print(variable['id'])
        all_ids.append(variable['id'])
        all_ids.append(variable['device']['id'])  # fixed: closing ")" was missing
Let me know if that works.
The shared JSON is not valid. A valid JSON similar to yours is:
{
"results": [
{
"settings": {
"dataFetch": "static",
"dataEntities": {
"variables": [
{
"id": "624ffa191d84720202e2ed4a",
"name": "temp1",
"span": "inherit",
"color": "#2ccce4",
"device": {
"id": "624ff9f71d847202039ec220"
}
}
]
}
}
}
]
}
In order to get a list of ids from your JSON you need a double for cycle. A Pythonic code to do that is:
all_ids = [y["device"]["id"] for x in my_json["results"] for y in x["settings"]["dataEntities"]["variables"]]
Where my_json is your initial JSON.

Finding the headers for values in Json files

Hi I've got json data that looks something like this:
{
"content": {
"Header 1": [
{
"name": "Name1",
}
},
{
"name": "Name2",
}
}
],
"Header 2": [
{
"name": "Name3",
}
}
],
}
}
I'm looking to convert this into lists that look something like this:
header1 = ["Name1", "Name2"]
header2 = ["Name3"]
So far I've been able to get all the names that I want using objectpath.
from importlib.resources import path
import json
from unicodedata import name
import objectpath
path = (r"C:\Users\path\example.json")
with open(path) as json_file:
data = json.load(json_file)
tree_obj = objectpath.Tree(data)
names = list(tree_obj.execute('$..name'))
print (names)
But I've been unable to get the appropriate headers for each name as it appears to be nested under the 'content' header
Any help would be appreciated. Thanks :)
This does what you ask. Just iterate through the keys of "content", and grab the keys in the subobjects.
import json

jsonx = """{
"content": {
"Header 1": [
{
"name": "Name1"
},
{
"name": "Name2"
}
],
"Header 2": [
{
"name": "Name3"
}
]
}
}"""

# Parse once, then normalise each header into a lower-case, space-free key
# mapped to the list of names contained under it.
data = json.loads(jsonx)
gather = {
    header.lower().replace(' ', ''): [entry['name'] for entry in entries]
    for header, entries in data["content"].items()
}
print(gather)
Output:
{'header1': ['Name1', 'Name2'], 'header2': ['Name3']}
And for those who like one-liners:
# Single-expression variant: a dict comprehension producing the same
# {normalised-header: [names]} mapping.
gather = {
    k.lower().replace(' ', ''): [vv['name'] for vv in v]
    for k, v in data["content"].items()
}

Is there any way to convert specific JSON data to CSV?

I have JSON format which looks like
Here is the link https://drive.google.com/file/d/1RqU2s0dqjd60dcYlxEJ8vnw9_z2fWixd/view?usp=sharing
result =
{
"ERROR":[
],
"LinkSetDbHistory":[
],
"LinkSetDb":[
{
"Link":[
{
"Id":"8116078"
},
{
"Id":"7654180"
},
{
"Id":"7643601"
},
{
"Id":"7017037"
},
{
"Id":"6190213"
},
{
"Id":"5902265"
},
{
"Id":"5441934"
},
{
"Id":"5417587"
},
{
"Id":"5370323"
},
{
"Id":"5362514"
},
{
"Id":"4818642"
},
{
"Id":"4330602"
}
],
"DbTo":"pmc",
"LinkName":"pubmed_pmc_refs"
}
],
"DbFrom":"pubmed",
"IdList":[
"25209241"
]
},
{
"ERROR":[
],
"LinkSetDbHistory":[
],
"LinkSetDb":[
{
"Link":[
{
"Id":"7874507"
},
{
"Id":"7378719"
},
{
"Id":"6719480"
},
{
"Id":"5952809"
},
{
"Id":"4944516"
}
],
"DbTo":"pmc",
"LinkName":"pubmed_pmc_refs"
}
],
"DbFrom":"pubmed",
"IdList":[
"25209630"
]
},
I want to fetch each IdList value together with its length (the number of Ids in the Link array, e.g. 12), like this:
"IdList":"25209241"
so the final output will be
IDList: length
25209241: 12 (Total number of Id in link array)
25209630 : 5 (Total number of Id in link array)
I have tried this code but not working with single or multiple values.
pmc_ids = [link["Id"] for link in results["LinkSetDb"]["Link"]]
len(pmc_ids)
How it can work with a large dataset if there?
You have "LinkSetDb" as a list containing a single dictionary but you are indexing it as if it is a dictionary. Use:
# "LinkSetDb" is a one-element list, so index it before reaching "Link".
pmc_ids = [entry["Id"] for entry in result["LinkSetDb"][0]["Link"]]
len(pmc_ids)
The 'Link' key is inside a list. So, change pmc_ids = [link["Id"] for link in results["LinkSetDb"]["Link"]] to pmc_ids = [link["Id"] for link in results["LinkSetDb"][0]["Link"]].
To generate csv file, the code would be something like this:
import json
import csv

# Build a CSV of (IdList, length) pairs, where "length" is the number of
# linked Ids in each record's first LinkSetDb entry.
with open('Citation_with_ID.json', 'r') as f_json:
    # json.load reads the file object directly; no read()+loads, and the
    # `with` block closes the file — an explicit close() is redundant.
    json_dict = json.load(f_json)

csv_headers = ["IdList", "length"]
csv_values = []
for record in json_dict:
    if len(record["LinkSetDb"]) > 0:
        pmc_ids = [link["Id"] for link in record["LinkSetDb"][0]["Link"]]
    else:
        pmc_ids = []
    length = len(pmc_ids)
    # A record is expected to carry exactly one id; anything else maps to None.
    if len(record['IdList']) == 1:
        IdList = record['IdList'][0]
    else:
        IdList = None
    csv_values.append([IdList, length])

# newline='' stops the csv module emitting blank rows on Windows.
with open('mycsvfile.csv', 'w', newline='') as f_csv:
    w = csv.writer(f_csv)
    w.writerow(csv_headers)
    w.writerows(csv_values)
If you want to store the values in a dictionary then something like this can be used:
# Transpose the row list into columns, then zip the id column with the
# length column to obtain an {IdList: length} dictionary.
columns = list(zip(*csv_values))
dict(zip(columns[0], columns[1]))

Json nested encryption value - Python

I have a JSON output file and I am trying to encrypt the value of a key (name) in it using the SHA-256 method. There are two occurrences of "name" in a list of dicts, but every time I write, the changes are reflected only once. Can anybody tell me where I am going wrong?
Json structure:
Output.json
{
"site": [
{
"name": "google",
"description": "Hi I am google"
},
{
"name": "microsoft",
"description": "Hi, I am microsoft"
}
],
"veg": [
{
"status": "ok",
"slot": null
},
{
"status": "ok"
}
]
}
Code:
import hashlib
import json
class test():
def __init__(self):
def encrypt(self):
with open("Output.json", "r+") as json_file:
res = json.load(json_file)
for i in res['site']:
for key,val in i.iteritems():
if 'name' in key:
hs = hashlib.sha256(val.encode('utf-8')).hexdigest()
res['site'][0]['name'] = hs
json_file.seek(0)
json_file.write(json.dumps(res,indent=4))
json_file.truncate()
Current Output.json
{
"site": [
{
"name": "bbdefa2950f49882f295b1285d4fa9dec45fc4144bfb07ee6acc68762d12c2e3",
"description": "Hi I am google"
},
{
"name": "microsoft",
"description": "Hi, I am microsoft"
}
],
"veg": [
{
"status": "ok",
"slot": null
},
{
"status": "ok"
}
]
}
I think your problem is in this line:
res['site'][0]['name'] = hs
you are always changing the name field of the first map in the site list. I think you want this to be:
i['name'] = hs
so that you are updating the map you are currently working on (pointed to by i).
Instead of iterating over each item in the dictionary, you could make use of the fact that dictionaries are made for looking up values by key, and do this:
if 'name' in i:
val = i['name']
hs = hashlib.sha256(val.encode('utf-8')).hexdigest()
i['name'] = hs
json_file.seek(0)
json_file.write(json.dumps(res, indent=4))
json_file.truncate()
instead of this:
for key,val in i.iteritems():
if 'name' in key:
...
Also, iteritems() should be items(), and if 'name' in key should be if key == 'name', as key is a string. As it is, you'd be matching any entry with a key name containing the substring 'name'.
UPDATE: I noticed that you are writing the entire file multiple times, once for each name entry that you encrypt. Even without this I would recommend that you open the file twice...once for reading and once for writing. This is preferred over opening a file for both reading and writing, and having to seek and truncate. So, here are all of my suggested changes, along with a few other tweaks, in a full version of your code:
import hashlib
import json
class Test:
    """Hash the "name" field of every site entry in a JSON file."""

    def encrypt(self, infile, outfile=None):
        """Read *infile*, replace each "name" under "site" with its SHA-256
        hex digest, and write the document to *outfile*.

        When *outfile* is omitted the input file is overwritten in place.
        """
        outfile = infile if outfile is None else outfile
        with open(infile) as json_file:
            document = json.load(json_file)
        for entry in document['site']:
            if 'name' in entry:
                digest = hashlib.sha256(entry['name'].encode('utf-8')).hexdigest()
                entry['name'] = digest
        with open(outfile, "w") as json_file:
            json.dump(document, json_file, indent=4)
# Example invocation: read /tmp/input.json and write the hashed copy elsewhere.
Test().encrypt("/tmp/input.json", "/tmp/output.json")
# Test().encrypt("/tmp/Output.json") # <- this form will read and write to the same file
Resulting file contents:
{
"site": [
{
"name": "bbdefa2950f49882f295b1285d4fa9dec45fc4144bfb07ee6acc68762d12c2e3",
"description": "Hi I am google"
},
{
"name": "9fbf261b62c1d7c00db73afb81dd97fdf20b3442e36e338cb9359b856a03bdc8",
"description": "Hi, I am microsoft"
}
],
"veg": [
{
"status": "ok",
"slot": null
},
{
"status": "ok"
}
]
}

Python - Problem extracting data from nested json

I have a problem extracting data from json, I tried n different ways. I was able to extract the ID itself, unfortunately I can't manage to show the details of the field.
Below is my json
{
"params": {
"cid": "15482782896",
"datemax": "20190831",
"datemin": "20190601",
"domains": [
"url.com"
],
},
"results": {
"59107": {
"url.com": {
"1946592": {
"data": {
"2019-06-01": {
"ENGINE": {
"DEVICE": {
"": {
"position": 21,
"url": "url3.com"
}
}
}
},
"2019-07-01": {
"ENGINE": {
"DEVICE": {
"": {
"position": 4,
"url": "url3.com"
}
}
}
},
"2019-08-01": {
"ENGINE": {
"DEVICE": {
"": {
"position": 2,
"url": "url3.com"
}
}
}
}
},
"keyword": {
"title": "python_1",
"volume": 10
}
},
"1946602": {
"data": {
"2019-06-01": {
"ENGINE": {
"DEVICE": {
"": {
"position": 5,
"url": "url1.com"
}
}
}
},
"2019-07-01": {
"ENGINE": {
"DEVICE": {
"": {
"position": 12,
"url": "url1.com"
}
}
}
},
"2019-08-01": {
"ENGINE": {
"DEVICE": {
"": {
"position": 10.25,
"url": "url1.com"
}
}
}
}
},
"keyword": {
"title": "python_2",
"volume": 20
}
}
}
}
}
}
I tried the following code but I got the result in the form of id itself
import json
import csv
def get_leaves(item, key=None):
    """Flatten a nested JSON structure into a single {key: scalar} dict.

    Dict values recurse with their own key, list items recurse with the key
    inherited from the enclosing dict, and scalars become ``{key: item}``.
    Later occurrences of the same key overwrite earlier ones.
    """
    if isinstance(item, dict):
        flat = {}
        for child_key, child in item.items():
            flat.update(get_leaves(child, child_key))
        return flat
    if isinstance(item, list):
        flat = {}
        for child in item:
            flat.update(get_leaves(child, key))
        return flat
    return {key: item}
with open('me_filename') as f_input:
    json_data = json.load(f_input)

# First pass: union of every leaf key, so the CSV header covers all rows.
fieldnames = sorted({k for entry in json_data for k in get_leaves(entry)})

with open('output.csv', 'w', newline='') as f_output:
    csv_output = csv.DictWriter(f_output, fieldnames=fieldnames)
    csv_output.writeheader()
    csv_output.writerows(get_leaves(entry) for entry in json_data)
I also tried to use the pandas but also failed to parse properly
import io
import json
import pandas as pd
with open('me_filename', encoding='utf-8') as f_input:
df = pd.read_json(f_input , orient='None')
df.to_csv('output.csv', encoding='utf-8')
The result I'd need to get it :
ID Name page volume url 2019-06-01 2019-07-01 2019-08-01 2019-09-01
1946592 python_1 url.com 10 url3.com 21 4 2 null
1946602 python_2 url.com 20 url1.com 5 12 10,25 null
What am I doing wrong?
Hmm, this is a bit of a convoluted solution — it looks messy and no longer resembles the code provided — however I believe it will resolve your issue.
First of all I had a problem with the provided Json (due to the trailing ',' on line 8) however have managed to generate:
Output (temp.csv)
ID,Name,Page,Volume,Url,2019-08-01,2019-07-01,2019-06-01,
1946592,python_1,url.com,10,url3.com,2,4,21,
1946602,python_2,url.com,20,url1.com,10.25,12,5,
using the following:
import json
# Every date seen anywhere in the data; filled in by get_dates() and then
# iterated to build one column per date.
dates: set = set()

# Collect the data
def get_breakdown(json):  # NOTE: parameter shadows the json module by design
    """Flatten the nested results structure into one record per keyword id."""
    collected_data = []
    for result in json['results']:
        for page in json['results'][result]:
            for _id in json['results'][result][page]:
                keyword_info = json['results'][result][page][_id]
                record = {
                    'ID': _id,
                    'Name': keyword_info['keyword']['title'],
                    'Page': page,
                    'Volume': keyword_info['keyword']['volume'],
                    'Dates': {},
                }
                for date in dates:
                    if date in keyword_info['data']:
                        day = keyword_info['data'][date]['ENGINE']['DEVICE']['']
                        record['URL'] = day['url']
                        record['Dates'][date] = {'Position': day['position']}
                    else:
                        record['Dates'][date] = {'Position': 'null'}
                collected_data.append(record)
    return collected_data
# Collect all dates across the whole data
# structure and save them to a set
def get_dates(json):
    """Record every date appearing anywhere in the data into the
    module-level ``dates`` set (union across all results/pages/ids)."""
    for result in json['results']:
        for page in json['results'][result]:
            for _id in json['results'][result][page]:
                # update() with a dict adds its keys — here, the date strings.
                dates.update(json['results'][result][page][_id]['data'])
# Write to .csv file
# Write to .csv file
def write_csv(collected_data, file_path):
    """Write the collected records to *file_path*.

    Column layout: ID,Name,Page,Volume,Url,<one column per date>. Columns
    follow the iteration order of the module-level ``dates`` set — the same
    order get_breakdown() used, so values line up with their headers.
    """
    # `with` guarantees the file is flushed and closed; the original opened
    # the file and never closed it.
    with open(file_path, "w") as f:
        # CSV Title
        date_string = ''.join('{0},'.format(date) for date in dates)
        f.write('ID,Name,Page,Volume,Url,{0}\n'.format(date_string))
        # Data
        for data in collected_data:
            position_string = ''.join(
                '{0},'.format(data['Dates'][date]['Position']) for date in dates)
            f.write('{0},{1},{2},{3},{4},{5}\n'.format(
                data['ID'],
                data['Name'],
                data['Page'],
                data['Volume'],
                data['URL'],
                position_string
            ))
# Code Body
# Code Body: load the export, gather all dates, then write the CSV.
with open('me_filename.json') as f_input:
    json_data = json.load(f_input)

get_dates(json_data)
write_csv(get_breakdown(json_data), "output.csv")
Hopefully you can follow the code and it does what is expected. I am sure that it can be made much more reliable - however as previously mentioned I couldn't make it work with the base code you provided.
After a small modification your code works great, but I noticed that showing the date as the next line would be a better solution in the format.
I tried to modify your solution to this form, but I'm still too weak in python to easily deal with it. Can you still tell me how you can do it to achieve this csv file format?
Output(temp.csv)
ID,Name,Page,Volume,Url,data,value,
1946592,python_1,url.com,10,url3.com,2019-08-01,2
1946592,python_1,url.com,10,url3.com,2019-07-01,4
1946592,python_1,url.com,10,url3.com,2019-06-01,21
1946602,python_2,url.com,20,url1.com,2019-08-01,10.25,
1946602,python_2,url.com,20,url1.com,2019-07-01,12,
1946602,python_2,url.com,20,url1.com,2019-06-01,5,

Categories

Resources