parsing/extracting nested JSON data with Python under Conditions - python

I am trying to extract specific values from the JSON returned by a POST request.
Here is the JSON. I want the values of the key "AN" (such as "shannoncampbell_znyq1", "katiekapprelmac", etc.), but only for entries whose
second "Settings" value is not zero. For example, since the second value for katiekapprelmac (the key for this value is T7) is not zero, my code should print katiekapprelmac. However, it does not.
JSON File:
{
"id": "jsonrpc",
"jsonrpc": "2.0",
"result": {
"result": [
{
"AccountId": 697429,
"Flags": [
"AutoDeployed"
],
"PartnerId": 287562,
"Settings": [
{
"AN": "shannoncampbell_znyq1"
},
{
"T7": "0"
}
]
},
{
"AccountId": 725177,
"Flags": null,
"PartnerId": 287562,
"Settings": [
{
"AN": "katiekapprelmac"
},
{
"T7": "5"
}
]
},
{
"AccountId": 689130,
"Flags": [
"AutoDeployed"
],
"PartnerId": 287562,
"Settings": [
{
"AN": "sara-pc_wpv7h"
},
{
"T7": "0"
}
]
},
{
"AccountId": 697531,
"Flags": null,
"PartnerId": 287562,
"Settings": [
{
"AN": "kaelaweeksmac"
},
{
"T7": "0"
}
]
},
{
"AccountId": 615877,
"Flags": null,
"PartnerId": 249098,
"Settings": [
{
"AN": "elenimacbookpro"
},
{
"T7": "0"
}
]
},
{
"AccountId": 700661,
"Flags": null,
"PartnerId": 287562,
"Settings": [
{
"AN": "sethnickersonmac"
},
{
"T7": "0"
}
]
},
Here is my python code:
# Question code as posted (block indentation is not visible in this copy).
response2 = requests.request("POST", url, data=payload2, headers=headers)
j = json.loads(response2.text)
# Recursively walk nested dicts and print every non-dict value that is reached.
# `level` is only incremented and passed along; it is never read.
def find_all(item, level):
if isinstance(item, dict):
for k in item:
(find_all(item[k], level+1))
else:
print(item)
# Same traversal as find_all, but with no `else` branch and no `return`
# statement, so every call evaluates to None.
def find_only(item, level):
if isinstance(item, dict):
for k in item:
(find_only(item[k], level+1))
for each in j['result']['result']:
# find_only(...) is always None and json.loads("0") is the int 0, so this
# comparison is always True and every "AN" value gets printed.
if (find_only(each['Settings'][1], 0)) != json.loads("0"):
find_all(each['Settings'][0], 0)
Instead, every "AN" value is printed. I get the following:
shannoncampbell_znyq1
katiekapprelmac
sara-pc_wpv7h
kaelaweeksmac
elenimacbookpro
sethnickersonmac
Rather than just katiekapprelmac
Please help. Thanks

In the code:
for each in j['result']['result']:
if (find_only(each['Settings'][1], 0)) != json.loads("0"):
find_all(each['Settings'][0], 0)
As I see it, your condition is always True because find_only() never returns anything (so the call evaluates to None).
I am not sure why you are passing level around and using so many recursive functions. The result is easy to extract from the data you posted; please see the code below.
# POST the request and decode the JSON body of the response.
response2 = requests.request("POST", url, data=payload2, headers=headers)
j = json.loads(response2.text)

for each in j['result']['result']:
    settings = each['Settings']
    # settings[0] carries the account name ("AN"), settings[1] the "T7" value.
    # "T7" is a string in the sample response; the integer 0 is accepted too.
    if settings[1]['T7'] not in ("0", 0):
        print(settings[0]['AN'])
If your response data is more complex than this, please post it so that I can give an exact solution.
If you have several possible key names, please look at the code below:
response2 = requests.request("POST", url, data=payload2, headers=headers)
j = json.loads(response2.text)
def find_all(item):
    """Return the first value stored in ``item`` when it is a dict.

    Each ``Settings`` entry in the sample response is a single-key dict, so
    the first value is the only one.

    Returns:
        The first dict value, or ``None`` when ``item`` is an empty dict or
        is not a dict at all.
    """
    if isinstance(item, dict):
        return next(iter(item.values()), None)
    # To pass non-dict items through unchanged, return ``item`` here instead.
    return None
def find_only(item):
    """Return the first value of the dict ``item``.

    Used to read the value of a single-key entry such as ``{"T7": "0"}``
    without having to know the key name.

    Returns:
        The first dict value, or ``None`` when ``item`` is an empty dict or
        is not a dict.
    """
    if isinstance(item, dict):
        return next(iter(item.values()), None)
    return None
for each in j['result']['result']:
    # The second Settings entry holds the value to test. It is the string "0"
    # in the sample response; an integer 0 is treated the same way.
    if find_only(each['Settings'][1]) not in ("0", 0):
        print(find_all(each['Settings'][0]))

jsonpath-ng can help you with this.
from jsonpath_ng.ext import parse

# "$..Settings" matches every "Settings" list at any depth of ``data``
# (``data`` is the already-decoded JSON response).
for match in parse("$..Settings").find(data):
    settings = match.value
    # settings[1] is the single-key dict holding the value to test.
    # str() lets the comparison work whether that value is "0" or 0.
    second_value = next(iter(settings[1].values()))
    if str(second_value) != '0':
        print(settings[0]['AN'])

Related

JSON dump appending random characters to end of file

I am writing a parser that goes through a list of data that is roughly formatted:
{
"teachers": [
{
"fullName": "Testing",
"class": [
{
"className": "Counselor",
"school": {
"id": "2b6671cb-617d-48d6-b0b5-3d44ce4da21c"
}
}
]
},
...
}
The parser is supposed to check for duplicate names within this json object, and when it stumbles upon said duplicate name, append the class to the class array.
So for example:
{
"teachers": [
{
"fullName": "Testing",
"class": [
{
"className": "Counselor",
"school": {
"id": "2b6671cb-617d-48d6-b0b5-3d44ce4da21c"
}
}
]
},
{
"fullName": "Testing",
"class": [
{
"className": "Math 8",
"school": {
"id": "2b6671cb-617d-48d6-b0b5-3d44ce4da21c"
}
}
]
},
...
}
Would return
{
"teachers": [
{
"fullName": "Testing",
"class": [
{
"className": "Counselor",
"school": {
"id": "2b6671cb-617d-48d6-b0b5-3d44ce4da21c"
}
},
{
"className": "Math 8",
"school": {
"id": "2b6671cb-617d-48d6-b0b5-3d44ce4da21c"
}
},
]
},
...
}
My current parser works just fine for most objects, however for some reason it doesn't catch some of the duplicates despite the names being the exact same, and also is appending the string
}7d-48d6-b0b5-3d44ce4da21c"
}
}
]
}
]
to the end of the json document. I am not sure why it would do this considering I am just dumping the modified json (which only is modified within the array).
My parser code is:
# Bookkeeping used by converter() and search():
# i_duplicates collects {"fullName", "index"} records and name_duplicates
# collects {"fullName"} records for every name seen so far.
i_duplicates = []
name_duplicates = []
# Merge teachers that share a fullName: the first class of a repeated teacher
# is appended to the first-seen teacher's "class" list and the repeated entry
# is removed, then the whole document is written back to final2.json.
def converter():
global i_duplicates
# NOTE(review): the file is opened "r+" and no close() appears in this function.
file = open("final2.json", "r+")
infinite = json.load(file)
for i, teacher in enumerate(infinite["teachers"]):
# Only the first class of each teacher is read; the school id below is hard-coded.
class_name = teacher["class"][0]["className"]
class_data = {
"className": class_name,
"school": {
"id": "2b6671cb-617d-48d6-b0b5-3d44ce4da21c"
}
}
d = {
"fullName": teacher["fullName"],
"index": i
}
c = {
"fullName": teacher["fullName"]
}
if c in name_duplicates:
infinite["teachers"][search(i_duplicates, c["fullName"])]["class"].append(class_data)
# NOTE(review): popping from the list that is being enumerated shifts the
# remaining items down by one, so the element right after a removed duplicate
# is never visited by this loop.
infinite["teachers"].pop(i)
# NOTE(review): seek(0) followed by dump overwrites the file from the start but
# does not truncate it; when the new JSON is shorter than the previous content,
# the old tail stays at the end of the file.
file.seek(0)
json.dump(infinite, file, indent=4)
else:
i_duplicates.append(d)
name_duplicates.append(c)
# Return the saved "index" of the first record in `a` whose "fullName" equals `t`.
def search(a, t):
for i in a:
if i["fullName"] == t:
return i["index"]
# NOTE(review): presumably this runs after the loop when nothing matched
# (indentation is not visible here); no value is returned in that case,
# so the caller receives None.
print(Fore.RED + "not found" + Fore.RESET)
I know I am going about this inefficiently, but I am not sure how to fix the issues the current algorithm is having. Any feedback appreciated.

Print JSON Data with python where there is no key & value

I'm currently working with some JSON data that is presenting a challenge. I need to print the device name next to the latency float in my script. I can easily print the latency float because it has a key:value pair; the device name, however, is not stored the same way, so I cannot figure out how to print it, especially as it changes for each API URL I loop through to retrieve the data.
The data i want to print is "DEVICE123-Et10"
See JSON data below,
{
"notifications": [
{
"timestamp": "511513234234",
"path_elements": [
"Devices",
"DEVICE1",
"versioned-data",
"connectivityMonitor",
"status",
"hostStatus",
"DEVICE123-Et10",
"defaultStats"
],
"updates": {
"httpResponseTime": {
"key": "httpResponseTime",
"value": {
"float": 0
}
}
}
},
{
"timestamp": "15153324243",
"path_elements": [
"Devices",
"DEVICE1",
"versioned-data",
"connectivityMonitor",
"status",
"hostStatus",
"DEVICE123-Et10",
"defaultStats"
],
"updates": {
"packetLoss": {
"key": "packetLoss",
"value": {
"int": 0
}
}
}
},
{
"timestamp": "151522324234",
"path_elements": [
"Devices",
"DEVICE1",
"versioned-data",
"connectivityMonitor",
"status",
"hostStatus",
"DEVICE123-Et10",
"defaultStats"
],
"updates": {
"latency": {
"key": "latency",
"value": {
"float": 0.238756565643454
}
}
}
},
{
"timestamp": "158056745645645",
"path_elements": [
"Devices",
"DEVICE1",
"versioned-data",
"connectivityMonitor",
"status",
"hostStatus",
"DEVICE123-Et10",
"defaultStats"
],
"updates": {
"jitter": {
"key": "jitter",
"value": {
"float": 0.03500000213213
}
}
}
}
]
}
Current code i am using to loop through my URL list and get the latency:
jsonrequest = requests.get(url, cookies=cookies, verify=False).json()
# The latency entry is read from notification index 2 first; when that entry
# has no "latency" key, fall back to index 1.
try:
    print(jsonrequest['notifications'][2]['updates']['latency']['value']['float'])
except KeyError:
    print(jsonrequest['notifications'][1]['updates']['latency']['value']['float'])
I went ahead and wrote a script to do what you wanted. It loops through all the notifications and, for each one that contains a "latency" update, prints the device name together with the latency. The name is taken as the second-to-last item of "path_elements", since it's always second to last.
import json
import requests
# NOTE: verify=False turns off TLS certificate verification for this request.
data = requests.get(url, cookies=cookies, verify=False).json()
notifications = data["notifications"]

for notification in notifications:
    latency_update = notification["updates"].get("latency")
    if not latency_update:
        continue  # this notification reports a different metric
    latency = latency_update["value"]["float"]
    # The device name is the second-to-last entry of "path_elements".
    name = notification["path_elements"][-2]
    print(name, latency)

Find a list inside of json data with regular expressions

I am trying to find a list inside of JSON data with RegEx. Here is my code:
import requests
from bs4 import BeautifulSoup
import re
import json
source = requests.get('https://www.tripadvisor.ch/Hotel_Review-g188113-d228146-Reviews-Coronado_Hotel-Zurich.html#REVIEWS').text
soup = BeautifulSoup(source, 'lxml')
# Capture the object assigned to pageManifest inside window.__WEB_CONTEXT__.
pattern = re.compile(r'window.__WEB_CONTEXT__={pageManifest:(\{.*\})};')
script = soup.find("script", text=pattern)
dictData = pattern.search(script.text).group(1)
# json.loads turns the captured text into Python objects (dicts/lists).
jsonData = json.loads(dictData)
# NOTE(review): pattern2 is anchored with ^ and expects a double quote as the
# first character, but it is searched against str(jsonData), i.e. the Python
# repr of the parsed object rather than JSON text. When nothing matches,
# search() returns None and .group(1) raises the AttributeError.
pattern2 = re.compile(r'^\"[0-9]*\":{\"data\":{\"locations\":(.*)},')
data_list = pattern2.search(str(jsonData)).group(1)
print(data_list)
With this regular expression pattern2 = re.compile(r'^\"[0-9]*\":{\"data\":{\"locations\":(.*)},') I want to find the value (list) of locations, but I get an error AttributeError: 'NoneType' object has no attribute 'group'.
The part of JSON data that I want to find looks like this:
"3960485871": {
"data": {
"locations": [
{
"detail": {
"hotel": {
"aliases": [
{
"id": 1099146,
"locale": "de",
"score": 390000,
"text": "hotel coronado"
},
{
"id": 1261196,
"locale": "es",
"score": 260000,
"text": "hotel coronado"
},
{
"id": 261321,
"locale": null,
"score": 112500,
"text": "coronado hotel z\u00fcrich"
}
],
"details": {
"numRooms": 40
}
},
"priceRange": {
"maximum": 212,
"minimum": 133
}
},
"formerName": null,
"locationId": 228146,
"neighborhoods": [],
"parents": [
{
"locationId": 188113,
"name": "Z\u00fcrich",
"placeType": "MUNICIPALITY"
},
{
"locationId": 188111,
"name": "Kanton Z\u00fcrich",
"placeType": "CANTON"
},
{
"locationId": 188045,
"name": "Schweiz",
"placeType": "COUNTRY"
},
{
"locationId": 4,
"name": "Europa",
"placeType": "CONTINENT"
},
{
"locationId": 1,
"name": "Welt",
"placeType": null
}
]
}
]
}
},
Try this (input data reduced as it was too big)
jsonData = {
"3960485871": {
"data": {
"locations": [
{
"detail": {},
"formerName": None,
"locationId": 228146,
"neighborhoods": [],
"parents": []
}
]
}
},
}
def find_recursive(data, type_):
    """Return the first object of type ``type_`` found in ``data``, depth-first.

    Args:
        data: Arbitrarily nested lists, tuples and dicts (or any leaf value).
        type_: A type, or tuple of types, as accepted by ``isinstance``.

    Returns:
        ``data`` itself when it already is an instance of ``type_``; otherwise
        the first match found while walking list/tuple items and dict values
        in order; ``None`` when nothing matches.
    """
    # If we found what we are looking for, return it.
    if isinstance(data, type_):
        return data

    # Pick the children to descend into. Other containers (such as set) can
    # be supported by adding a branch here.
    if isinstance(data, (list, tuple)):
        children = data
    elif isinstance(data, dict):
        children = data.values()
    else:
        return None

    for child in children:
        found = find_recursive(child, type_)
        if found is not None:
            return found

    # Nothing matched anywhere below this node.
    return None
find_recursive(jsonData, list)
Usage: find_recursive(DATA, TYPE) where DATA is the nested containers and TYPE the python type you want to find. It does accept lists and dicts as nested containers but it could be extended to others (such as set) just by iterating over their items and returning them if they are not None. You can use a single if for multiple types like I did with list and tuple in case they behave the same.

Navigating through a JSON with multiples arrays in Python

I'm trying to go through a JSON by using python but I can't access the "mbid" node. I want to print only the first "mbid" node.
Here is my function :
def get_data():
newJsonx = dict()
for item in data["resultsPage"]["results"]["calendarEntry"]:
# Takes element [0] of both "performance" and "identifier"; either lookup
# raises IndexError for an entry where that list is empty.
mbid = item["event"]["performance"][0]["artist"]["identifier"][0]["mbid"]
With this function i get this error : IndexError: list index out of range
but when I'm doing
def get_data():
newJsonx = dict()
for item in data["resultsPage"]["results"]["calendarEntry"]:
# Stops at the "identifier" list itself, so no [0] lookup is made on it here.
mbid = item["event"]["performance"][0]["artist"]["identifier"]
And print(mbid), I'm getting a correct answer :
"identifier": [
{
"mbid": "6655955b-1c1e-4bcb-84e4-81bcd9efab30"
},
{
"mbid": "1b1b1b1b-1c1d"
}
]
So this means I don't have a problem with the data. Maybe I'm doing something wrong with the second array?
Here is an example of the JSON structure :
{
"resultsPage": {
"status": "ok",
"results": {
"calendarEntry": [
{
"reason": {
},
"event": {
"performance": [
{
"id": 72641494,
"displayName": "Arnalds",
"artist": {
"id": 590465,
"identifier": [
{
"mbid": "6655955b-1c1e-4bcb-84e4-81bcd9efab30"
},
{
"mbid": "1b1b1b1b-1c1d"
}
]
}
}
]
}
}
]
}
}
}
Thanks for your time
def get_data():
    """Read the first "mbid" of each calendar entry, skipping entries without one.

    Reads the module-level ``data`` dict. Entries whose "performance" list or
    "identifier" list is empty are skipped instead of raising IndexError.
    """
    newJsonx = dict()
    for item in data["resultsPage"]["results"]["calendarEntry"]:
        performance = item["event"]["performance"]
        if not performance:
            continue  # no performance recorded for this entry
        identifier = performance[0]["artist"]["identifier"]
        if identifier:
            # First identifier only; use ``mbid`` here (print it, or store it
            # in ``newJsonx``).
            mbid = identifier[0]["mbid"]

manipulating json in python using recursion

All,
I am trying to change the way some json looks by going through and formatting it in the following way:
1. flatten all of the fields lists
2. Then remove the fields lists and replace them with the name : flatten list
Example:
{
"name": "",
"fields": [{
"name": "keys",
"fields": [{
"node-name": "0/0/CPU0"
},
{
"interface-name": "TenGigE0/0/0/47"
},
{
"device-id": "ASR9K-H1902.corp.cisco.com"
}
]
},
{
"name": "content",
"fields": [{
"name": "lldp-neighbor",
"fields": [{
"receiving-interface-name": "TenGigE0/0/0/47"
},
{
"receiving-parent-interface-name": "Bundle-Ether403"
},
{
"device-id": "ASR9K-H1902.corp.cisco.com"
},
{
"chassis-id": "78ba.f975.a64f"
},
{
"port-id-detail": "Te0/1/0/4/0"
},
{
"header-version": 0
},
{
"hold-time": 120
},
{
"enabled-capabilities": "R"
},
{
"platform": ""
}
]
}]
}
]
}
Would turn into:
{
"": [{
"keys": [{
"node-name": "0/0/CPU0",
"interface-name": "TenGigE0/0/0/47",
"device-id": "ASR9K-H1902.corp.cisco.com"
}]
},
{
"content": [{
"lldp-neighbor": [{
"receiving-interface-name": "TenGigE0/0/0/47",
"receiving-parent-interface-name": "Bundle-Ether403",
"device-id": "ASR9K-H1902.corp.cisco.com",
"chassis-id": "78ba.f975.a64f",
"port-id-detail": "Te0/1/0/4/0",
"header-version": 0,
"hold-time": 120,
"enabled-capabilities": "R",
"platform": ""
}]
}]
}
]
}
I have tried the following to get the list flattened:
def _flatten_fields(self, fields_list):
c = {}
for b in [d for d in fields_list if bool(d)]:
c.update(b)
return c
This seems to work, but I can't figure out a way to get into the sub-levels using recursion. I am saving all the flattened lists and names into a new dictionary; is there a way to do it by just manipulating the original dictionary?
This worked on the example you provided:
import json
def flatten(data):
    """Collapse a ``name``/``fields`` tree into nested plain dicts.

    Args:
        data: Either a dict or an iterable of such dicts. A dict with a
            ``"name"`` key is a branch whose children live under
            ``"fields"``; any other dict is a leaf of key/value pairs.

    Returns:
        A dict. A branch becomes ``{name: flatten(fields)}``, a leaf
        contributes its own items, and a list is the merge of its flattened
        entries (later keys overwrite earlier ones).
    """
    result = dict()
    if isinstance(data, dict):
        if 'name' in data:
            # Branch node: recurse into its children (none if "fields" is absent).
            result[data['name']] = flatten(data.get('fields', []))
        else:
            # Leaf node: copy its items. dict.update works on Python 2 and 3,
            # unlike indexing into keys()/values().
            result.update(data)
    else:
        for entry in data:
            result.update(flatten(entry))
    return result
# Call form of print works on both Python 2 and Python 3.
print(json.dumps(flatten(data), indent=4))
Output
{
"": {
"keys": {
"node-name": "0/0/CPU0",
"interface-name": "TenGigE0/0/0/47",
"device-id": "ASR9K-H1902.corp.cisco.com"
},
"content": {
"lldp-neighbor": {
"receiving-interface-name": "TenGigE0/0/0/47",
"receiving-parent-interface-name": "Bundle-Ether403",
"header-version": 0,
"port-id-detail": "Te0/1/0/4/0",
"chassis-id": "78ba.f975.a64f",
"platform": "",
"device-id": "ASR9K-H1902.corp.cisco.com",
"hold-time": 120,
"enabled-capabilities": "R"
}
}
}
}
It doesn't have the extra list layers shown in your expected output, but I don't think you want those.
This worked on the example you provided:
def flatten_fields(fields_list):
    """Flatten a list of ``name``/``fields`` dicts into a one-element list.

    Args:
        fields_list: List of dicts. A dict with a ``"fields"`` key is a
            branch and is stored under its ``"name"``; any other dict is a
            leaf whose items (except ``"name"``) are copied as they are.

    Returns:
        ``[merged]``, where ``merged`` maps each branch name to the
        recursively flattened list of its children and holds every leaf
        key/value pair directly.
    """
    merged = {}
    for item in fields_list:
        if "fields" in item:
            # Branch: the result no longer depends on dict key order.
            merged[item["name"]] = flatten_fields(item["fields"])
        else:
            merged.update((key, value) for key, value in item.items() if key != "name")
    return [merged]
But it works on a list of dictionaries, so you should call it like flatten_fields([data])[0].
The output is:
{
"": [{
"keys": [{
"node-name": "0/0/CPU0",
"interface-name": "TenGigE0/0/0/47",
"device-id": "ASR9K-H1902.corp.cisco.com"
}],
"content": [{
"lldp-neighbor": [{
"chassis-id": "78ba.f975.a64f",
"receiving-parent-interface-name": "Bundle-Ether403",
"enabled-capabilities": "R",
"device-id": "ASR9K-H1902.corp.cisco.com",
"hold-time": 120,
"receiving-interface-name": "TenGigE0/0/0/47",
"platform": "",
"header-version": 0,
"port-id-detail": "Te0/1/0/4/0"
}]
}]
}]
}

Categories

Resources