How to extract specific nested JSON value doing loop? (Python)

How to extract specific nested JSON value doing loop? (Python) - python

{
"127.0.0.1":{
"addresses":{
"ipv4":"127.0.0.1"
},
"hostnames":[
{
"name":"localhost",
"type":"PTR"
}
],
"status":{
"reason":"conn-refused",
"state":"up"
},
"tcp":{
"5000":{
"conf":"10",
"cpe":"cpe:/a:python:python:3.9.2",
"extrainfo":"Python 3.9.2",
"name":"http",
"product":"Werkzeug httpd",
"reason":"syn-ack",
"script":{
"vulners":"\n cpe:/a:python:python:3.9.2: \n \tCVE-2021-29921\t7.5\thttps://vulners.com/cve/CVE-2021-29921\n \tCVE-2021-23336\t4.0\thttps://vulners.com/cve/CVE-2021-23336\n \tMSF:ILITIES/DEBIAN-CVE-2021-3426/\t2.7\thttps://vulners.com/metasploit/MSF:ILITIES/DEBIAN-CVE-2021-3426/\t*EXPLOIT*\n \tCVE-2021-3426\t2.7\thttps://vulners.com/cve/CVE-2021-3426"
},
"state":"open",
"version":"1.0.1"
},
"6000":{
"conf":"10",
"cpe":"cpe:/a:python:python:3.9.2",
"extrainfo":"Python 3.9.2",
"name":"http",
"product":"Werkzeug httpd",
"reason":"syn-ack",
"script":{
"vulners":"\n cpe:/a:python:python:3.9.2: \n \tCVE-2021-29921\t7.5\thttps://vulners.com/cve/CVE-2021-29921\n \tCVE-2021-23336\t4.0\thttps://vulners.com/cve/CVE-2021-23336\n \tMSF:ILITIES/DEBIAN-CVE-2021-3426/\t2.7\thttps://vulners.com/metasploit/MSF:ILITIES/DEBIAN-CVE-2021-3426/\t*EXPLOIT*\n \tCVE-2021-3426\t2.7\thttps://vulners.com/cve/CVE-2021-3426"
},
"state":"open",
"version":"1.0.1"
}
},
"vendor":{
}
}
}
I want to extract "vulners" value here i tried this -
results = []
for x in collection.find({},{"scan": 1, "_id": 0 }):
results.append(json.loads(json_util.dumps(x)))
portnumber = []
datay = []
datapro = []
for result in results:
ips = result['scan']
for ip in ips:
ports = result['scan'][ip]['tcp']
ipdomain = result['scan'][ip]['hostnames']
for ip2 in ipdomain:
ip3 = ip2['name']
for port in ports:
portnumber.append(port)
datax = ports[port]['script']
datay.append(datax)
datapro2 = ports[port]['product']
datapro.append(datapro2)
date = datetime.datetime.now()
date_now = date.strftime("%x, %X")
pass_json_var = {'domain': ip3, 'ports': portnumber, 'product': datapro, 'vulnerabilities': datay, "date": date_now}
if isinstance(pass_json_var, list):
domaindata.insert_many(pass_json_var)
else:
domaindata.insert_one(pass_json_var)
Ok so here if the "results" output gives me one "vulners" value then it works fine but when it's multiple ports with vulners values it doesn't work!
How can i access the 'vulners' value? Hoping for someone to guide me also a bit, Please try to give a solution which is dynamic
Thanks a lot!

Model based approach
this approach is based on a model of your data you want to parse. From my point of view this is more work in the beginning. With the advantage, that you will have clean error messages and you can control the behaviour by adapting your data model.
make a model of the data you want to parse
from typing import Any, Optional
from pydantic import BaseModel, Field
class ExScript(BaseModel):
vulners:str = ""
class Ex30000(BaseModel):
script:ExScript = Field(default=Any)
class ExTcp(BaseModel):
root:Ex30000= Field(default=Any, alias="30000")
class ExRoot(BaseModel):
tcp:ExTcp = Field() # Required
class Base(BaseModel):
root:ExRoot = Field(default=Any, alias="35.0.0.0.0")
change your input data to a raw string outherwise you will have to escape \n and \t
input_will_work = r"""{
"35.0.0.0.0": {
"hostnames": [
{
"name": "domain.com",
"type": "PTR"
}
],
"addresses": {
"ipv4": "35.0.0.0"
},
"vendor": {},
"status": {
"state": "up",
"reason": "syn-ack"
},
"tcp": {
"30000": {
"state": "open",
"reason": "syn-ack",
"name": "http",
"product": "nginx",
"version": "1.20.0",
"extrainfo": "",
"conf": "10",
"cpe": "cpe:/a:igor_sysoev:nginx:1.20.0",
"script": {
"http-server-header": "nginx/1.20.0",
"vulners": "\n cpe:/a:igor_sysoev:nginx:1.20.0: \n \tNGINX:CVE-2021-23017\t6.8\thttps://vulners.com/nginx/NGINX:CVE-2021-23017\n \t9A14990B-D52A-56B6-966C-6F35C8B8EB9D\t6.8\thttps://vulners.com/githubexploit/9A14990B-D52A-56B6-966C-6F35C8B8EB9D\t*EXPLOIT*\n \t1337DAY-ID-36300\t6.8\thttps://vulners.com/zdt/1337DAY-ID-36300\t*EXPLOIT*\n \tPACKETSTORM:162830\t0.0\thttps://vulners.com/packetstorm/PACKETSTORM:162830\t*EXPLOIT*"
}
}
}
}
}
"""
input_will_fail = r"""{
"35.0.0.0.0": {}
}
"""
3.1 this should give you the expected result
obj1 = Base.parse_raw(input_will_work)
print(obj1.root.tcp.root.script.vulners)
3.2 this should throw an exception
obj2 = Base.parse_raw(input_will_fail)
Search data with jsonpath
should return all objects with the name vulners
from jsonpath_ng import jsonpath, parse
import json
obj = json.loads(input_will_work)
p = parse('$..vulners')
for match in p.find(obj):
print(match.value)

Update:
def extract_data(ip_address_data):
domains = ip_address_data["hostnames"]
ports_data = []
# Each port can have different products and vulners
# So that data is grouped together in a dictionary
for port in ip_address_data["tcp"].keys():
port_data = ip_address_data["tcp"][port]
product = port_data["product"]
vulners = port_data['script']['vulners']
ports_data.append({
"port": port,
"product": product,
"vulners": vulners
})
return {
"domains": domains,
"ports_data": ports_data
}
# Result is the data from mongo db
# result = collection.find({})["scan"]
result = {
"127.0.0.1": {
"addresses": {
"ipv4": "127.0.0.1"
},
"hostnames": [
{
"name": "localhost",
"type": "PTR"
}
],
"status": {
"reason": "conn-refused",
"state": "up"
},
"tcp": {
"5000": {
"conf": "10",
"cpe": "cpe:/a:python:python:3.9.2",
"extrainfo": "Python 3.9.2",
"name": "http",
"product": "Werkzeug httpd",
"reason": "syn-ack",
"script": {
"vulners": "\n cpe:/a:python:python:3.9.2: \n \tCVE-2021-29921\t7.5\thttps://vulners.com/cve/CVE-2021-29921\n \tCVE-2021-23336\t4.0\thttps://vulners.com/cve/CVE-2021-23336\n \tMSF:ILITIES/DEBIAN-CVE-2021-3426/\t2.7\thttps://vulners.com/metasploit/MSF:ILITIES/DEBIAN-CVE-2021-3426/\t*EXPLOIT*\n \tCVE-2021-3426\t2.7\thttps://vulners.com/cve/CVE-2021-3426"
},
"state": "open",
"version": "1.0.1"
},
"6000": {
"conf": "10",
"cpe": "cpe:/a:python:python:3.9.2",
"extrainfo": "Python 3.9.2",
"name": "http",
"product": "Werkzeug httpd",
"reason": "syn-ack",
"script": {
"vulners": "\n cpe:/a:python:python:3.9.2: \n \tCVE-2021-29921\t7.5\thttps://vulners.com/cve/CVE-2021-29921\n \tCVE-2021-23336\t4.0\thttps://vulners.com/cve/CVE-2021-23336\n \tMSF:ILITIES/DEBIAN-CVE-2021-3426/\t2.7\thttps://vulners.com/metasploit/MSF:ILITIES/DEBIAN-CVE-2021-3426/\t*EXPLOIT*\n \tCVE-2021-3426\t2.7\thttps://vulners.com/cve/CVE-2021-3426"
},
"state": "open",
"version": "1.0.1"
}
},
"vendor": {
}
}
}
def scandata():
for ip_address in result:
ip_address_data = extract_data(
result[ip_address]
)
print(ip_address, ip_address_data)
scandata()

def extract_data(ip_address_data):
domains = ip_address_data["hostnames"]
ports_data = []
# Each port can have different products and vulners
# So that data is grouped together in a dictionary
for port in ip_address_data["tcp"].keys():
port_data = ip_address_data["tcp"][port]
product = port_data["product"]
vulners = port_data['script']['vulners']
ports_data.append({
"port": port,
"product": product,
"vulners": vulners
})
return {
"domains": domains,
"ports_data": ports_data
}
#app.route('/api/vulnerableports', methods=['GET'])
def show_vulnerableports():
data = []
resultz = []
for x in collection.find({}, {"scan": 1, "_id": 0}):
resultz.append(json.loads(json_util.dumps(x)))
for resultx in resultz:
result = resultx['scan']
for ip_address in result:
ip_address_data = extract_data(
result[ip_address]
)
data.append({ip_address: ip_address_data})
return jsonify(data)
So this is the solution! i had to iterate over script and then access vulner!

Related

Writing resilient recursive code that will return results from a big json file

I have written a recursive code. I want more experienced people to tell me how resillient and fail-safe is my code:
I have a json file (Json file can be as big as 300MB):
[
{
"modules": {
"webpages": []
},
"webpages": {
"ip_addr": {
"value": "127.0.0.1",
"tags": []
},
"http": {
"status": {
"value": "Unavailable",
"tags": []
},
"title": {
"value": "403 Forbidden",
"tags": [
{
"category": "Server Code",
"match": "403"
},
{
"category": "Interesting Words",
"match": "Forbidden"
}
]
},
"server": {
"value": "Apache",
"tags": [
{
"category": "Apache Server",
"match": "Apache"
}
]
}
},
"redirects": [],
"robottxt": null
}
},
{
"modules": {
"webpages": []
}
}
]
I want to return value keys where tags are populated.
So I want to ignore:
"status": {
"value": "Unavailable",
"tags": []
},
But I want to return the title and server values. I also want to return ip_addr.value
I have written this code:
def getAllValues(nestedDictionary, firstArray, firstObj, firstUseful):
returnedArray = firstArray
tempValue = firstObj
useful = firstUseful
for key, value in nestedDictionary.items():
ipString = nestedDictionary.get("ip_addr")
if ipString is not None:
ipValue = ipString.get("value")
useful = {"ip_add": ipValue}
if isinstance(value, dict):
temp = {
"Key": key,
"useful": useful,
}
getAllValues(value, returnedArray, temp, useful)
else:
if key == "value":
tempValue["value"] = value
if key == "tags" and isinstance(value, list) and len(value) > 0:
tempValue["tags"] = value
returnedArray.append(tempValue)
return returnedArray
The above code should return:
[
{
"Key": "title",
"value": "403 Forbidden",
"useful": { "ip_addr": "127.0.0.1" },
"tags": [
{
"category": "Server Code",
"match": "403"
},
{
"category": "Interesting Words",
"match": "Forbidden"
}
]
},
{
"Key": "server",
"value": "Apache",
"useful": { "ip_addr": "127.0.0.1" },
"tags": [
{
"category": "Apache Server",
"match": "Apache"
}
]
}
]
Its a long post, but hopefully, someone can give me some assurance :)

Spotify API: Error parsing through JSON with Python

I have had trouble appending id's to a separate list as I parse through the JSON I receive from Spotify's "Users Saved Tracks" endpoint.
The JSON received looks like this:
{
"href": "https://api.spotify.com/v1/me/tracks?offset=0&limit=20",
"items": [
{
"added_at": "2021-11-16T13:56:51Z",
"track": {
"album": {
"album_type": "single",
"artists": [
{
"external_urls": {
"spotify": "https://open.spotify.com/artist/3iKDeO8yaOiWz7vkeljunk"
},
"href": "https://api.spotify.com/v1/artists/3iKDeO8yaOiWz7vkeljunk",
"id": "3iKDeO8yaOiWz7vkeljunk",
"name": "Heavenward",
"type": "artist",
"uri": "spotify:artist:3iKDeO8yaOiWz7vkeljunk"
}
],
"available_markets": [
],
"disc_number": 1,
"duration_ms": 224838,
"explicit": false,
"external_ids": {
"isrc": "QZK6P2040977"
},
"external_urls": {
"spotify": "https://open.spotify.com/track/6mJ1nbmQOm6iNClo71K5O6"
},
"href": "https://api.spotify.com/v1/tracks/6mJ1nbmQOm6iNClo71K5O6",
"id": "6mJ1nbmQOm6iNClo71K5O6",
"is_local": false,
"name": "Hole",
"popularity": 33,
"preview_url": "https://p.scdn.co/mp3-preview/c425dc91bdb19f1cddf2b35df08e30a03290c3c0?cid=8c9ee97b95854163a250399fda32d350",
"track_number": 1,
"type": "track",
"uri": "spotify:track:6mJ1nbmQOm6iNClo71K5O6"
}
}
Right now my code that I am using to parse looks like this:
def getLikedTrackIds(session):
url = 'https://api.spotify.com/v1/me/tracks'
payload = makeGetRequest(session, url)
if payload == None:
return None
liked_tracks_ids = []
for track in payload['items']:
for attribute in track['track']:
if (attribute == 'id'):
app.logger.info(f"\n\nTrack ID: {attribute}")
liked_tracks_ids.append(attribute)
return liked_tracks_ids
My liked_track_ids is filled with the string "id", for each song:
[ "id", "id", "id", "id"....]
Can anyone provide insight as to what I am doing wrong?

Already commented under the question but your code can be simplified by getting rid of the loop:
def getLikedTrackIds(session):
url = 'https://api.spotify.com/v1/me/tracks'
payload = makeGetRequest(session, url)
if payload == None:
return None
liked_tracks_ids = []
for track in payload['items']:
liked_id = track['track'].get('id', None)
if liked_id:
app.logger.info(f"\n\nTrack ID: {liked_id}")
liked_tracks_ids.append(liked_id)
return liked_tracks_ids

JSON dump appending random characters to end of file

I am writing a parser that goes through a list of data that is roughly formatted:
{
"teachers": [
{
"fullName": "Testing",
"class": [
{
"className": "Counselor",
"school": {
"id": "2b6671cb-617d-48d6-b0b5-3d44ce4da21c"
}
}
]
},
...
}
The parser is supposed to check for duplicate names within this json object, and when it stumbles upon said duplicate name, append the class to the class array.
So for example:
{
"teachers": [
{
"fullName": "Testing",
"class": [
{
"className": "Counselor",
"school": {
"id": "2b6671cb-617d-48d6-b0b5-3d44ce4da21c"
}
}
]
},
{
"fullName": "Testing",
"class": [
{
"className": "Math 8",
"school": {
"id": "2b6671cb-617d-48d6-b0b5-3d44ce4da21c"
}
}
]
},
...
}
Would return
{
"teachers": [
{
"fullName": "Testing",
"class": [
{
"className": "Counselor",
"school": {
"id": "2b6671cb-617d-48d6-b0b5-3d44ce4da21c"
}
},
{
"className": "Math 8",
"school": {
"id": "2b6671cb-617d-48d6-b0b5-3d44ce4da21c"
}
},
]
},
...
}
My current parser works just fine for most objects, however for some reason it doesn't catch some of the duplicates despite the names being the exact same, and also is appending the string
}7d-48d6-b0b5-3d44ce4da21c"
}
}
]
}
]
to the end of the json document. I am not sure why it would do this considering I am just dumping the modified json (which only is modified within the array).
My parser code is:
i_duplicates = []
name_duplicates = []
def converter():
global i_duplicates
file = open("final2.json", "r+")
infinite = json.load(file)
for i, teacher in enumerate(infinite["teachers"]):
class_name = teacher["class"][0]["className"]
class_data = {
"className": class_name,
"school": {
"id": "2b6671cb-617d-48d6-b0b5-3d44ce4da21c"
}
}
d = {
"fullName": teacher["fullName"],
"index": i
}
c = {
"fullName": teacher["fullName"]
}
if c in name_duplicates:
infinite["teachers"][search(i_duplicates, c["fullName"])]["class"].append(class_data)
infinite["teachers"].pop(i)
file.seek(0)
json.dump(infinite, file, indent=4)
else:
i_duplicates.append(d)
name_duplicates.append(c)
def search(a, t):
for i in a:
if i["fullName"] == t:
return i["index"]
print(Fore.RED + "not found" + Fore.RESET)
I know I am going about this inefficiently, but I am not sure how to fix the issues the current algorithm is having. Any feedback appreciated.

Problem with extrakting Data out of a List of JSONS

I want to evaluate json data using python. However, I get the error message: string indices must be integers
I have tested it with the following code, but it does not work. I tried it before with elasticsearch but im not that good in programming so i decided to try it with a homebrew solution
(sorry for the bad formating, i am new to stackoverflow)
```
import requests, json, os
from elasticsearch import Elasticsearch
directory = "C:\\Users\\Felix Bildstein\\Desktop\\Test1"
Dateien = os.listdir(directory)
index_len = len(Dateien) - 2
n = 1
# Create your dictionary class
class my_dictionary(dict):
# __init__ function
def __init__(self):
self = dict()
# Function to add key:value
def add(self, key, value):
self[key] = value
# Main Function
dict_obj = my_dictionary()
dict_obj.add(1, 'Geeks')
while n <= index_len:
try:
a, *Dateien = Dateien
n += 1
f = open(a, "r")
file_contents = (f.read())
f.close
dict_obj.add(n, file_contents)
finally:
print(n)
print(file_contents['unitPrice'])
output = dict_obj
print(output)
with open('result.json', 'w') as fp:
json.dump(output, fp)
f = open("dict.txt","w")
f.write( str(dict_obj) )
f.close()
```
It should spend the appropriate value
This is my Test Json
{
"merchantOrderId": "302-08423880-89823764",
"creationTime": {
"usecSinceEpochUtc": "15555040944000000",
"granularity": "MICROSECOND"
},
"transactionMerchant": {
"name": "Amazon.de"
},
"lineItem": [{
"purchase": {
"status": "ACCEPTED",
"unitPrice": {
"amountMicros": "4690000",
"currencyCode": {
"code": "EUR"
},
"displayString": "EUR 4,20"
},
"returnsInfo": {
"isReturnable": true,
"daysToReturn": 30
},
"fulfillment": {
"location": {
"address": [""]
},
"timeWindow": {
"startTime": {
"usecSinceEpochUtc": "155615040000000",
"granularity": "DAY"
},
"endTime": {
"usecSinceEpochUtc": "155615040000000",
"granularity": "DAY"
}
}
},
"landingPageUrl": {
"link": "https://www.amazon.de/gp/r.html?C\u003d3ILR4VQSVD3HI\u0026K\u0026M\u003durn:rtn:msg:20190417124222996c5bb6751e45b5ba12aff8d350p0eu\u0026R\u003d2FEGGCJMDBAOF\u0026T\u003dC\u0026U\u003dhttps%3A%2F%2Fwww.amazon.de%2Fdp%2FB001J8I7VG%2Fref%3Dpe_3044161_185740101_TE_item\u0026H\u003d6EXBPJA679MVNLICLRRO4K1XPFCA\u0026ref_\u003dpe_3044161_185740101_TE_item"
},
"productInfo": {
"name": "tesa Powerstrips Bildernagel, selbstklebend, weiß, 2 Stück"
}
},
"name": "tesa Powerstrips Bildernagel, selbstklebend, weiß, 2 Stück"
}],
"priceline": [{
"type": "SUBTOTAL",
"amount": {
"amountMicros": "4690000",
"currencyCode": {
"code": "EUR"
}
}
}, {
"type": "DELIVERY",
"amount": {
"amountMicros": "0",
"currencyCode": {
"code": "EUR"
},
"displayString": "EUR 0,00"
}
}]
}

Json comparison with different link among objects in python

I'm trying to compare 2 json that show a different order in the ids value, even thouh they are concettually the same, as you can see from the example.
For instance, each person object points to an Address object based on its id. The ids could be different but, concettually, the 2 json are the same. Here an example: The order of the address_id is different (as the address object position), but the 2 json are the same, (John lives in NYC and Pippo in BCN).
{
"Persons": [
{
"Name": "John",
"Address_Id": "0"
},
{
"Name": "Pippo",
"Address_Id": "1"
}
],
"Addresses": [
{
"City": "NYC"
},
{
"City": "BCN"
}
]
}
{
"Persons": [
{
"Name": "John",
"Address_Id": "1"
},
{
"Name": "Pippo",
"Address_Id": "0"
}
],
"Addresses": [
{
"City": "BCN"
},
{
"City": "NYC"
}
]
}
Is there a way to compare the 2 json considering this "special case" ??
Thanks.

You can deserialise the json yourself by creating the equivalent Python classes and implementing the comparison yourself
. For example:
import json
class Person_book:
def __init__(self, person_list):
self.person_list = person_list
def __eq__(self, other):
if len(self.person_list) != len(other.person_list):
return False
for person in self.person_list:
if person not in other.person_list:
return False
return True
class Person:
def __init__(self, person_id, address):
self.person_id = person_id
self.address = address
def __str__(self):
return str(self.person_id) + ' ' + str(self.address )
def __eq__(self, other):
if self.person_id == other.person_id and self.address == other.address:
return True
return False
def deserialize_persons(persons_array):
persons = persons_array['Persons']
addresses = persons_array['Addresses']
person_list = []
for person in persons:
person_entry = Person(person['Name'], addresses[int(person['Address_Id'])])
person_list.append(person_entry)
return Person_book(person_list)
json_resp_1 = """{
"Persons": [
{
"Name": "John",
"Address_Id": "0"
},
{
"Name": "Pippo",
"Address_Id": "1"
}
],
"Addresses": [
{
"City": "NYC"
},
{
"City": "BCN"
}
]
}"""
json_resp_2 = """
{
"Persons": [
{
"Name": "John",
"Address_Id": "1"
},
{
"Name": "Pippo",
"Address_Id": "0"
}
],
"Addresses": [
{
"City": "BCN"
},
{
"City": "NYC"
}
]
}"""
resp_1 = json.loads(json_resp_1)
resp_2 = json.loads(json_resp_2)
person_book_1 = deserialize_persons(resp_1)
person_book_2 = deserialize_persons(resp_2)
print(person_book_1 == person_book_2)

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

How to extract specific nested JSON value doing loop? (Python) - python

Related

Writing resilient recursive code that will return results from a big json file

Spotify API: Error parsing through JSON with Python

JSON dump appending random characters to end of file

Problem with extrakting Data out of a List of JSONS

Json comparison with different link among objects in python

Categories

Resources