Problem with extrakting Data out of a List of JSONS

Problem with extrakting Data out of a List of JSONS - python

I want to evaluate json data using python. However, I get the error message: string indices must be integers
I have tested it with the following code, but it does not work. I tried it before with elasticsearch but im not that good in programming so i decided to try it with a homebrew solution
(sorry for the bad formating, i am new to stackoverflow)
```
import requests, json, os
from elasticsearch import Elasticsearch
directory = "C:\\Users\\Felix Bildstein\\Desktop\\Test1"
Dateien = os.listdir(directory)
index_len = len(Dateien) - 2
n = 1
# Create your dictionary class
class my_dictionary(dict):
# __init__ function
def __init__(self):
self = dict()
# Function to add key:value
def add(self, key, value):
self[key] = value
# Main Function
dict_obj = my_dictionary()
dict_obj.add(1, 'Geeks')
while n <= index_len:
try:
a, *Dateien = Dateien
n += 1
f = open(a, "r")
file_contents = (f.read())
f.close
dict_obj.add(n, file_contents)
finally:
print(n)
print(file_contents['unitPrice'])
output = dict_obj
print(output)
with open('result.json', 'w') as fp:
json.dump(output, fp)
f = open("dict.txt","w")
f.write( str(dict_obj) )
f.close()
```
It should spend the appropriate value
This is my Test Json
{
"merchantOrderId": "302-08423880-89823764",
"creationTime": {
"usecSinceEpochUtc": "15555040944000000",
"granularity": "MICROSECOND"
},
"transactionMerchant": {
"name": "Amazon.de"
},
"lineItem": [{
"purchase": {
"status": "ACCEPTED",
"unitPrice": {
"amountMicros": "4690000",
"currencyCode": {
"code": "EUR"
},
"displayString": "EUR 4,20"
},
"returnsInfo": {
"isReturnable": true,
"daysToReturn": 30
},
"fulfillment": {
"location": {
"address": [""]
},
"timeWindow": {
"startTime": {
"usecSinceEpochUtc": "155615040000000",
"granularity": "DAY"
},
"endTime": {
"usecSinceEpochUtc": "155615040000000",
"granularity": "DAY"
}
}
},
"landingPageUrl": {
"link": "https://www.amazon.de/gp/r.html?C\u003d3ILR4VQSVD3HI\u0026K\u0026M\u003durn:rtn:msg:20190417124222996c5bb6751e45b5ba12aff8d350p0eu\u0026R\u003d2FEGGCJMDBAOF\u0026T\u003dC\u0026U\u003dhttps%3A%2F%2Fwww.amazon.de%2Fdp%2FB001J8I7VG%2Fref%3Dpe_3044161_185740101_TE_item\u0026H\u003d6EXBPJA679MVNLICLRRO4K1XPFCA\u0026ref_\u003dpe_3044161_185740101_TE_item"
},
"productInfo": {
"name": "tesa Powerstrips Bildernagel, selbstklebend, weiß, 2 Stück"
}
},
"name": "tesa Powerstrips Bildernagel, selbstklebend, weiß, 2 Stück"
}],
"priceline": [{
"type": "SUBTOTAL",
"amount": {
"amountMicros": "4690000",
"currencyCode": {
"code": "EUR"
}
}
}, {
"type": "DELIVERY",
"amount": {
"amountMicros": "0",
"currencyCode": {
"code": "EUR"
},
"displayString": "EUR 0,00"
}
}]
}

Related

Transforming streaming data into a json

I have a streaming data coming from a source and I was able to capture few important variables like ID, Timestamp and Vital signs. What I am trying to do is create a json file object and unload it into a file system. This is one of the examples:
for k in streaming:
id = k[1] ----------> 1
timestamp = k[2] ------> 1652304692
vsigns = k[3] -------> Nested dictionary
This is how vsigns looks like
"vsigns":
{
"ECG":
{
"VPB_FREQ": "0",
"STI": "+0.00"
},
"HR":
{
"HR_EKG": "87",
"HR_PPG_1": "87",
"HR_PULSED": "87"
},
"NIBP":
{
"NIBPS": "119",
"NIBPD": "88",
"NIBPM": "95"
}
}
And I want a json structure in the following format:
[{
"id": "1",
"ECG":
{
"timestamp": 1652304692,
"VPB_FREQ": "0",
"STI": "+0.00",
}
},
{
"id": "1",
"HR":
{
"timestamp": 1652304692,
"HR_EKG": "87",
"HR_PPG_1": "87",
"HR_PULSED": "87"
}
},
{
"id": "1",
"NIBP":
{
"timestamp": 1652304692,
"NIBPS": "119",
"NIBPD": "88",
"NIBPM": "95"
},
}]
I tried an approach but doesn't give me what I want. How do I get this right.
for k in streaming:
id = k[1]
timestamp = k[2]
vsigns = k[3]
vlist = []
for k, v in vsigns.items():
vdict = {"id": id,
k:{"timestamp":timestamp,
"vsigns": v}}
vlist.append(vdict)
print(vlist)
Output:
[{
"id": "1",
"ECG":
{
"timestamp": 1652951054.0,
"vsigns":
{
"VPB_FREQ": "0"
}
}
},
{
"id": "1",
"HR":
{
"timestamp": 1652951054.0,
"vsigns":
{
"HR_EKG": "126",
"HR_PPG_1": "127",
"HR_PULSED": "127"
}
}
},
{
"id": "1",
"NIBP":
{
"timestamp": 1652951054.0,
"vsigns":
{
"NIBPS": "95",
"NIBPD": "46",
"NIBPM": "62"
}
}
}
}]

The following piece of code worked for me:
for k in streaming:
id = k[1]
timestamp = k[2]
vsigns = k[3]
vlist = []
for k, v in vsigns.items():
v['timestamp'] = epoch_time
vdict = {"id": id,
k:v}
vlist.append(vdict)

How to extract specific nested JSON value doing loop? (Python)

{
"127.0.0.1":{
"addresses":{
"ipv4":"127.0.0.1"
},
"hostnames":[
{
"name":"localhost",
"type":"PTR"
}
],
"status":{
"reason":"conn-refused",
"state":"up"
},
"tcp":{
"5000":{
"conf":"10",
"cpe":"cpe:/a:python:python:3.9.2",
"extrainfo":"Python 3.9.2",
"name":"http",
"product":"Werkzeug httpd",
"reason":"syn-ack",
"script":{
"vulners":"\n cpe:/a:python:python:3.9.2: \n \tCVE-2021-29921\t7.5\thttps://vulners.com/cve/CVE-2021-29921\n \tCVE-2021-23336\t4.0\thttps://vulners.com/cve/CVE-2021-23336\n \tMSF:ILITIES/DEBIAN-CVE-2021-3426/\t2.7\thttps://vulners.com/metasploit/MSF:ILITIES/DEBIAN-CVE-2021-3426/\t*EXPLOIT*\n \tCVE-2021-3426\t2.7\thttps://vulners.com/cve/CVE-2021-3426"
},
"state":"open",
"version":"1.0.1"
},
"6000":{
"conf":"10",
"cpe":"cpe:/a:python:python:3.9.2",
"extrainfo":"Python 3.9.2",
"name":"http",
"product":"Werkzeug httpd",
"reason":"syn-ack",
"script":{
"vulners":"\n cpe:/a:python:python:3.9.2: \n \tCVE-2021-29921\t7.5\thttps://vulners.com/cve/CVE-2021-29921\n \tCVE-2021-23336\t4.0\thttps://vulners.com/cve/CVE-2021-23336\n \tMSF:ILITIES/DEBIAN-CVE-2021-3426/\t2.7\thttps://vulners.com/metasploit/MSF:ILITIES/DEBIAN-CVE-2021-3426/\t*EXPLOIT*\n \tCVE-2021-3426\t2.7\thttps://vulners.com/cve/CVE-2021-3426"
},
"state":"open",
"version":"1.0.1"
}
},
"vendor":{
}
}
}
I want to extract "vulners" value here i tried this -
results = []
for x in collection.find({},{"scan": 1, "_id": 0 }):
results.append(json.loads(json_util.dumps(x)))
portnumber = []
datay = []
datapro = []
for result in results:
ips = result['scan']
for ip in ips:
ports = result['scan'][ip]['tcp']
ipdomain = result['scan'][ip]['hostnames']
for ip2 in ipdomain:
ip3 = ip2['name']
for port in ports:
portnumber.append(port)
datax = ports[port]['script']
datay.append(datax)
datapro2 = ports[port]['product']
datapro.append(datapro2)
date = datetime.datetime.now()
date_now = date.strftime("%x, %X")
pass_json_var = {'domain': ip3, 'ports': portnumber, 'product': datapro, 'vulnerabilities': datay, "date": date_now}
if isinstance(pass_json_var, list):
domaindata.insert_many(pass_json_var)
else:
domaindata.insert_one(pass_json_var)
Ok so here if the "results" output gives me one "vulners" value then it works fine but when it's multiple ports with vulners values it doesn't work!
How can i access the 'vulners' value? Hoping for someone to guide me also a bit, Please try to give a solution which is dynamic
Thanks a lot!

Model based approach
this approach is based on a model of your data you want to parse. From my point of view this is more work in the beginning. With the advantage, that you will have clean error messages and you can control the behaviour by adapting your data model.
make a model of the data you want to parse
from typing import Any, Optional
from pydantic import BaseModel, Field
class ExScript(BaseModel):
vulners:str = ""
class Ex30000(BaseModel):
script:ExScript = Field(default=Any)
class ExTcp(BaseModel):
root:Ex30000= Field(default=Any, alias="30000")
class ExRoot(BaseModel):
tcp:ExTcp = Field() # Required
class Base(BaseModel):
root:ExRoot = Field(default=Any, alias="35.0.0.0.0")
change your input data to a raw string outherwise you will have to escape \n and \t
input_will_work = r"""{
"35.0.0.0.0": {
"hostnames": [
{
"name": "domain.com",
"type": "PTR"
}
],
"addresses": {
"ipv4": "35.0.0.0"
},
"vendor": {},
"status": {
"state": "up",
"reason": "syn-ack"
},
"tcp": {
"30000": {
"state": "open",
"reason": "syn-ack",
"name": "http",
"product": "nginx",
"version": "1.20.0",
"extrainfo": "",
"conf": "10",
"cpe": "cpe:/a:igor_sysoev:nginx:1.20.0",
"script": {
"http-server-header": "nginx/1.20.0",
"vulners": "\n cpe:/a:igor_sysoev:nginx:1.20.0: \n \tNGINX:CVE-2021-23017\t6.8\thttps://vulners.com/nginx/NGINX:CVE-2021-23017\n \t9A14990B-D52A-56B6-966C-6F35C8B8EB9D\t6.8\thttps://vulners.com/githubexploit/9A14990B-D52A-56B6-966C-6F35C8B8EB9D\t*EXPLOIT*\n \t1337DAY-ID-36300\t6.8\thttps://vulners.com/zdt/1337DAY-ID-36300\t*EXPLOIT*\n \tPACKETSTORM:162830\t0.0\thttps://vulners.com/packetstorm/PACKETSTORM:162830\t*EXPLOIT*"
}
}
}
}
}
"""
input_will_fail = r"""{
"35.0.0.0.0": {}
}
"""
3.1 this should give you the expected result
obj1 = Base.parse_raw(input_will_work)
print(obj1.root.tcp.root.script.vulners)
3.2 this should throw an exception
obj2 = Base.parse_raw(input_will_fail)
Search data with jsonpath
should return all objects with the name vulners
from jsonpath_ng import jsonpath, parse
import json
obj = json.loads(input_will_work)
p = parse('$..vulners')
for match in p.find(obj):
print(match.value)

Update:
def extract_data(ip_address_data):
domains = ip_address_data["hostnames"]
ports_data = []
# Each port can have different products and vulners
# So that data is grouped together in a dictionary
for port in ip_address_data["tcp"].keys():
port_data = ip_address_data["tcp"][port]
product = port_data["product"]
vulners = port_data['script']['vulners']
ports_data.append({
"port": port,
"product": product,
"vulners": vulners
})
return {
"domains": domains,
"ports_data": ports_data
}
# Result is the data from mongo db
# result = collection.find({})["scan"]
result = {
"127.0.0.1": {
"addresses": {
"ipv4": "127.0.0.1"
},
"hostnames": [
{
"name": "localhost",
"type": "PTR"
}
],
"status": {
"reason": "conn-refused",
"state": "up"
},
"tcp": {
"5000": {
"conf": "10",
"cpe": "cpe:/a:python:python:3.9.2",
"extrainfo": "Python 3.9.2",
"name": "http",
"product": "Werkzeug httpd",
"reason": "syn-ack",
"script": {
"vulners": "\n cpe:/a:python:python:3.9.2: \n \tCVE-2021-29921\t7.5\thttps://vulners.com/cve/CVE-2021-29921\n \tCVE-2021-23336\t4.0\thttps://vulners.com/cve/CVE-2021-23336\n \tMSF:ILITIES/DEBIAN-CVE-2021-3426/\t2.7\thttps://vulners.com/metasploit/MSF:ILITIES/DEBIAN-CVE-2021-3426/\t*EXPLOIT*\n \tCVE-2021-3426\t2.7\thttps://vulners.com/cve/CVE-2021-3426"
},
"state": "open",
"version": "1.0.1"
},
"6000": {
"conf": "10",
"cpe": "cpe:/a:python:python:3.9.2",
"extrainfo": "Python 3.9.2",
"name": "http",
"product": "Werkzeug httpd",
"reason": "syn-ack",
"script": {
"vulners": "\n cpe:/a:python:python:3.9.2: \n \tCVE-2021-29921\t7.5\thttps://vulners.com/cve/CVE-2021-29921\n \tCVE-2021-23336\t4.0\thttps://vulners.com/cve/CVE-2021-23336\n \tMSF:ILITIES/DEBIAN-CVE-2021-3426/\t2.7\thttps://vulners.com/metasploit/MSF:ILITIES/DEBIAN-CVE-2021-3426/\t*EXPLOIT*\n \tCVE-2021-3426\t2.7\thttps://vulners.com/cve/CVE-2021-3426"
},
"state": "open",
"version": "1.0.1"
}
},
"vendor": {
}
}
}
def scandata():
for ip_address in result:
ip_address_data = extract_data(
result[ip_address]
)
print(ip_address, ip_address_data)
scandata()

def extract_data(ip_address_data):
domains = ip_address_data["hostnames"]
ports_data = []
# Each port can have different products and vulners
# So that data is grouped together in a dictionary
for port in ip_address_data["tcp"].keys():
port_data = ip_address_data["tcp"][port]
product = port_data["product"]
vulners = port_data['script']['vulners']
ports_data.append({
"port": port,
"product": product,
"vulners": vulners
})
return {
"domains": domains,
"ports_data": ports_data
}
#app.route('/api/vulnerableports', methods=['GET'])
def show_vulnerableports():
data = []
resultz = []
for x in collection.find({}, {"scan": 1, "_id": 0}):
resultz.append(json.loads(json_util.dumps(x)))
for resultx in resultz:
result = resultx['scan']
for ip_address in result:
ip_address_data = extract_data(
result[ip_address]
)
data.append({ip_address: ip_address_data})
return jsonify(data)
So this is the solution! i had to iterate over script and then access vulner!

JSON dump appending random characters to end of file

I am writing a parser that goes through a list of data that is roughly formatted:
{
"teachers": [
{
"fullName": "Testing",
"class": [
{
"className": "Counselor",
"school": {
"id": "2b6671cb-617d-48d6-b0b5-3d44ce4da21c"
}
}
]
},
...
}
The parser is supposed to check for duplicate names within this json object, and when it stumbles upon said duplicate name, append the class to the class array.
So for example:
{
"teachers": [
{
"fullName": "Testing",
"class": [
{
"className": "Counselor",
"school": {
"id": "2b6671cb-617d-48d6-b0b5-3d44ce4da21c"
}
}
]
},
{
"fullName": "Testing",
"class": [
{
"className": "Math 8",
"school": {
"id": "2b6671cb-617d-48d6-b0b5-3d44ce4da21c"
}
}
]
},
...
}
Would return
{
"teachers": [
{
"fullName": "Testing",
"class": [
{
"className": "Counselor",
"school": {
"id": "2b6671cb-617d-48d6-b0b5-3d44ce4da21c"
}
},
{
"className": "Math 8",
"school": {
"id": "2b6671cb-617d-48d6-b0b5-3d44ce4da21c"
}
},
]
},
...
}
My current parser works just fine for most objects, however for some reason it doesn't catch some of the duplicates despite the names being the exact same, and also is appending the string
}7d-48d6-b0b5-3d44ce4da21c"
}
}
]
}
]
to the end of the json document. I am not sure why it would do this considering I am just dumping the modified json (which only is modified within the array).
My parser code is:
i_duplicates = []
name_duplicates = []
def converter():
global i_duplicates
file = open("final2.json", "r+")
infinite = json.load(file)
for i, teacher in enumerate(infinite["teachers"]):
class_name = teacher["class"][0]["className"]
class_data = {
"className": class_name,
"school": {
"id": "2b6671cb-617d-48d6-b0b5-3d44ce4da21c"
}
}
d = {
"fullName": teacher["fullName"],
"index": i
}
c = {
"fullName": teacher["fullName"]
}
if c in name_duplicates:
infinite["teachers"][search(i_duplicates, c["fullName"])]["class"].append(class_data)
infinite["teachers"].pop(i)
file.seek(0)
json.dump(infinite, file, indent=4)
else:
i_duplicates.append(d)
name_duplicates.append(c)
def search(a, t):
for i in a:
if i["fullName"] == t:
return i["index"]
print(Fore.RED + "not found" + Fore.RESET)
I know I am going about this inefficiently, but I am not sure how to fix the issues the current algorithm is having. Any feedback appreciated.

Parsing nested JSON objects with Python

I have the following JSON data and would like to extract email value using Python:
{
"_links": {
"self": {
"href": "https://example.com/comments/9"
}
},
"_embedded": {
"customer": {
"name": "Jamie XXXX",
"email": "jamie#example.tv",
"thumbnail": {
"small": "https://secure.gravatar.com/avatar/dfd.png?d=blank&r=PG&s=100",
"medium": "https://secure.gravatar.com/avatar/dfd.png?d=blank&r=PG&s=200",
"large": "https://secure.gravatar.com/avatar/dfdfd.png?d=blank&r=PG&s=300"
}
},
"comments": []
},
"id": 9,
"video_id": null,
"content": "j/k I meant that as a reply",
"comments_count": 0,
"created_at": "2014-03-12T17:46:07Z",
"updated_at": "2014-03-12T17:46:07Z"
}
I tried something like but it's not working:
jsonresp = r.json()
for k, v in jsonresp:
print(jsonresp['_embedded']['customer']['email'])

jsonresp = r.json()
print(jsonresp['_embedded']['customer']['email'])

Using pyjq:
import json
import pyjq
with open("input.json", "r") as myfile:
data=json.load(myfile)
print pyjq.first('._embedded.customer.email', data);

manipulating json in python using recursion

All,
I am trying to change the way some json looks by going through and formatting it in the following way:
1. flatten all of the fields lists
2. Then remove the fields lists and replace them with the name : flatten list
Example:
{
"name": "",
"fields": [{
"name": "keys",
"fields": [{
"node-name": "0/0/CPU0"
},
{
"interface-name": "TenGigE0/0/0/47"
},
{
"device-id": "ASR9K-H1902.corp.cisco.com"
}
]
},
{
"name": "content",
"fields": [{
"name": "lldp-neighbor",
"fields": [{
"receiving-interface-name": "TenGigE0/0/0/47"
},
{
"receiving-parent-interface-name": "Bundle-Ether403"
},
{
"device-id": "ASR9K-H1902.corp.cisco.com"
},
{
"chassis-id": "78ba.f975.a64f"
},
{
"port-id-detail": "Te0/1/0/4/0"
},
{
"header-version": 0
},
{
"hold-time": 120
},
{
"enabled-capabilities": "R"
},
{
"platform": ""
}
]
}]
}
]
}
Would turn into:
{
"": [{
"keys": [{
"node-name": "0/0/CPU0",
"interface-name": "TenGigE0/0/0/47",
"device-id": "ASR9K-H1902.corp.cisco.com"
}]
},
{
"content": [{
"lldp-neighbor": [{
"receiving-interface-name": "TenGigE0/0/0/47",
"receiving-parent-interface-name": "Bundle-Ether403",
"device-id": "ASR9K-H1902.corp.cisco.com",
"chassis-id": "78ba.f975.a64f",
"port-id-detail": "Te0/1/0/4/0",
"header-version": 0,
"hold-time": 120,
"enabled-capabilities": "R",
"platform": ""
}]
}]
}
]
}
I have tried the following to get the list flattened:
def _flatten_fields(self, fields_list):
c = {}
for b in [d for d in fields_list if bool(d)]:
c.update(b)
return c
This seems to work but I can't figure out a way to get into the sub levels using recursion, I am saving all flatten lists and names into a new dictionary, is there a way to do it by just manipulating the original dictionary?

This worked on the example you provided:
import json
def flatten(data):
result = dict()
if isinstance(data, dict):
if 'name' in data:
name = data['name']
result[name] = flatten(data['fields'])
else:
key = data.keys()[0]
value = data.values()[0]
result[key] = value
else:
for entry in data:
result.update(flatten(entry))
return result
print json.dumps(flatten(data), indent=4)
Output
{
"": {
"keys": {
"node-name": "0/0/CPU0",
"interface-name": "TenGigE0/0/0/47",
"device-id": "ASR9K-H1902.corp.cisco.com"
},
"content": {
"lldp-neighbor": {
"receiving-interface-name": "TenGigE0/0/0/47",
"receiving-parent-interface-name": "Bundle-Ether403",
"header-version": 0,
"port-id-detail": "Te0/1/0/4/0",
"chassis-id": "78ba.f975.a64f",
"platform": "",
"device-id": "ASR9K-H1902.corp.cisco.com",
"hold-time": 120,
"enabled-capabilities": "R"
}
}
}
}
It doesn't have the extra list layers shown in your expected output, but I don't think you want those.

This worked on the example you provided:
def flatten_fields(fields_list):
c = {}
for item in fields_list:
for key in item:
if key == "fields":
c[item["name"]] = flatten_fields(item["fields"])
elif key != "name":
c[key] = item[key]
break
return [c]
But it works on a list of dictionaries, so you should call it like flatten_fields([data])[0].
The output is:
{
"": [{
"keys": [{
"node-name": "0/0/CP0",
"interface-name": "TenGigE0/0/0/47",
"device-id": "ASR9K-H1902.corp.cisco.com"
}],
"content": [{
"lldp-neighbor": [{
"chassis-id": "78ba.f975.a64f",
"receiving-parent-interface-name": "Bndle-Ether403",
"enabled-capabilities": "R",
"device-id": "ASR9K-H1902.corp.cisco.com",
"hold-time": 120,
"receiving-interface-name": "TenGigE0/0/0/47",
"platform": "",
"header-version": 0,
"port-id-detail": "Te0/1/0/4/0"
}]
}]
}]
}

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Problem with extrakting Data out of a List of JSONS - python

Related

Transforming streaming data into a json

How to extract specific nested JSON value doing loop? (Python)

JSON dump appending random characters to end of file

Parsing nested JSON objects with Python

manipulating json in python using recursion

Categories

Resources