Retrieve data from web site after 2 POST queries - python

I am trying to scrape this site to get the list of offers.
The problem is that we need to fill 2 forms (2 POST queries) before receiving the final result.
This is what I have done so far:
import requests as rs
from form_data import form_data1, form_data2
base_url = "https://compare.energy.vic.gov.au/api"
with rs.Session() as s:
url_ = f"{base_url}/get-psb-details?serverCacheId=null"
r = (s.get(url_))
serverCacheId = r.json()["serverCacheId"]
r = s.post(f"{base_url}/save-form-data", data=form_data1)
r = s.post(f"{base_url}/save-form-data", data=form_data2)
Then I am trying to retrieve the offers after the second POST query:
url_ = "https://compare.energy.vic.gov.au/api/get-offers"
body = {"serverCacheId": str(serverCacheId),
"loopBack": "false",
"selectedEnergy": "/offer"}
r = s.get(url_, params=body)
print(r.json())
But unfortunately I get a message indicating a redirection:
{'status': 'redirect', 'message': 'no data'}
The 2 POSTs use the current data:
form_data1 = {
"showSolarSelection": "true",
"energyType": "Electricity",
"userType": "Residential",
"bill": "no bill",
"postcode": "3000",
"usageProfile": "0",
"averageDailyConsumption": "0",
"skipNMI": "true",
"smartMeter": "1",
"disclaimer": "true",
"hasSolar": "0",
"hasConcession": "0",
"distributor": {
"id": "4",
"name": "Citipower",
"display": "Citipower",
"phone": "1300 301 101 / 13 12 80",
"distribution_zone_id": "11",
"distribution_zone_name": "All"
},
"distributorDerived": "0",
"distributorSubmit": "true",
"pageDataType": "energyConfigData",
"loopBack": "true"
}
and
form_data2 = {
"pvCapacity": "0", "pvCapacityCap": "null", "hhSize": "1", "totalRooms": "1", "fridgeCount": "0",
"gasConnection": "4", "poolHeating": "0", "poolHeatingSolar": "false", "poolHeatingGas": "false",
"poolHeatingElectric": "false", "poolHeatingNone": "false", "spaceHeatingElectricDucted": "false",
"spaceHeatingSplitSystem": "false", "spaceHeatingElectricUnderfloor": "false",
"spaceHeatingElectricIndividual": "false", "spaceHeatingGasDucted": "false",
"spaceHeatingGasUnderfloor": "false", "spaceHeatingGasIndividual": "false", "spaceHeatingOther": "false",
"spaceHeatingNone": "true", "spaceCoolingRoomAC": "false", "spaceCoolingSplitSystem": "false",
"spaceCoolingDuctedReverse": "false", "spaceCoolingDuctedEvaporative": "false",
"spaceCoolingPortableRef": "false", "spaceCoolingPortableEvap": "false", "spaceCoolingOther": "false",
"spaceCoolingNone": "true", "seaDistance": "", "clothesDryer": "0", "clothesDryerWeekday": "",
"clothesDryerWeekend": "", "dishwasherWeekday": "", "dishwasherWeekend": "",
"waterHeatingElectric": "false", "waterHeatingElectricSolar": "false", "waterHeatingGasStorage": "false",
"waterHeatingGasInstant": "false", "waterHeatingGasSolar": "false", "waterHeatingOther": "true",
"controlledLoad": "", "tvTotal": "", "turnOffAtPowerShort": "", "ovensElectric": "", "ovensGas": "",
"washingMachineUsage": "", "washingMachineWeekday": "", "washingMachineWeekend": "",
"televisionUsageWeekday": "", "televisionUsageWeekend": "", "heatingUsageMethod": "",
"gasUsageWinter": "0", "hhSize51": "", "energyType": "Electricity", "hasSolar": "0",
"pageDataType": "energyProfileData", "loopBack": "false"
}
Expected result
The expected result is a JSON object containing offers. Here is its structure:
{
"selectedEnergyType": "Electricity",
"energyTypeCount": 1,
"offers": {
"Electricity": {
"offersList": [{...}]
}
}
}

The site has some requirements and restrictions on the form data.
form_data1:
Add required fields "solarCapacity" and "feedInTariff".
"hasSolar": "0",
"solarCapacity": "", # Add this
"hasConcession": "0",
"feedInTariff": "", # Add this
Change "loopBack": "true" to "loopBack": false.
# "loopBack": "true"
"loopBack": False
Set "serverCacheId" and change data= to json=.
# r = s.post(f"{base_url}/save-form-data", data=form_data1)
r = s.post(f"{base_url}/save-form-data", json=dict(form_data1, serverCacheId=str(serverCacheId)))
form_data2:
Set "serverCacheId" and change data= to json=.
# r = s.post(f"{base_url}/save-form-data", data=form_data2)
r = s.post(f"{base_url}/save-form-data", json=dict(form_data2, serverCacheId=str(serverCacheId)))
(Optional, for consistency) Change "loopBack": "false" to "loopBack": false.
# "loopBack": "false"
"loopBack": False
The combined code:
import requests as rs
form_data1 = {
"showSolarSelection": "true",
"energyType": "Electricity",
"userType": "Residential",
"bill": "no bill",
"postcode": "3000",
"usageProfile": "0",
"averageDailyConsumption": "0",
"skipNMI": "true",
"smartMeter": "1",
"disclaimer": "true",
"hasSolar": "0",
"solarCapacity": "",
"hasConcession": "0",
"feedInTariff": "",
"distributor": {
"id": "4",
"name": "Citipower",
"display": "Citipower",
"phone": "1300 301 101 / 13 12 80",
"distribution_zone_id": "11",
"distribution_zone_name": "All"
},
"distributorDerived": "0",
"distributorSubmit": "true",
"pageDataType": "energyConfigData",
"loopBack": False
}
form_data2 = {
"pvCapacity": "0", "pvCapacityCap": "null", "hhSize": "1", "totalRooms": "1", "fridgeCount": "0",
"gasConnection": "4", "poolHeating": "0", "poolHeatingSolar": "false", "poolHeatingGas": "false",
"poolHeatingElectric": "false", "poolHeatingNone": "false", "spaceHeatingElectricDucted": "false",
"spaceHeatingSplitSystem": "false", "spaceHeatingElectricUnderfloor": "false",
"spaceHeatingElectricIndividual": "false", "spaceHeatingGasDucted": "false",
"spaceHeatingGasUnderfloor": "false", "spaceHeatingGasIndividual": "false", "spaceHeatingOther": "false",
"spaceHeatingNone": "true", "spaceCoolingRoomAC": "false", "spaceCoolingSplitSystem": "false",
"spaceCoolingDuctedReverse": "false", "spaceCoolingDuctedEvaporative": "false",
"spaceCoolingPortableRef": "false", "spaceCoolingPortableEvap": "false", "spaceCoolingOther": "false",
"spaceCoolingNone": "true", "seaDistance": "", "clothesDryer": "0", "clothesDryerWeekday": "",
"clothesDryerWeekend": "", "dishwasherWeekday": "", "dishwasherWeekend": "",
"waterHeatingElectric": "false", "waterHeatingElectricSolar": "false", "waterHeatingGasStorage": "false",
"waterHeatingGasInstant": "false", "waterHeatingGasSolar": "false", "waterHeatingOther": "true",
"controlledLoad": "", "tvTotal": "", "turnOffAtPowerShort": "", "ovensElectric": "", "ovensGas": "",
"washingMachineUsage": "", "washingMachineWeekday": "", "washingMachineWeekend": "",
"televisionUsageWeekday": "", "televisionUsageWeekend": "", "heatingUsageMethod": "",
"gasUsageWinter": "0", "hhSize51": "", "energyType": "Electricity", "hasSolar": "0",
"pageDataType": "energyProfileData", "loopBack": False
}
base_url = "https://compare.energy.vic.gov.au/api"

# All requests share one Session so cookies set by the site are reused.
with rs.Session() as s:
    # Step 1: ask the API for the cache id that every later request must carry.
    r = s.get(f"{base_url}/get-psb-details?serverCacheId=null")
    r.raise_for_status()
    serverCacheId = str(r.json()["serverCacheId"])

    # Steps 2-3: submit both forms as JSON bodies, each tagged with the cache id.
    for form_data in (form_data1, form_data2):
        r = s.post(f"{base_url}/save-form-data",
                   json=dict(form_data, serverCacheId=serverCacheId))
        r.raise_for_status()  # stop early instead of querying offers blindly

    # Step 4: fetch the offers for the same cache id.
    body = {"serverCacheId": serverCacheId,
            "loopBack": "false",
            "selectedEnergy": "/offer"}
    r = s.get(f"{base_url}/get-offers", params=body)
    r.raise_for_status()
    print(r.json())

Related

Parsing JSON in Python and Converting to Excel

I am attempting to parse this JSON and convert it to Excel, printing only certain columns using Python, but I keep getting a TypeError: string indices must be integers. The JSON file repeats the structure inside "d" over and over again.
JSON:
{
"d": [
{
"__type": "New Cust",
"Description": "TV Purchase",
"End": "/Date(1624962600000)/",
"ID": 1,
"Resources": [
{
"Available": true,
"Key": "12345",
"Text": "John Doe",
"Type": "SalesProvider",
"CalendarID": "1234"
}
],
"Subject": "Meeting with Sam",
"Visible": true,
"AppStatus": "5",
"StartTime": "06/01/2021 10:30:00",
"AppID": "1",
"ServiceID": "7",
"NotesCount": "0",
"CustomerID": "1",
"AppType": "NNR",
"IsEditedThis": "False",
"BusinessPackageID": "0",
"PopupNotesCount": "0",
"EventType": "1",
"SPName": "John Doe",
"SPID": "12345",
"EventCapacity": "0",
"IsPaid": "False",
"IsWorkShop": "False",
"FormStatus": "0",
"PaidType": "1",
"AppComment": "",
"CustName": "Sam Will",
"ResourceID": "",
"CssClass": "rsCategoryBlue",
"ServiceName": "TV Sale",
"NoOfAttendees": null,
"StreamingNoOfAttendees": null,
"StreamingStatus": "0",
"StreamingEventCapacity": "",
"Photo": "",
"PersonalOffType": null,
"ResourceName": null,
"IsShowCheckIn": false,
"PaymentStatus": 0
},
{
"__type": "New Cust",
"Description": "Receiver Purchase",
"End": "/Date(1624962600000)/",
"ID": 1,
"Resources": [
{
"Available": true,
"Key": "12345",
"Text": "John Doe",
"Type": "SalesProvider",
"CalendarID": "1234"
}
],
"Subject": "Meeting with Bill",
"Visible": true,
"AppStatus": "5",
"StartTime": "07/02/2021 9:30:00",
"AppID": "1",
"ServiceID": "7",
"NotesCount": "0",
"CustomerID": "1",
"AppType": "NNR",
"IsEditedThis": "False",
"BusinessPackageID": "0",
"PopupNotesCount": "0",
"EventType": "1",
"SPName": "John Doe",
"SPID": "12345",
"EventCapacity": "0",
"IsPaid": "False",
"IsWorkShop": "False",
"FormStatus": "0",
"PaidType": "1",
"AppComment": "",
"CustName": "Bill Tom",
"ResourceID": "",
"CssClass": "rsCategoryBlue",
"ServiceName": "Audio Sale",
"NoOfAttendees": null,
"StreamingNoOfAttendees": null,
"StreamingStatus": "0",
"StreamingEventCapacity": "",
"Photo": "",
"PersonalOffType": null,
"ResourceName": null,
"IsShowCheckIn": false,
"PaymentStatus": 0
}
]
}
Python Code:
import json
import pandas as pd
f = open('JSON.txt', 'r')
data = json.loads(f.read())
l = []
for profile in data['d']:
l.append(profile["Subject"]["StartTime"]["IsPaid"]["CustName"]["ServiceName"])
df1 = pd.DataFrame(l)
print(df1)
df1.to_excel('df1.xlsx')
I do not need the "Resources": [] info I just need certain parameters outside it in the JSON object. I am having difficulty parsing the JSON any help would be appreciated.
you can use a combination of the json standard library and pd.json_normalize
import json
import pandas as pd
# json.loads() parses a JSON *string*; passing it the file name would try to
# parse the text 'JSON.txt' itself. Open the file and parse its contents.
with open('JSON.txt', encoding='utf-8') as f:
    parsed_json = json.load(f)
# One output row per entry of each record's "Resources" list.
df = pd.json_normalize(parsed_json['d'], record_path='Resources')
print(df)
Available Key Text Type CalendarID
0 True 12345 John Doe SalesProvider 1234
1 True 12345 John Doe SalesProvider 1234
then pass it to excel
df.to_excel(...,index=False)
Going back to your issue, it seems like you're trying to grab a bunch of fields at once, as if indexing with a list, when in reality each chained [...] indexes into the result of the previous lookup, which here is a single string.
print(data['d'][0]['Subject'])
'Meeting with Sam'
This has no other nested items so you'll naturally get an error.
The error TypeError: string indices must be integers is telling you that a string can only be indexed or sliced with integers, e.g.:
#reversing the string.
print(data['d'][0]['Subject'][::-1])
maS htiw gniteeM
or
#first two characters of the string.
print(data['d'][0]['Subject'][:2])
Me
if you want to grab only a subset of columns from the top level you could do :
cols = ['Subject', 'StartTime', 'IsPaid', 'CustName', 'ServiceName']
df = pd.json_normalize(parsed_json['d'],)[cols]
print(df)
Subject StartTime IsPaid CustName ServiceName
0 Meeting with Sam 06/01/2021 10:30:00 False Sam Will TV Sale
1 Meeting with Bill 07/02/2021 9:30:00 False Bill Tom Audio Sale

How to append to the child node in JSON at every iteration using python provided new child node exists?

Sample
template = {
"Table": [
{
"level": 2,
"value": {
"element Name": "",
"add Row": "False",
"cost Type": "",
"flag": "flag",
"cost": "",
"actions": "True"
},
"RHS": {},
"children": [
{
"level": 3,
"value": {
"element Name": "",
"add Row": "False",
"cost Type": "",
"cost": "",
"actions": "True"
},
"RHS": {},
"children": []
}
]
}
]
}
Considering the above dictionary, I want to append to the last "children" and every time loop runs it should append to the children created in previous iteration.
Loop 1:
"children":{
"level": 4,
"value": {"element Name": "",
"add Row": "False",
"cost Type": "",
"cost": "",
"actions": "True"},
"RHS": {},
"children":
[]
}
Loop 2:
iteration 2
"children":{
"level": 5,
"value": {"element Name": "",
"add Row": "False",
"cost Type": "",
"cost": "",
"actions": "True"},
"RHS": {},
"children":
[]
}
and so on.
My code is:
Python code for loop
for _ in range(sublevels):
number = number + 1
child = {"level": sublevels + 2,
"value": {"element Name": "", "add Row": False,
"cost Type": "", "cost": "",
"actions": True}, "RHS": {}, "children": []}
template['Table'][0]['children'].append(child)
Output:
After iteration, the JSON should look like below
{
"Table": [
{
"level": 2,
"value": {
"element Name": "",
"add Row": "False",
"cost Type": "",
"flag": "flag",
"cost": "",
"actions": "True"
},
"RHS": {},
"children": [
{
"level": 3,
"value": {
"element Name": "",
"add Row": "False",
"cost Type": "",
"cost": "",
"actions": "True"
},
"RHS": {},
"children": [
[
{
"level": 4,
"value": {
"element Name": "",
"add Row": "False",
"cost Type": "",
"cost": "",
"actions": "True"
},
"RHS": {},
"children": [
[
{
"level": 5,
"value": {
"element Name": "",
"add Row": "False",
"cost Type": "",
"cost": "",
"actions": "True"
},
"RHS": {},
"children": []
}
]
]
}
]
]
}
]
}
]
}
Iteration 1: template['Table'][0]['children']
Iteration 2: template['Table'][0]['children'][0]['children']
Iteration 3: template['Table'][0]['children'][0]['children'][0]['children']
import json
template = {"Table": []}
sublevels = 5

# `siblings` always refers to the list that must receive the next node:
# first the top-level "Table" list, then the "children" list of the node
# that was just appended. Following the reference directly avoids building
# an index path as a string and running it through exec().
siblings = template["Table"]
for depth in range(sublevels):
    child = {
        "level": depth + 2,  # levels start at 2 and grow by one per nesting
        "value": {
            "element Name": "",
            "add Row": False,
            "cost Type": "",
            "cost": "",
            "actions": True,
        },
        "RHS": {},
        "children": [],
    }
    siblings.append(child)
    siblings = child["children"]  # descend: the next node nests inside this one

print(json.dumps(template, indent=2))
Not the prettiest way, you should avoid using exec, but I was trying to call a JSON path from a dict, and it wasn't working so I used exec.
This works well and nests it tho..
Output I got from running this code:
{
"Table": [
{
"level": 2,
"value": {
"element Name": "",
"add Row": false,
"cost Type": "",
"cost": "",
"actions": true
},
"RHS": {},
"children": [
{
"level": 3,
"value": {
"element Name": "",
"add Row": false,
"cost Type": "",
"cost": "",
"actions": true
},
"RHS": {},
"children": [
{
"level": 4,
"value": {
"element Name": "",
"add Row": false,
"cost Type": "",
"cost": "",
"actions": true
},
"RHS": {},
"children": [
{
"level": 5,
"value": {
"element Name": "",
"add Row": false,
"cost Type": "",
"cost": "",
"actions": true
},
"RHS": {},
"children": [
{
"level": 6,
"value": {
"element Name": "",
"add Row": false,
"cost Type": "",
"cost": "",
"actions": true
},
"RHS": {},
"children": []
}
]
}
]
}
]
}
]
}
]
}

Creating dict within the dict based on dynamic key(contains the numbers) e.g 'abx.123':'xyz'. Group similar numbers to the Dict and make single dict

I have a dict object containing key/value pairs, and I want to group the entries based on the number embedded in each key,
e.g.
{'abx.123.a':'name',
'abz.123.b':'address',
'aby.123.c':'location',
'abc.231.a':'Postion',
'abc.231.b':'dob'}
Now I want to collect the entries whose keys contain the number 123 into one nested dict inside the dict, and likewise collect the entries whose keys contain 231 into another.
Data = {
"arn": "arn",
"description": "Security group for all coporate communications",
"egress.#": "1",
"egress.4820.cidr_blocks.#": "1",
"egress.4820.cidr_blocks.0": "0.0.0.0/0",
"egress.4820.description": "",
"egress.4820.from_port": "0",
"egress.4820.ipv6_cidr_blocks.#": "0",
"egress.4820.prefix_list_ids.#": "0",
"egress.4820.protocol": "-1",
"egress.4820.security_groups.#": "0",
"egress.4820.self": "False",
"egress.4820.to_port": "0",
"id": "sg-080b03",
"ingress.#": "4",
"ingress.1279476397.cidr_blocks.#": "0",
"ingress.1279476397.description": "self",
"ingress.1279476397.from_port": "0",
"ingress.1279476397.prefix_list_ids.#": "0",
"ingress.1279476397.protocol": "-1",
"ingress.1279476397.security_groups.#": "0",
"ingress.1279476397.self": "true",
"ingress.1279476397.to_port": "0",
"ingress.2455438834.cidr_blocks.0": "10.10.0.0/16",
"ingress.2455438834.description": "cluster VPC",
"ingress.2455438834.from_port": "443",
"ingress.2455438834.protocol": "tcp",
"ingress.2455438834.to_port": "443",
"ingress.3391123749.cidr_blocks.#": "0",
"ingress.3391123749.description": "eks-cluster-master",
"ingress.3391123749.from_port": "443",
"ingress.3391123749.protocol": "tcp",
"ingress.3391123749.to_port": "443",
"ingress.439086653.cidr_blocks.#": "0",
"ingress.439086653.description": "eks-cluster-master",
"ingress.439086653.from_port": "1025",
"ingress.439086653.ipv6_cidr_blocks.#": "0",
"ingress.439086653.prefix_list_ids.#": "0",
"ingress.439086653.protocol": "tcp",
"ingress.439086653.security_groups.#": "1",
"ingress.439086653.security_groups.3696519931": "sg-0007a603523411",
"ingress.439086653.self": "False",
"ingress.439086653.to_port": "65535",
"name":"xyz.abc.corporate",
"owner_id": "12345678",
"revoke_rules_on_delete": "False",
"tags.%": "2",
"tags.Name": "abc.xyz.pqr",
"tags.abc": "owned"
}
Create a dict for each group of keys that share the same number within the Data dict.
I want something like this. The numbers are dynamic and may change in the future.
e.g.
Data = [{
"arn": "arn",
"description": "Security group for all coporate communications",
"egress.#": "1",
{
"egress.4820.cidr_blocks.#": "1",
"egress.4820.cidr_blocks.0": "0.0.0.0/0",
"egress.4820.description": "",
"egress.4820.from_port": "0",
"egress.4820.ipv6_cidr_blocks.#": "0",
"egress.4820.prefix_list_ids.#": "0",
"egress.4820.protocol": "-1",
"egress.4820.security_groups.#": "0",
"egress.4820.self": "False",
"egress.4820.to_port": "0",
},
"id": "sg-080b03",
"ingress.#": "4",
{
"ingress.1279476397.cidr_blocks.#": "0",
"ingress.1279476397.description": "self",
"ingress.1279476397.from_port": "0",
"ingress.1279476397.prefix_list_ids.#": "0",
"ingress.1279476397.protocol": "-1",
"ingress.1279476397.security_groups.#": "0",
"ingress.1279476397.self": "true",
"ingress.1279476397.to_port": "0"
},
{
"ingress.2455438834.cidr_blocks.0": "10.10.0.0/16",
"ingress.2455438834.description": "cluster VPC",
"ingress.2455438834.from_port": "443",
"ingress.2455438834.protocol": "tcp",
"ingress.2455438834.to_port": "443"
},
{
"ingress.3391123749.cidr_blocks.#": "0",
"ingress.3391123749.description": "eks-cluster-master",
"ingress.3391123749.from_port": "443",
"ingress.3391123749.protocol": "tcp",
"ingress.3391123749.to_port": "443"
},
{
"ingress.439086653.cidr_blocks.#": "0",
"ingress.439086653.description": "eks-cluster-master",
"ingress.439086653.from_port": "1025",
"ingress.439086653.ipv6_cidr_blocks.#": "0",
"ingress.439086653.prefix_list_ids.#": "0",
"ingress.439086653.protocol": "tcp",
"ingress.439086653.security_groups.#": "1",
"ingress.439086653.security_groups.3631": "sg-0007",
"ingress.439086653.self": "False",
"ingress.439086653.to_port": "65535"
},
"name":"xyz.abc.corporate",
"owner_id": "12345678",
"revoke_rules_on_delete": "False",
"tags.%": "2",
"tags.Name": "abc.xyz.pqr",
"tags.abc": "owned"
}]
Can you please suggest me if somebody know the logic for this?
As Christian König already mentioned:
Your desired result is not a valid python data structure.
because your code contains things like this:
{
# ...
"egress.#": "1", # <-- ok
{ # <-- not ok (key is missing)
"egress.4820.cidr_blocks.#": "1",
"egress.4820.cidr_blocks.0": "0.0.0.0/0",
"egress.4820.description": "",
"egress.4820.from_port": "0",
"egress.4820.ipv6_cidr_blocks.#": "0",
"egress.4820.prefix_list_ids.#": "0",
"egress.4820.protocol": "-1",
"egress.4820.security_groups.#": "0",
"egress.4820.self": "False",
"egress.4820.to_port": "0",
},
}
But if you want to completely unfold your data like this (probably not very efficient):
Data: {
"description": "Security group for all coporate communications"
"arn": "arn"
"name": "xyz.abc.corporate"
"owner_id": "12345678"
"tags": {
"Name": "abc.xyz.pqr"
"%": "2"
"abc": "owned"
}
"revoke_rules_on_delete": "False"
"egress": {
"4820": {
"description": ""
"ipv6_cidr_blocks": {
"#": "0"
}
"prefix_list_ids": {
"#": "0"
}
"to_port": "0"
"cidr_blocks": {
"#": "1"
"0": "0.0.0.0/0"
}
"security_groups": {
"#": "0"
}
"self": "False"
"protocol": "-1"
"from_port": "0"
}
"#": "1"
}
"id": "sg-080b03"
"ingress": {
"1279476397": {
"description": "self"
"prefix_list_ids": {
"#": "0"
}
"to_port": "0"
"cidr_blocks": {
"#": "0"
}
"security_groups": {
"#": "0"
}
"self": "true"
"protocol": "-1"
"from_port": "0"
}
"3391123749": {
"description": "eks-cluster-master"
"cidr_blocks": {
"#": "0"
}
"to_port": "443"
"protocol": "tcp"
"from_port": "443"
}
"439086653": {
"description": "eks-cluster-master"
"ipv6_cidr_blocks": {
"#": "0"
}
"prefix_list_ids": {
"#": "0"
}
"to_port": "65535"
"cidr_blocks": {
"#": "0"
}
"security_groups": {
"3696519931": "sg-0007a603523411"
"#": "1"
}
"self": "False"
"protocol": "tcp"
"from_port": "1025"
}
"2455438834": {
"to_port": "443"
"cidr_blocks": {
"0": "10.10.0.0/16"
}
"protocol": "tcp"
"description": "cluster VPC"
"from_port": "443"
}
"#": "4"
}
}
you can use the following code:
def transformData(data, separator='.'):
    """Unfold a flat dict with separator-joined keys into nested dicts.

    Each key is split on ``separator``: every segment except the last names
    a nested dictionary, and the last segment holds the value. For example
    ``{"a.b": 1}`` becomes ``{"a": {"b": 1}}``.

    Args:
        data: Mapping whose keys are strings such as ``"egress.4820.to_port"``.
        separator: String that delimits the segments of a key.

    Returns:
        A recursively nested ``defaultdict``; accessing a missing key on it
        creates an empty nested dictionary.
    """
    # Imported here so the snippet is runnable on its own.
    from collections import defaultdict

    def makeDefaultDict():
        """Create a dict whose missing keys default to another such dict.

        This allows ``d["deeply"]["nested"]["value"] = 42`` without creating
        the intermediate levels by hand.
        """
        return defaultdict(makeDefaultDict)

    def insertRecursive(dict_, key, value):
        """Store ``value`` in ``dict_`` under the nested path given by ``key``."""
        head, sep, rest = key.partition(separator)  # split at first separator
        if sep == "":  # no separator left: ``head`` is the leaf key
            dict_[head] = value
        else:
            # dict_[head] is auto-created by the defaultdict if it is missing.
            insertRecursive(dict_[head], rest, value)

    result = makeDefaultDict()
    for key, value in data.items():
        insertRecursive(result, key, value)
    return result

How to delete an extra closing brace from a JSON file in Python?

I have an extra closing brace } at the end of a big JSON file, and I need to remove it using Python:
{
"layers": {
"frame": {
"frame.interface_id": "0",
"frame.encap_type": "127",
"frame.time": "Oct 10, 2017 18:05:51.620568000 Central European Daylight Time",
"frame.offset_shift": "0.000000000",
"frame.time_epoch": "1507651551.620568000",
"frame.time_delta": "0.324011000",
"frame.time_delta_displayed": "0.324011000",
"frame.time_relative": "29.248970000",
"frame.number": "38",
"frame.len": "64",
"frame.cap_len": "64",
"frame.marked": "0",
"frame.ignored": "0",
"frame.protocols": "wpan:6lowpan:ipv6:ipv6.hopopts:udp:data",
"frame.coloring_rule.name": "UDP",
"frame.coloring_rule.string": "udp"
},
"wpan": {
"wpan.frame_length": "66",
"wpan.fcf": "0x0000dc41",
"wpan.fcf_tree": {
"wpan.frame_type": "0x00000001",
"wpan.security": "0",
"wpan.pending": "0",
"wpan.ack_request": "0",
"wpan.pan_id_compression": "1",
"wpan.seqno_suppression": "0",
"wpan.ie_present": "0",
"wpan.dst_addr_mode": "0x00000003",
"wpan.version": "1",
"wpan.src_addr_mode": "0x00000003"
},
"wpan.seq_no": "8",
"wpan.dst_pan": "0x0000abcd",
"wpan.dst64": "00:21:2f:3c:c6:b5:00:01",
"wpan.src64": "00:21:2f:3c:c6:b5:00:7e",
"wpan.fcs_ok": "1"
},
"6lowpan": {
"IPHC Header": {
"6lowpan.pattern": "0x00000003",
"6lowpan.iphc.tf": "0x00000003",
"6lowpan.iphc.nh": "0",
"6lowpan.iphc.hlim": "0x00000002",
"6lowpan.iphc.cid": "1",
"6lowpan.iphc.sac": "1",
"6lowpan.iphc.sam": "0x00000003",
"6lowpan.iphc.m": "0",
"6lowpan.iphc.dac": "1",
"6lowpan.iphc.dam": "0x00000003",
"6lowpan.iphc.sci": "0x00000000",
"6lowpan.iphc.dci": "0x00000000"
},
"6lowpan.next": "0x00000000",
"6lowpan.src": "::221:2f3c:c6b5:7e",
"6lowpan.dst": "::221:2f3c:c6b5:1"
},
"ipv6": {
"ipv6.version": "6",
"ip.version": "6",
"ipv6.tclass": "0x00000000",
"ipv6.tclass_tree": {
"ipv6.tclass.dscp": "0",
"ipv6.tclass.ecn": "0"
},
"ipv6.flow": "0x00000000",
"ipv6.plen": "39",
"ipv6.nxt": "0",
"ipv6.hlim": "64",
"ipv6.src": "::221:2f3c:c6b5:7e",
"ipv6.addr": "::221:2f3c:c6b5:7e",
"ipv6.src_host": "::221:2f3c:c6b5:7e",
"ipv6.host": "::221:2f3c:c6b5:7e",
"ipv6.dst": "::221:2f3c:c6b5:1",
"ipv6.addr": "::221:2f3c:c6b5:1",
"ipv6.dst_host": "::221:2f3c:c6b5:1",
"ipv6.host": "::221:2f3c:c6b5:1",
"Source GeoIP: Unknown": "",
"Destination GeoIP: Unknown": "",
"ipv6.hopopts": {
"ipv6.hopopts.nxt": "17",
"ipv6.hopopts.len": "0",
"ipv6.hopopts.len_oct": "8",
"ipv6.opt": {
"ipv6.opt.type": "99",
"ipv6.opt.type_tree": {
"ipv6.opt.type.action": "1",
"ipv6.opt.type.change": "1",
"ipv6.opt.type.rest": "0x00000003"
},
"ipv6.opt.length": "4",
"ipv6.opt.rpl.flag": "0x00000000",
"ipv6.opt.rpl.flag_tree": {
"ipv6.opt.rpl.flag.o": "0",
"ipv6.opt.rpl.flag.r": "0",
"ipv6.opt.rpl.flag.f": "0",
"ipv6.opt.rpl.flag.rsv": "0x00000000"
},
"ipv6.opt.rpl.instance_id": "0x0000001e",
"ipv6.opt.rpl.sender_rank": "0x00000200"
}
}
},
"udp": {
"udp.srcport": "30002",
"udp.dstport": "3000",
"udp.port": "30002",
"udp.port": "3000",
"udp.length": "31",
"udp.checksum": "0x00007ca5",
"udp.checksum.status": "2",
"udp.stream": "17"
},
"data": {
"data.data": "2f:14:02:15:20:ed:1a:05:02:40:29:5c:ab:41:cc:23:c7:42:10:d8:eb:41:45",
"data.len": "23"
}
}
}
}
,
How could I remove it please?
I would be very grateful if you help me please?
First things first: having one extra closing brace means this is not valid JSON, so the best thing to do would be to cure the problem at the source. If this comes verbatim from some API then contact the tech staff; if this comes from your own code then fix it where this extra brace is introduced.
This being said, assuming your json is stored as a string data, then removing the last closing brace is as simple as
# Drop exactly ONE trailing brace. rstrip("}") would strip every consecutive
# trailing "}" and corrupt compact JSON such as '{"a":{"b":1}}}'.
data = data.strip()
if data.endswith("}"):
    data = data[:-1]
If this is part of an automated process and you only sometimes have this extraneous brace, you can test before cleaning up:
# NOTE: the counts also include braces that appear inside string values.
if data.count("}") > data.count("{"):
    # Drop exactly ONE trailing brace; rstrip("}") would strip every
    # consecutive trailing "}" and corrupt compact JSON.
    data = data.strip()
    if data.endswith("}"):
        data = data[:-1]

can't print the value of json object in django

I have this json object in ajax_data variable
{
"columns[0][data]": "0",
"columns[1][name]": "",
"columns[5][searchable]": "true",
"columns[5][name]": "",
"columns[4][search][regex]": "false",
"order[0][dir]": "asc",
"length": "10",
}
I have converted it using json.loads() function like.
ajax_data = json.loads(ajax_data)
I want to get the values of "order[0][dir]" and "columns[0][data]", but if I print it using
ajax_data['order'][0]['dir']
its giving error :
KeyError at /admin/help
'order'
But the same code works if I access the length key.
The keys you have used are actually not a good way of implementation.
{
"columns[0][data]": "0",
"columns[1][name]": "",
"columns[5][searchable]": "true",
"columns[5][name]": "",
"columns[4][search][regex]": "false",
"order[0][dir]": "asc",
"length": "10",
}
Instead of this you should have gone for
{
"columns": [
{"data": "0", "name": "", "searchable": "true", "name": "", "search": {
"regex": "false"}
},
{"data": "0", "name": "", "searchable": "true", "name": ""," search": {
"regex": "false"}},
{"data": "0", "name": "", "searchable": "true", "name": "", "search": {
"regex": "false"}},
{"data": "0", "name": "", "searchable": "true", "name": "", "search": {
"regex": "false"}},
{"data": "0", "name": "", "searchable": "true", "name": "", "search": {
"regex": "false"}},
{"data": "0", "name": "", "searchable": "true", "name": "", "search": {
"regex": "false"}},
],
"order": [
{"dir": "asc"}
],
"length": "10"
}
In this case ajax_data['order'][0]['dir'] will result in the value "asc".
For your current implementation the key is "order[0][dir]"
That is go for
ajax_data["order[0][dir]"]
Hope you understood the issue.
Structuring of json is very important when dealing with APIs. Try to restructure your json which will help for future too.
That's because length is a key in that json object, and order is not. The key names are the entire strings inside the quotes: columns[0][data], order[0][dir], etc.
Those are unusual key names, but perfectly valid.

Categories

Resources