Parsing nested JSON payload in Python

I am trying to get only value A from this nested JSON payload.
My function:
import requests
import json

def payloaded():
    from urllib.request import urlopen
    with urlopen("www.example.com/payload.json") as r:
        data = json.loads(r.read().decode(r.headers.get_content_charset("utf-8")))
        text = data["bod"]["id"]
        print(text)
The payload:
bod: {
    id: [
        {
            value: "A",
            summary: "B",
            format: "C"
        }
    ]
},
Currently it is returning everything within the brackets [... value ... summary ... format ...]
Solution found:
def payloaded():
    from urllib.request import urlopen
    with urlopen("www.example.com/payload.json") as r:
        data = json.loads(r.read().decode(r.headers.get_content_charset("utf-8")))
        text = data["bod"]["id"][0]["value"]
        print(text)

Since the id value is a list (even though it contains just a single element), you need to index into it. Lists in Python are zero-indexed (they start from zero), so you use [0] to extract the first element:
data["bod"]["id"][0]["value"]


Related

Why am I getting the error "TypeError: string indices must be integers" when trying to fetch data from an API?

JSON file:
{
  "success": true,
  "terms": "https://curr
  "privacy": "https://cu
  "timestamp": 162764598
  "source": "USD",
  "quotes": {
    "USDIMP": 0.722761,
    "USDINR": 74.398905,
    "USDIQD": 1458.90221
  }
}
The JSON file is above; I deleted a lot of the values as they took up too much space. My Python code is below.
import urllib.request, urllib.parse, urllib.error
import json
response = "http://api.currencylayer.com/live?access_key="
api_key = "42141e*********************"
parms = dict()
parms['key'] = api_key
url = response + urllib.parse.urlencode(parms)
mh = urllib.request.urlopen(url)
source = mh.read().decode()
data = json.loads(source)
pydata = json.dumps(data, indent=2)
print("which curreny do you want to convert USD to?")
xm = input('>')
print(f"Hoe many USD do you want to convert{xm}to")
value = input('>')
fetch = pydata["quotes"][0]["USD{xm}"]
answer = fetch*value
print(fetch)
Here is the output:

fetch = pydata["quotes"][0]["USD{xm}"]
TypeError: string indices must be integers
First of all, the JSON data you posted here is not valid: there are missing quotes and commas. For example, "terms": "https://curr has to be "terms": "https://curr",; the same applies to "privacy", and "timestamp" is missing a comma. After I fixed the JSON data I found a solution. You have to use data, not pydata: json.dumps() returns a string, and indexing a string with "quotes" is exactly what raises TypeError: string indices must be integers. So change fetch = pydata["quotes"][0]["USD{xm}"] to fetch = data["quotes"][0]["USD{xm}"]. But this would result in the next error, a KeyError, because in the JSON data you provided there is no array after the "quotes" key. So you either have to get rid of the [0], or the JSON data has to look like this:
"quotes":[{
"USDIMP": 0.722761,
"USDINR": 74.398905,
"USDIQD": 1458.90221
}]
At the end you only have to change data["quotes"]["USD{xm}"] to data["quotes"]["USD" + xm], because Python looks for a key literally called USD{xm} rather than, for example, USDIMP when you type "IMP" at the input ({xm} only interpolates inside an f-string, e.g. f"USD{xm}"). I hope this fixes your problem.
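Putting the three fixes together (use data, drop the [0], build the key with "USD" + xm), here is a sketch of the corrected lookup. The float() conversion of value is my addition, since the question otherwise multiplies by a string, and the API key stays the asker's placeholder:

import urllib.request
import json

url = "http://api.currencylayer.com/live?access_key=" + "42141e*********************"
data = json.loads(urllib.request.urlopen(url).read().decode())  # keep the dict; don't index the json.dumps() string

xm = input('Which currency do you want to convert USD to? > ')
value = float(input('How many USD do you want to convert? > '))  # float() added so the multiplication works

fetch = data["quotes"]["USD" + xm]  # no [0]; key built with +, e.g. "USDIMP"
print(fetch * value)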

Payload requests - string concatenation layout error

I have code that reads a list of numbers that are parameters for a Python request:
def search(number):
    url = "http://localhost:8080/sistem/checkNumberStatus"
    payload = '{\n\"SessionName\":\"POC\",\n\"celFull\":\"' + number + '\"\n}'
    headers = {
        'Content-Type': 'application/json'
    }
    response = requests.request("POST", url, headers=headers, data=payload)

object = open('numbers_wpp.txt', 'r')
for number in object:
    search(number)
But when I print the JSON payload, the output is:
{
"SessionName":"POC",
"celFull":"5512997708936
"
}
{
"SessionName":"POC",
"celFull":"5512997709337
"
}
{
"SessionName":"POC",
"celFull":"5512992161195"
}
When reading the file with 3 numbers, the quotes around the celFull value closed correctly only on the last loop iteration (the last number), while for the first two the closing quote was pushed to a new line. This break makes the queries fail.
If numbers_wpp.txt is a file that has each number on a different line, the following code iterates over the file line by line:

for number in object:
    search(number)  # <-- number is actually a line in your file
Since each number is on its own line, every line but the last ends with a '\n'. So

123
456

is actually 123\n456, which means number is really "123\n". This causes your payload to have a \n before the closing quote.
You could fix this by calling number.strip() which strips whitespace from either end of the string.
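A quick illustration of what strip() removes here:

line = "5512997708936\n"    # what iterating over the file yields
print(repr(line.strip()))   # '5512997708936' -- the trailing newline is gone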
Alternatively, you should consider not handcrafting the JSON string and letting the requests library build it for you (see the requests documentation):
import requests

def search(number):
    url = "http://localhost:8080/sistem/checkNumberStatus"
    payload = {"SessionName": "POC", "celFull": number}  # <-- a plain Python dict
    response = requests.post(url, json=payload)  # json= serializes the dict and sets the Content-Type header

object = open('numbers_wpp.txt', 'r')
for line in object:
    number = int(line.strip())  # <-- strip the newline and convert to an integer
    search(number)
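A note on the json= choice above, as a small sketch: with a plain dict, data= form-encodes the body, while json= serializes it to a JSON body and sets the Content-Type header, which is what the original handcrafted string was trying to do. Preparing the requests without sending them shows the difference (localhost URL as in the question):

import requests

payload = {"SessionName": "POC", "celFull": "5512992161195"}

form = requests.Request('POST', 'http://localhost:8080/sistem/checkNumberStatus', data=payload).prepare()
js = requests.Request('POST', 'http://localhost:8080/sistem/checkNumberStatus', json=payload).prepare()

print(form.body)                   # SessionName=POC&celFull=5512992161195
print(js.body)                     # b'{"SessionName": "POC", "celFull": "5512992161195"}'
print(js.headers['Content-Type'])  # application/json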

How to use pandas DF as params in HTTP request

I have a list of places from an Excel file which I would like to enrich with GeoNames IDs. Starting from the Excel file I made a pandas DataFrame, and I would like to use the values from the DF as params in my request.
Here is the script I made:
import pandas as pd
import requests
import json

require_cols = [1]
required_df = pd.read_excel('grp.xlsx', usecols=require_cols)
print(required_df)

url = 'http://api.geonames.org/searchJSON?'
params = {'username': "XXXXXXXX",
          'name_equals': (required_df),
          'maxRows': "1"}
e = requests.get(url, params=params)
pretty_json = json.loads(e.content)
print(json.dumps(pretty_json, indent=2))
The problem is related to the definition of this parameter:
'name_equals': (required_df)
I would like to use the places (around 15k) from the DF as the param, iteratively retrieve the related GeoNames IDs, and write the output to a separate Excel file.
The simple request works:
import requests
import json

url = 'http://api.geonames.org/searchJSON?'
params = {'username': "XXXXXXX",
          'name_equals': "Aire",
          'maxRows': "1"}
e = requests.get(url, params=params)
pretty_json = json.loads(e.content)
print(json.dumps(pretty_json, indent=2))
#print(e.content)
As does the definition of the pandas DataFrame:
# import pandas lib as pd
import pandas as pd

require_cols = [0, 1]
# only read specific columns from an excel file
required_df = pd.read_excel('grp.xlsx', usecols=require_cols)
print(required_df)
I also tried via SPARQL without results, so I decided to go via Python.
Thanks for your time.
You can use a for-loop:

import pandas as pd

df = pd.DataFrame({'Places': ['London', 'Paris', 'Berlin']})

for item in df['Places']:
    print('requests for:', item)
    # ... rest of code ...
or df.apply():

import pandas as pd

def run(item):
    print('requests for:', item)
    # ... rest of code ...
    return 'result for ' + item

df = pd.DataFrame({'Places': ['London', 'Paris', 'Berlin']})
df['Results'] = df['Places'].apply(run)
Thanks @furas for your reply.
I solved it like this:
import pandas as pd
import requests
import json

url = 'http://api.geonames.org/searchJSON?'
df = pd.read_excel('Book.xlsx', sheet_name='Sheet1', usecols="B")

for item in df.place_name:
    df.place_name.head()
    params = {'username': "XXXXXX",
              'name_equals': item,
              'maxRows': "1"}
    e = requests.get(url, params=params)
    pretty_json = json.loads(e.content)
    for item in pretty_json["geonames"]:
        print(json.dumps(item["geonameId"], indent=2))
        with open('data.json', 'w', encoding='utf-8') as f:
            json.dump(item["geonameId"], f, ensure_ascii=False, indent=4)
        #print(e.content)
The only problem now is the JSON output: printing gives me the complete list of IDs, but when I write the output to a file I get just the last ID from the list (the file is reopened with mode 'w' inside the loop, so each iteration overwrites the previous one).
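One way to fix that, as a sketch under the question's assumptions (same Book.xlsx layout and place_name column): collect the IDs in a list inside the loop and write the file once at the end, so no iteration truncates data.json.

import pandas as pd
import requests
import json

url = 'http://api.geonames.org/searchJSON?'
df = pd.read_excel('Book.xlsx', sheet_name='Sheet1', usecols="B")

geoname_ids = []                     # accumulate here instead of rewriting the file
for place in df.place_name:
    params = {'username': "XXXXXX",  # the question's placeholder username
              'name_equals': place,
              'maxRows': "1"}
    response = requests.get(url, params=params)
    for result in response.json()["geonames"]:
        geoname_ids.append(result["geonameId"])

# open in 'w' mode once, after the loop, so nothing is overwritten
with open('data.json', 'w', encoding='utf-8') as f:
    json.dump(geoname_ids, f, ensure_ascii=False, indent=4)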

Finding all URLs in a single line

I'm trying to fetch a page that has many URLs and other stuff, all on just one line of plain text, like
"link_url":"http://www.example.com/link1?site=web","mobile_link_url":"http://m.example.com/episode/link1?site=web" link_url":"http://www.example.com/link2?site=web","mobile_link_url":"http://m.example.com/episode/link2?site=web"
I tried

import re
import requests as req

response = req.get("http://api.example.com/?callback=jQuery112")
content = response.text

Printing content gives me the "link_url": output, but I need to find
http://www.example.com/link1?site=web
http://www.example.com/link2?site=web
and output only link1 and link2 to a file like
link1
link2
link3
The code below might be what you need.
import re

urls = '''"link_url":"http://www.example.com/link1?site=web","mobile_link_url":"http://m.example.com/episode/link1?site=web" link_url":"http://www.example.com/link2?site=web","mobile_link_url":"http://m.example.com/episode/link2?site=web"'''
links = re.findall(r'http://www[a-z/.?=:]+(link\d)+', urls)
print(links)  # ['link1', 'link2']
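Since the question asks for the names in a file, a short follow-up sketch (links.txt is an assumed output name):

with open('links.txt', 'w') as f:
    for link in links:
        f.write(link + '\n')  # one name per line: link1, link2, ...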
If it is a string and not a JSON object, then you could do this, even though it's a bit hacky:

s1 = "\"link_url\":\"http://www.example.com/link1?site=web\",\"mobile_link_url\":\"http://m.example.com/episode/link1?site=web\" link_url\":\"http://www.example.com/link2?site=web\",\"mobile_link_url\":\"http://m.example.com/episode/link2?site=web\""
links = [x for x in s1.replace("\":\"", "LINK_DELIM").replace("\"", "").replace(" ", ",").split(",")]
for link in links:
    print(link.split("LINK_DELIM")[1])
Which yields:
http://www.example.com/link1?site=web
http://m.example.com/episode/link1?site=web
http://www.example.com/link2?site=web
http://m.example.com/episode/link2?site=web
Though I think @al76's answer is more elegant for this.
But if it's a JSON which looks like:

[
    {
        "link_url": "http://www.example.com/link1?site=web",
        "mobile_link_url": "http://m.example.com/episode/link1?site=web"
    },
    {
        "link_url": "http://www.example.com/link2?site=web",
        "mobile_link_url": "http://m.example.com/episode/link2?site=web"
    }
]
Then you could do something like:

import json

s1 = "[{ \"link_url \": \"http://www.example.com/link1?site=web \", \"mobile_link_url \": \"http://m.example.com/episode/link1?site=web \"}, { \"link_url \": \"http://www.example.com/link2?site=web \", \"mobile_link_url \": \"http://m.example.com/episode/link2?site=web \"} ]"
data = json.loads(s1)
links = [y for x in data for y in x.values()]
for link in links:
    print(link)
If this is a JSON API then you can use response.json() to get a Python dictionary, as .text will give you the response as one long string.
You also do not need to use regex for something so simple; Python comes with a URL parser out of the box.
So provided your response is something like
[
    {
        "link_url": "http://www.example.com/link1?site=web",
        "mobile_link_url": "http://m.example.com/episode/link1?site=web"
    },
    {
        "link_url": "http://www.example.com/link2?site=web",
        "mobile_link_url": "http://m.example.com/episode/link2?site=web"
    }
]
(doesn't matter if IRL it's one line, as long as it's valid JSON)
You can iterate the results as dictionaries, then use urlparse to get specific components of your URLs:
from urllib.parse import urlparse
import requests

response = requests.get("http://api.example.com/?callback=jQuery112")
for url in response.json():
    print(urlparse(url["link_url"]).path.rsplit('/', 1)[-1])
urlparse(...).path returns only the path of your URL, e.g. /episode/link1, and then we just take the last segment of that with rsplit to get link1, link2, etc.
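A quick look at what urlparse exposes, using one of the question's URLs:

from urllib.parse import urlparse

parsed = urlparse("http://m.example.com/episode/link1?site=web")
print(parsed.path)                     # /episode/link1
print(parsed.path.rsplit('/', 1)[-1])  # link1
print(parsed.query)                    # site=web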
try

import re

urls = """ "link_url":"http://www.example.com/link1?site=web","mobile_link_url":"http://m.example.com/episode/link1?site=web" link_url":"http://www.example.com/link2?site=web","mobile_link_url":"http://m.example.com/episode/link2?site=web" """
print(re.findall(r'"http://www[^"]+"', urls))
urls=""" "link_url":"http://www.example.com/link1?site=web","mobile_link_url":"http://m.example.com/episode/link1?site=web" link_url":"http://www.example.com/link2?site=web","mobile_link_url":"http://m.example.com/episode/link2?site=web" """
p = [i.split('":')[1] for i in urls.replace(' ', ",").split(",")[1:-1]]
#### Output ####
['"http://www.example.com/link1?site=web"',
'"http://m.example.com/episode/link1?site=web"',
'"http://www.example.com/link2?site=web"',
'"http://m.example.com/episode/link2?site=web"']
*Not as efficient as regex.

Python: Normalize JSON response (array)

I am new to JSON and Python, and I am trying to achieve the following.
I need to parse this JSON:
{
    "id": "12345abc",
    "codes": [
        "BSVN1FKW3JKKNNMN",
        "HJYYUKJJL999OJR",
        "DFTTHJJJJ0099JUU",
        "FGUUKHKJHJGJJYGJ"
    ],
    "ctr": {
        "source": "xyz",
        "user_id": "1234"
    }
}
Expected output: normalized on the "codes" values
ID~CODES~USER_ID
12345abc~BSVN1FKW3JKKNNMN~1234
12345abc~HJYYUKJJL999OJR~1234
12345abc~DFTTHJJJJ0099JUU~1234
12345abc~FGUUKHKJHJGJJYGJ~1234
I started with the below but need help getting to my desired output.
The "codes" block can have any number of values, separated by commas.
The code below throws "TypeError: string indices must be integers":
#!/usr/bin/python
import os
import json
import csv

f = open('rspns.csv', 'w')
writer = csv.writer(f, delimiter='~')
headers = ['ID', 'CODES', 'USER_ID']
default = ''
writer.writerow(headers)

string = open('sample.json').read().decode('utf-8')
json_obj = json.loads(string)

#print json_obj['id']
#print json_obj['codes']
#print json_obj['codes'][0]
#print json_obj['codes'][1]
#print json_obj['codes'][2]
#print json_obj['codes'][3]
#print json_obj['ctr']['user_id']

for keyword in json_obj:
    row = []
    row.append(str(keyword['id']))
    row.append(str(keyword['codes']))
    row.append(str(keyword['ctr']['user_id']))
    writer.writerow(row)
If your json_obj looks exactly like that, that is, it is a dictionary, then the issue is that when you do

for keyword in json_obj:

you are iterating over the keys in json_obj, so when you then try to access ['id'] on such a key it errors out with string indices must be integers.
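A quick demonstration of that failure mode, using the question's data:

json_obj = {
    "id": "12345abc",
    "codes": ["BSVN1FKW3JKKNNMN"],
    "ctr": {"source": "xyz", "user_id": "1234"},
}

for keyword in json_obj:
    print(keyword)  # prints the keys: id, codes, ctr
    # keyword is a str, so keyword['id'] raises
    # TypeError: string indices must be integers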
You should first get the id and user_id before looping, then loop over json_obj['codes'] and write the previously fetched id and user_id along with the current value from the codes list to the CSV writer as a row.
Example -

import json
import csv

string = open('sample.json').read().decode('utf-8')
json_obj = json.loads(string)

with open('rspns.csv', 'w') as f:
    writer = csv.writer(f, delimiter='~')
    headers = ['ID', 'CODES', 'USER_ID']
    writer.writerow(headers)
    id = json_obj['id']
    user_id = json_obj['ctr']['user_id']
    for code in json_obj['codes']:
        writer.writerow([id, code, user_id])
You don't want to iterate through json_obj, as that is a dictionary and iterating through it yields its keys. The TypeError is caused by trying to index into those keys ('id', 'codes', and 'ctr'), which are strings, as if they were dictionaries.
Instead, you want a separate row for each code in json_obj['codes'], using the json_obj dictionary for your lookups:
for code in json_obj['codes']:
    row = []
    row.append(json_obj['id'])
    row.append(code)
    row.append(json_obj['ctr']['user_id'])
    writer.writerow(row)
