Yandex Spellchecker API Returns Empty Array - Python

I am trying to use a Russian-language spellcheck API, Yandex.Speller.
The request works fine in my browser. However, when I send it from a Python script, the response is empty.
I am stumped as to what I am doing wrong.
Here is my code:
import urllib
from urllib.request import urlopen
import json

def main():
    api(text_preproc())

def text_preproc():
    """ Takes misspelled word/phrase,
    "t", and prepares it for
    API request
    """
    t = "синхрафазатрон в дубне"
    text = t.replace(" ", "+")
    return text

def diff_api(text):
    my_url = "https://speller.yandex.net/services/spellservice.json/checkText?text="
    my_headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36'}
    my_data = {
        "text": text,
        "lang": "ru",
        "format": "plain"}
    my_uedata = urllib.parse.urlencode(my_data)
    my_edata = my_uedata.encode('ascii')
    req = urllib.request.Request(url=my_url, data=my_edata, headers=my_headers)
    response = urlopen(req)
    data = json.load(response)
    print(data)
The response is always an empty array, no matter how I tinker with my request.
Any insight into what I might be doing wrong?

my_uedata has to be part of the URL you send the request to.
Also, in:
def main():
    api(text_preproc())
you call api(), but that function is not defined. I've used diff_api().
Try this:
import json
import urllib
from urllib.request import urlopen

def main():
    diff_api(text_preproc("синхрафазатрон в дубне"))

def text_preproc(phrase):
    """ Takes a misspelled word/phrase
    and prepares it for the API request
    """
    return phrase.replace(" ", "+")

def diff_api(text):
    my_url = "https://speller.yandex.net/services/spellservice.json/checkText?text="
    my_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36'}
    my_data = {
        "text": text,
        "lang": "ru",
        "format": "plain"}
    my_uedata = urllib.parse.urlencode(my_data)
    # append the url-encoded parameters to the URL instead of sending them as a POST body
    req = urllib.request.Request(url=my_url + my_uedata, headers=my_headers)
    data = json.load(urlopen(req))
    print(data)

main()
Output:
[{'code': 1, 'pos': 5, 'row': 0, 'col': 5, 'len': 14, 'word': 'синхрафазатрон', 's': ['синхрофазотрон', 'синхрофазатрон', 'синхрофазотрона']}]
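If you only need the suggested corrections rather than the whole response, each element of the returned list carries them under the 's' key (a minimal follow-up sketch based on the output above, using the data list inside diff_api):
for item in data:
    # each item describes one flagged word and its suggested replacements
    print(item['word'], '->', item['s'])
# синхрафазатрон -> ['синхрофазотрон', 'синхрофазатрон', 'синхрофазотрона']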

Related

How to decode an UTF-8 encoded API response

When I send a request to an API:
import requests
url = 'website'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36'
}
response = requests.get(url.strip(), headers=headers, timeout=10)
response.encoding = response.apparent_encoding
print(response.text)
The output is:
0e1\u10e2\u10d4\u10db\u10d0 BOSE,\u10d3\u10d0\u10ec\u10e7\u10d4\u10d1\u10d0-\u10d2\u10d0\u10e9\u10d4\u10e0\u10d4\u10d1\u10d8\u10e1 \u10e1\u10d8\u10e1\u10e2\u10d4\u10db\u10d0,\u10d4\u10da\u10d4\u10e5\u10e2\u10e0\u10dd\u10dc\u10e3\u10da\u10d8 \u10d3\u10d8\u10e4\u10d4\u10e0\u10d4\u10dc\u10ea\u10d8\u10d0\u10da\u10e3\u10e0\u10d8 \u10e1\u10d0\u10d9\u10d4\u10e2\u10d8,\u10eb\u10e0\u10d0\u10d5\u10d8\u10e1 \u10e1\u10d0\u10db\u10e3\u10ee\u10e0\u10e3\u10ed\u10d4 \u10d9\u10dd\u10dc\u10e2\u10e0\u10dd\u10da\u10d8\u10e1 \u10e1\u10d8\u10e1\u10e2\u10d4\u10db\u10d0,\u10ec\u10d4\u10d5\u10d8\u10e1 \u10d9\u10dd\u10dc\u10e2\u10e0\u10dd\u10da\u10d8\u10e1 \u10e1\u10d8\u10e1\u10e2\u10d4\u10db\u10d0,\u10e1\u10e2\u10d0\u10d1\u10d8\u10da\u10e3\u10e0\u10dd\u10d1\u10d8\u10e1 \u10e1\u10d8\u10e1\u10e2\u10d4\u10db\u10d0,\u10d3\u10d0\u10d1\u10da\u10dd\u10d9\u10d5\u10d8\u10e1 \u10e1\u10d0\u10ec\u10d8\u10dc\u10d0\u10d0\u10e6\u10db\u10d3\u10d4\u10d2\u10dd \u10d3\u10d0\u10db\u10e3\u10ee\u10e0\u10e3\u10ed\u10d4\u10d
How to decode it correctly?
In order to encode or decode a UTF-8 string you can use:
s = "test"
u = s.encode("utf8")
s = u.decode("utf8")
But your problem is that your string is UTF-8 encoded and escaped!
So you will need to un-escape it and then re-interpret it:
s = response.text
r = s.encode('raw_unicode_escape').decode('unicode_escape')
print(r)
# -> '0e1ტემა BOSE,დაწყება-გაჩერების სისტემა,ელექტრონული დიფერენციალური საკეტი,ძრავის სამუხრუჭე კონტროლის სისტემა,წევის კონტროლის სისტემა,სტაბილურობის სისტემა,დაბლოკვის საწინააღმდეგო დამუხრუჭებ'
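Note that sequences like \u10e2 are ordinary JSON string escapes, so if the whole response body is valid JSON you can usually skip the manual un-escaping and let the JSON parser decode them (a sketch under that assumption; response is the object from the question):
import json

# if the body parses as JSON, the \uXXXX escapes are decoded automatically
data = json.loads(response.text)   # equivalently: data = response.json()
print(data)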

How to change Json data output in table format

import requests
from pprint import pprint
import pandas as pd
baseurl = "https://www.nseindia.com/"
url = f'https://www.nseindia.com/api/live-analysis-oi-spurts-underlyings'
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, '
                         'like Gecko) '
                         'Chrome/80.0.3987.149 Safari/537.36',
           'accept-language': 'en,gu;q=0.9,hi;q=0.8', 'accept-encoding': 'gzip, deflate, br'}
session = requests.Session()
request = session.get(baseurl, headers=headers, timeout=30)
cookies = dict(request.cookies)
res = session.get(url, headers=headers, timeout=30, cookies=cookies)
print(res.json())
I tried df = pd.DataFrame(res.json()) but couldn't get the data in table format. How can I do that? Also, how do I select only a few particular columns in the output instead of all of them?
Try this :
import json
import codecs
df = pd.DataFrame(json.loads(codecs.decode(bytes(res.text, 'utf-8'), 'utf-8-sig'))['data'])
And to select specific columns, you can use:
mini_df = df[['symbol', 'latestOI', 'prevOI', 'changeInOI', 'avgInOI']]
print(mini_df)
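Since print(res.json()) in the question already parses the payload, the codecs round-trip may not be necessary; a simpler sketch (assuming the same top-level 'data' key) builds the frame straight from the parsed JSON:
# assumes res is the response object from the question and the JSON has a 'data' list
payload = res.json()
df = pd.DataFrame(payload['data'])
mini_df = df[['symbol', 'latestOI', 'prevOI', 'changeInOI', 'avgInOI']]
print(mini_df.to_string(index=False))  # plain table-style output without the index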

I was trying to scrape some data from a website, but can't understand how the web page calls functions to get the data?

I was trying to scrape an option data table. The website has a drop-down menu to select the expiration.
I can see that the page is making API calls like this
to fetch the data. However, if I use the link to send a request from Python I get nothing. Why is that, and how do I correct it?
import requests ##### to connect to web for data
import pandas as pd
import numpy as np
import datetime as dt
from pathlib import Path
#from io import BytesIO
#from zipfile import ZipFile
date=dt.datetime.today().strftime("%d%m%Y")
#date='09072021'
headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) chrome/80.0.3987.132 Safari/537.36','Accept-Language': 'en-US,en;q=0.9','Accept-Encoding': 'gzip, deflate'}
x= True
url="https://www.barchart.com/proxies/core-api/v1/options/get?baseSymbol=%24SPX&fields=symbol%2CbaseSymbol%2CstrikePrice%2Cmoneyness%2CbidPrice%2Cmidpoint%2CaskPrice%2ClastPrice%2CpriceChange%2CpercentChange%2Cvolume%2CopenInterest%2CvolumeOpenInterestRatio%2Cvolatility%2CoptionType%2CdaysToExpiration%2CexpirationDate%2CtradeTime%2CweightedImpliedVolatility%2ChistoricVolatility20d%2CsymbolCode%2CsymbolType&groupBy=optionType&expirationDate=nearest&meta=field.shortName%2Cexpirations%2Cfield.description&orderBy=strikePrice&orderDir=asc&raw=1"
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:81.0) Gecko/20100101 Firefox/81.0'}
with requests.Session() as req:
    req.headers.update(headers)
    response = req.get(url).json()
You can try this code:
import requests
import json
from urllib.parse import unquote

# create session
session = requests.Session()
# set user agent to avoid cloudflare
session.headers['User-Agent'] = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.164 Safari/537.36'
# create url
url = 'https://www.barchart.com/proxies/core-api/v1/options/get'
# create params for start page
params = {
    'view': 'stacked',
    'expiration': '2021-07-26-w'
}
# load first page
session.get(
    'https://www.barchart.com/stocks/quotes/$SPX/options', params=params
)
# create params for AJAX
params_ajax = {
    'baseSymbol': '$SPX',
    'fields': ','.join(
        [
            'symbol', 'baseSymbol', 'strikePrice', 'moneyness', 'bidPrice',
            'midpoint', 'askPrice', 'lastPrice', 'priceChange',
            'percentChange', 'volume', 'openInterest',
            'volumeOpenInterestRatio', 'volatility', 'optionType',
            'daysToExpiration', 'expirationDate', 'tradeTime',
            'weightedImpliedVolatility', 'historicVolatility20d',
            'symbolCode', 'symbolType'
        ]
    ),
    'groupBy': 'optionType',
    'expirationDate': '2021-07-28',
    'meta': ','.join(['field.shortName', 'expirations', 'field.description']),
    'orderBy': 'strikePrice',
    'orderDir': 'asc',
    'expirationType': 'weekly',
    'raw': 1
}
# change headers
session.headers['Accept'] = 'application/json'
session.headers['X-XSRF-TOKEN'] = unquote(session.cookies['XSRF-TOKEN'])
# get result
result = session.get(url, params=params_ajax).json()
print(json.dumps(result, indent=4))
{
    "count": 2,
    "total": 458,
    "data": {
        "Call": [
            {
                "symbol": "$SPX|20210726|1200.00WC",
                "baseSymbol": "$SPX",
                "strikePrice": "1,200.00",
                "moneyness": "+72.24%",
                "bidPrice": "3,125.50",
                "midpoint": "3,126.95",
                "askPrice": "3,128.40",
                "lastPrice": "0.00",
                "priceChange": "0.00",
                "percentChange": "unch",
                "volume": "0",
                "openInterest": "0",
                "volumeOpenInterestRatio": "0.00",
                "volatility": "438.24%",
                "optionType": "Call",
                "daysToExpiration": "6",
                "expirationDate": "07\/26\/21",
                "tradeTime": "N\/A",
                "weightedImpliedVolatility": "14.66%",
                "historicVolatility20d": "10.79%",
                "symbolType": "Call",
                ...
P.S. If this helped you, please mark the answer as accepted.
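Since the question imports pandas, a natural next step is loading the option rows into a DataFrame; here is a small sketch assuming the response shape shown above (result['data']['Call'] is a list of dicts):
import pandas as pd

# build a table of the call options from the parsed JSON above
calls = pd.DataFrame(result['data']['Call'])
print(calls[['strikePrice', 'bidPrice', 'askPrice', 'volatility']].head())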

Web scraping website - can't print price - API & JSON I think

Having trouble with this website trying to print the price; I think I'm close, but I'm getting errors. Please help, thanks.
The response I get is:
{'statusDetails': {'state': 'FAILURE', 'errorCode': 'SYS-3003', 'correlationid': 'rrt-5636881267628447407-b-gsy1-18837-18822238-1', 'description': 'Invalid key identifier or token'}}
Code:
import requests
import json
s = requests.Session()
url = 'https://www.bunnings.com.au/ozito-pxc-2-x-18v-cordless-line-trimmer-skin-only_p0167719'
header = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36'}
resp = s.get(url,headers=header)
api_url = f'https://api.prod.bunnings.com.au/v1/products/0167719/fulfillment/6400/radius/100000?isToggled=true'
price_resp = s.get(api_url,headers=header).json()
print(price_resp)
#price = price_resp['data']['price']['value']
#print(price)

Python: handling incoming data from a URL

I'm sending the request below to a URL and reading the response from it:
import requests
url = "http://localhost/dat.txt"
payload = {}
headers = {
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36',
    'Sec-Fetch-Dest': 'document',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9'
}
response = requests.request("GET", url, headers=headers, data = payload)
print(response.text.encode('utf8'))
Below is the response data that I get -
mohame4|nameon#example.com|passsd!##$4|head,customer|manager,devlop
mohame3|nameon3#example.com|passsd!##$4|head,customer|manager,devlop
I do this with the data:
for i in response.text.splitlines():  # iterate over the lines of the response
    try:
        i = i.strip().split('|')
        userna = i[0]
        emaill = i[1]
        passd = i[2]
        rol1 = i[3]
        rol2 = i[4]
    except:
        pass
How can I turn rol1 from the string head,customer into rol1 = ['head', 'customer']?
Simply split the string you're getting:
rol1 = i[3].split(',')
You could do this more... gracefully, though, using iterable unpacking:
username, email, password, rol1, rol2 = i.strip().split('|')
rol1 = rol1.split(',')
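For example, with one of the sample lines from the response above (just a quick illustration using the question's data):
line = "mohame4|nameon#example.com|passsd!##$4|head,customer|manager,devlop"
username, email, password, rol1, rol2 = line.strip().split('|')
rol1 = rol1.split(',')
print(rol1)  # ['head', 'customer']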
Thanks to all the helpers, especially @ForceBru. Here is what I ended up with:
import requests

url = "http://localhost/dat.txt"
response = requests.request("GET", url)
print(response.text)
dat = str(response.text).split('\n')
for i in dat:
    i = i.strip().split('|')
    print(i[3].split(","))
