I am trying to harness a Russian language spellcheck API, Yandex.Speller.
The request seems to work fine in my browser. However, when I use a python script, the response is empty.
I am stumped as to what I am doing wrong.
Here is my code:
import urllib
from urllib.request import urlopen
import json
def main():
    """Prepare the sample phrase and pass it on to the request function."""
    # NOTE(review): no function named `api` is defined in this snippet (the
    # request function below is called `diff_api`), so as shown this call
    # would raise NameError.
    api(text_preproc())
def text_preproc(t="синхрафазатрон в дубне"):
    """Prepare a misspelled word/phrase for the API request.

    Args:
        t: Text to prepare. Defaults to the sample phrase that used to be
            hard-coded in the body, so ``text_preproc()`` still returns the
            same value as before.

    Returns:
        ``t`` with every space replaced by ``+``.
    """
    return t.replace(" ", "+")
def diff_api(text):
    """Send ``text`` to the Yandex.Speller ``checkText`` endpoint and print the decoded JSON reply.

    Args:
        text: Text to check; stored under the ``text`` key of the request data.
    """
    my_url = "https://speller.yandex.net/services/spellservice.json/checkText?text="
    my_headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36'}
    my_data = {
        "text" : text,
        "lang" : "ru",
        "format" : "plain"}
    # URL-encode the parameters, then convert them to bytes for the request body.
    my_uedata = urllib.parse.urlencode(my_data)
    my_edata = my_uedata.encode('ascii')
    # NOTE(review): supplying ``data=`` makes urllib issue a POST with the
    # parameters in the body, while the URL's own query string ends in an empty
    # ``text=`` -- presumably why the service answers with an empty list; confirm.
    req = urllib.request.Request(url=my_url, data=my_edata, headers=my_headers)
    response = urlopen(req)
    data = json.load(response)
    print(data)
The response is always an empty array, no matter how I tinker with my request.
Any insight into what I might be doing wrong?
my_uedata has to be a part of the URL you send the request to.
Also, in:
def main():
api(text_preproc())
You call api() but the function is not defined. I've used diff_api().
Try this:
import json
import urllib
from urllib.request import urlopen
def main():
    """Spell-check the sample phrase and print what the service returns."""
    prepared = text_preproc("синхрафазатрон в дубне")
    diff_api(prepared)
def text_preproc(phrase):
    """Prepare a misspelled word/phrase for the API request.

    Args:
        phrase: The text to prepare.

    Returns:
        ``phrase`` with every space replaced by ``+``.
    """
    return phrase.replace(" ", "+")
def diff_api(text):
    """Spell-check ``text`` with Yandex.Speller and print the decoded reply.

    The parameters travel in the query string of a GET request.

    Args:
        text: Text to check. Spaces may be written either as spaces or as
            ``+``; both reach the service as spaces.

    Returns:
        The decoded JSON reply (the same object that is printed).

    Raises:
        urllib.error.URLError: If the request fails or times out.
    """
    # The base URL stops at "?": urlencode() below already emits the "text="
    # key, and repeating it here would make the service check the string
    # "text=<phrase>", shifting every reported position by five characters.
    my_url = "https://speller.yandex.net/services/spellservice.json/checkText?"
    my_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36'}
    my_data = {
        "text": text,
        "lang": "ru",
        "format": "plain",
    }
    # safe="+" keeps a "+" that already stands for a space from being escaped
    # to "%2B", which the server would read as a literal plus sign.
    my_uedata = urllib.parse.urlencode(my_data, safe="+")
    req = urllib.request.Request(url=my_url + my_uedata, headers=my_headers)
    # The context manager closes the HTTP response once the body is parsed;
    # the timeout keeps an unresponsive server from blocking forever.
    with urlopen(req, timeout=10) as response:
        data = json.load(response)
    print(data)
    return data
main()
Output:
[{'code': 1, 'pos': 5, 'row': 0, 'col': 5, 'len': 14, 'word': 'синхрафазатрон', 's': ['синхрофазотрон', 'синхрофазатрон', 'синхрофазотрона']}]
Related
When I send a request to an API:
import requests
# 'website' is presumably a stand-in for the real endpoint address.
url = 'website'
# Send a desktop-browser User-Agent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36'
}
# strip() removes any whitespace around the URL before it is requested.
response = requests.get(url.strip(), headers=headers, timeout=10)
# Decode the body using the encoding guessed from its content.
response.encoding = response.apparent_encoding
print(response.text)
The output is:
0e1\u10e2\u10d4\u10db\u10d0 BOSE,\u10d3\u10d0\u10ec\u10e7\u10d4\u10d1\u10d0-\u10d2\u10d0\u10e9\u10d4\u10e0\u10d4\u10d1\u10d8\u10e1 \u10e1\u10d8\u10e1\u10e2\u10d4\u10db\u10d0,\u10d4\u10da\u10d4\u10e5\u10e2\u10e0\u10dd\u10dc\u10e3\u10da\u10d8 \u10d3\u10d8\u10e4\u10d4\u10e0\u10d4\u10dc\u10ea\u10d8\u10d0\u10da\u10e3\u10e0\u10d8 \u10e1\u10d0\u10d9\u10d4\u10e2\u10d8,\u10eb\u10e0\u10d0\u10d5\u10d8\u10e1 \u10e1\u10d0\u10db\u10e3\u10ee\u10e0\u10e3\u10ed\u10d4 \u10d9\u10dd\u10dc\u10e2\u10e0\u10dd\u10da\u10d8\u10e1 \u10e1\u10d8\u10e1\u10e2\u10d4\u10db\u10d0,\u10ec\u10d4\u10d5\u10d8\u10e1 \u10d9\u10dd\u10dc\u10e2\u10e0\u10dd\u10da\u10d8\u10e1 \u10e1\u10d8\u10e1\u10e2\u10d4\u10db\u10d0,\u10e1\u10e2\u10d0\u10d1\u10d8\u10da\u10e3\u10e0\u10dd\u10d1\u10d8\u10e1 \u10e1\u10d8\u10e1\u10e2\u10d4\u10db\u10d0,\u10d3\u10d0\u10d1\u10da\u10dd\u10d9\u10d5\u10d8\u10e1 \u10e1\u10d0\u10ec\u10d8\u10dc\u10d0\u10d0\u10e6\u10db\u10d3\u10d4\u10d2\u10dd \u10d3\u10d0\u10db\u10e3\u10ee\u10e0\u10e3\u10ed\u10d4\u10d
How to decode it correctly?
To encode a string to UTF-8 bytes, or to decode UTF-8 bytes back into a string, you can use:
s = "test"
u = s.encode("utf8")
s = u.decode("utf8")
But your problem is that the text you received still contains literal \uXXXX escape sequences instead of the characters themselves!
So you will need to un-escape it, i.e. have those sequences re-interpreted as characters:
s = response.text
# Two steps: encoding with 'raw_unicode_escape' yields bytes in which the
# backslash-u sequences are still spelled out literally; decoding those bytes
# with 'unicode_escape' then turns each sequence into the character it names.
r = s.encode('raw_unicode_escape').decode('unicode_escape')
print(r)
# -> '0e1ტემა BOSE,დაწყება-გაჩერების სისტემა,ელექტრონული დიფერენციალური საკეტი,ძრავის სამუხრუჭე კონტროლის სისტემა,წევის კონტროლის სისტემა,სტაბილურობის სისტემა,დაბლოკვის საწინააღმდეგო დამუხრუჭებ'
import requests
from pprint import pprint
import pandas as pd
# Site home page, requested first, and the JSON API endpoint requested second.
baseurl = "https://www.nseindia.com/"
url = f'https://www.nseindia.com/api/live-analysis-oi-spurts-underlyings'
# Browser-like headers sent with both requests.
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, '
                         'like Gecko) '
                         'Chrome/80.0.3987.149 Safari/537.36',
           'accept-language': 'en,gu;q=0.9,hi;q=0.8', 'accept-encoding': 'gzip, deflate, br'}
session = requests.Session()
# Fetch the home page and copy the cookies it sets into a plain dict ...
request = session.get(baseurl, headers=headers, timeout=30)
cookies = dict(request.cookies)
# ... then pass those cookies along with the API request.
res = session.get(url, headers=headers, timeout=30, cookies=cookies)
print(res.json())
I tried df = pd.DataFrame(res.json()) but couldn't get the data in table format. How can I do that? Also, how can I select only a few particular columns in the output instead of all columns?
Try this :
import json
import codecs
# Re-encode the response text and decode it as 'utf-8-sig' (which drops a
# leading byte-order mark, if present), parse the JSON, and build the table
# from the records stored under the 'data' key.
df = pd.DataFrame(json.loads(codecs.decode(bytes(res.text, 'utf-8'), 'utf-8-sig'))['data'])
And to select specific columns, you can use:
mini_df = df[['symbol', 'latestOI', 'prevOI', 'changeInOI', 'avgInOI']]
>>> print(mini_df)
I am trying to scrape an options data table. The website has a drop-down menu to select the expiration.
I can see that the page makes API calls like the one stored in `url` in the code below
to fetch the data. However, if I send a request to that link from Python, I get nothing. Why is that, and how can I correct it?
import requests ##### to connect to web for data
import pandas as pd
import numpy as np
import datetime as dt
from pathlib import Path
#from io import BytesIO
#from zipfile import ZipFile
# Today's date formatted as DDMMYYYY; not referenced again in the code shown here.
date=dt.datetime.today().strftime("%d%m%Y")
#date='09072021'
# NOTE(review): this headers dict is replaced by the reassignment a few lines
# down before any request is made, so it never takes effect.
headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) chrome/80.0.3987.132 Safari/537.36','Accept-Language': 'en-US,en;q=0.9','Accept-Encoding': 'gzip, deflate'}
# x is not used anywhere in the code shown here.
x= True
# Full API URL with the query string already percent-encoded.
url="https://www.barchart.com/proxies/core-api/v1/options/get?baseSymbol=%24SPX&fields=symbol%2CbaseSymbol%2CstrikePrice%2Cmoneyness%2CbidPrice%2Cmidpoint%2CaskPrice%2ClastPrice%2CpriceChange%2CpercentChange%2Cvolume%2CopenInterest%2CvolumeOpenInterestRatio%2Cvolatility%2CoptionType%2CdaysToExpiration%2CexpirationDate%2CtradeTime%2CweightedImpliedVolatility%2ChistoricVolatility20d%2CsymbolCode%2CsymbolType&groupBy=optionType&expirationDate=nearest&meta=field.shortName%2Cexpirations%2Cfield.description&orderBy=strikePrice&orderDir=asc&raw=1"
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:81.0) Gecko/20100101 Firefox/81.0'}
# Request the API URL directly through a session that carries only the
# User-Agent header, and parse the reply as JSON.
with requests.Session() as req:
    req.headers.update(headers)
    response = req.get(url).json()
You can try this code:
import requests
import json
from urllib.parse import unquote
# Create one session so cookies and headers persist across both requests.
session = requests.Session()
# set user agent to avoid cloudflare
session.headers['User-Agent'] = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.164 Safari/537.36'
# API endpoint queried at the end of the script.
url = 'https://www.barchart.com/proxies/core-api/v1/options/get'
# Query parameters for the regular options page.
params = {
    'view': 'stacked',
    'expiration': '2021-07-26-w'
}
# Load the regular page first: the session keeps the cookies it sets, and the
# XSRF-TOKEN cookie is read from the session further down.
session.get(
    'https://www.barchart.com/stocks/quotes/$SPX/options', params=params
)
# Query parameters for the API (AJAX) call; requests encodes them into the URL.
params_ajax = {
    'baseSymbol': '$SPX',
    'fields': ','.join(
        [
            'symbol', 'baseSymbol', 'strikePrice', 'moneyness', 'bidPrice',
            'midpoint', 'askPrice', 'lastPrice', 'priceChange',
            'percentChange', 'volume', 'openInterest',
            'volumeOpenInterestRatio', 'volatility', 'optionType',
            'daysToExpiration', 'expirationDate', 'tradeTime',
            'weightedImpliedVolatility', 'historicVolatility20d',
            'symbolCode', 'symbolType'
        ]
    ),
    'groupBy': 'optionType',
    'expirationDate': '2021-07-28',
    'meta': ','.join(['field.shortName', 'expirations', 'field.description']),
    'orderBy': 'strikePrice',
    'orderDir': 'asc',
    'expirationType': 'weekly',
    'raw': 1
}
# Ask for JSON and send the URL-decoded XSRF-TOKEN cookie value back as the
# X-XSRF-TOKEN header.
session.headers['Accept'] = 'application/json'
session.headers['X-XSRF-TOKEN'] = unquote(session.cookies['XSRF-TOKEN'])
# Call the API and pretty-print the decoded JSON.
result = session.get(url, params=params_ajax).json()
print(json.dumps(result, indent=4))
{
"count": 2,
"total": 458,
"data": {
"Call": [
{
"symbol": "$SPX|20210726|1200.00WC",
"baseSymbol": "$SPX",
"strikePrice": "1,200.00",
"moneyness": "+72.24%",
"bidPrice": "3,125.50",
"midpoint": "3,126.95",
"askPrice": "3,128.40",
"lastPrice": "0.00",
"priceChange": "0.00",
"percentChange": "unch",
"volume": "0",
"openInterest": "0",
"volumeOpenInterestRatio": "0.00",
"volatility": "438.24%",
"optionType": "Call",
"daysToExpiration": "6",
"expirationDate": "07\/26\/21",
"tradeTime": "N\/A",
"weightedImpliedVolatility": "14.66%",
"historicVolatility20d": "10.79%",
"symbolType": "Call",
...
P.S. If this helped you, please mark the answer as correct.
I'm having trouble getting this website's price to print. I think I'm close, but I'm getting the error below.
Please help, thanks.
"
{'statusDetails': {'state': 'FAILURE', 'errorCode': 'SYS-3003', 'correlationid': 'rrt-5636881267628447407-b-gsy1-18837-18822238-1', 'description': 'Invalid key identifier or token'}}
"
code:
import requests
import json
s = requests.Session()
# Product page, loaded first through the session.
url = 'https://www.bunnings.com.au/ozito-pxc-2-x-18v-cordless-line-trimmer-skin-only_p0167719'
header = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36'}
resp = s.get(url,headers=header)
# API endpoint requested with the same session and the same single header.
# NOTE(review): only a user-agent header is sent here; presumably the API also
# expects some key or token, given the 'Invalid key identifier or token' reply
# quoted above -- confirm against the requests the browser makes.
api_url = f'https://api.prod.bunnings.com.au/v1/products/0167719/fulfillment/6400/radius/100000?isToggled=true'
price_resp = s.get(api_url,headers=header).json()
print(price_resp)
# Intended price lookup, left disabled by the author.
#price = price_resp['data']['price']['value']
#print(price)
I'm sending the request below to a URL and getting a response from it:
import requests
url = "http://localhost/dat.txt"
# Empty payload, passed as ``data`` with the GET request below.
payload = {}
headers = {
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36',
    'Sec-Fetch-Dest': 'document',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9'
}
response = requests.request("GET", url, headers=headers, data = payload)
# Encoding the text produces a bytes object, so this prints its b'...' form.
print(response.text.encode('utf8'))
Below is the response data that I get -
mohame4|nameon#example.com|passsd!##$4|head,customer|manager,devlop
mohame3|nameon3#example.com|passsd!##$4|head,customer|manager,devlop
I do this with the data
# Walk the body one record (line) at a time; looping over the string itself
# would hand back single characters, never a whole line.
for i in response.text.splitlines():
    # Fields are '|'-separated: username, email, password, rol1, rol2.
    i = i.strip().split('|')
    try:
        userna = i[0]
        emaill = i[1]
        passd = i[2]
        rol1= i[3]
        rol2= i[4]
    except IndexError:
        # Fewer than five fields (e.g. a blank line): skip this record. Only
        # IndexError is caught so that unrelated errors are not hidden.
        continue
How can I turn rol1 from the string
head,customer
into the list
rol1=['head','customer']
Simply split the string you're getting:
rol1 = i[3].split(',')
You could do this more... gracefully, though, using iterable unpacking:
username, email, password, rol1, rol2 = i.strip().split('|')
rol1 = rol1.split(',')
Thanks to everyone who helped, especially #ForceBru.
import requests
url = "http://localhost/dat.txt"
response = requests.request("GET", url)
print(response.text)
# splitlines() handles "\n" and "\r\n" alike and, unlike split('\n'), yields
# no empty trailing item when the body ends with a newline.
dat = response.text.splitlines()
for i in dat:
    i = i.strip().split('|')
    # A blank or short line has no fourth field; skip it rather than fail
    # with IndexError on i[3].
    if len(i) < 4:
        continue
    # The fourth field holds comma-separated roles; print them as a list.
    print(i[3].split(","))
# TODO: write code...