i have give all the payload data but not working

i have give all the payload data but not working - python

Can someone please help me with proxy.webshare.io/register on creating account please?
i have tried
task_id = captcha.create_task(website_url="https://proxy.webshare.io/register/", website_key="6LeHZ6UUAAAAAKat_YS--O2tj_by3gv3r_l03j9d")
print("Wait for respone")
print(task_id)
respone = captcha.join_task_result(task_id).get("gRecaptchaResponse")
print("Recieved key: " + respone)
source = client.get('https://proxy.webshare.io/register').content
soup = BeautifulSoup(source, 'html.parser')
header = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Content-Type': 'application/x-www-form-urlencoded',
'sec-ch-ua': '"Chromium";v="104", " Not A;Brand";v="99", "Google Chrome";v="104"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"',
'Sec-Fetch-Dest': 'document',
'Sec-Fetch-Mode': 'navigate',
'Sec-Fetch-Site': 'same-origin',
'Sec-Fetch-User': '?1',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36'
}
val = soup.find("input", {'id': 'a'}).get("value")
print(val)
if 'csrftoken' in client.cookies:
csrftoken = client.cookies['csrftoken']
else:
csrftoken = client.cookies['csrf']
print(csrftoken)
mail = em_f_n+ "#gmail.com"
passz = pw_f_n
print(mail)
print(passz)
data = {
"csrfmiddlewaretoken": csrftoken,
"next": "",
"a": val,
"email": mail,
"password1": passz,
"g-recaptcha-response": respone
}
r = client.post("https://proxy.webshare.io/register", json=data, headers=header)
print(r.context)
print(r.status_code)
but it just return 200 status code(although it returned all of data above) and didn't work at all please help

Related

how to youtube python a post request to change account to another one?

I'm trying to send a post request to change my YouTube account to another one, but I can't do it.
Below I have attached the code that I wrote, it gives me an error, I can't figure out what the problem is. I also know that if the account is successfully changed, there should be a 303 response.
At the very end of the code there are cookie lines: __Secure-1 PSD and __Secure-1PAPISID, I replaced them with "-" to send them here because they are private. You can take them yourself from chrome cookies
Could you help me?
import requests
import hashlib
import re
import time
class YouTube(object):
base_headers = {
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36',
}
origin = 'https://youtube.com'
referer = f'https://youtube.com'
def __init__(self, cookies: dict):
self.__CHANNEL_ID = None
self.__API_KEY = None
self.__SAPISIDHASH = None
self.cookies = cookies
self.session = requests.Session()
self.session.headers.update(self.base_headers)
self.load_cookies()
def load_cookies(self) -> None:
for key, value in self.cookies.items():
self.session.cookies.set(key, value)
def set_data(self) -> None:
response = self.session.get(self.referer)
# print(response.text)
print(self.__API_KEY)
def get_data_of_monetization(self) -> dict:
# s = requests.Session()
#
self.session.headers.update(
{'accept-encoding': 'gzip, deflate, br', 'accept-language': 'ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7',
'sec-ch-ua': '".Not/A)Brand";v="99", "Google Chrome";v="103", "Chromium";v="103"',
'sec-ch-ua-arch': '"x86"', 'sec-ch-ua-bitness': '"64"', 'sec-ch-ua-full-version': '"103.0.5060.134"',
'sec-ch-ua-full-version-list': '".Not/A)Brand";v="99.0.0.0", "Google Chrome";v="103.0.5060.134", "Chromium";v="103.0.5060.134"',
'sec-ch-ua-mobile': '?0', 'sec-ch-ua-platform': '"Windows"', 'sec-ch-ua-platform-version': '"10.0.0"',
'sec-fetch-user': '?1', 'upgrade-insecure-requests': '1',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36',
'x-client-data': 'CKq1yQEIkbbJAQiltskBCMG2yQEIqZ3KAQio68oBCJahywEI2+/LAQjmucwBCLS6zAEIibvMAQj2u8wBCJi9zAEI8sDMAQiawcwBCLLBzAEIxMHMAQjXwcwBCN/EzAEYq6nKAQ=='})
r = self.session.get('https://www.youtube.com/signin',
params={'action_handle_signin': 'true', 'authuser': '1',
'next': 'https%3A%2F%2Fstudio.youtube.com%2F', 'feature': 'masthead_switcher',
'skip_identity_prompt': 'true'},
headers={
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'cookie': f'__Secure-1PSID={self.cookies["__Secure-1PSID"]}; __Secure-1PAPISID={self.cookies["__Secure-1PAPISID"]}',
'referer': 'https://studio.youtube.com/', 'sec-ch-ua-model': '""', 'sec-fetch-dest': 'document',
'sec-fetch-mode': 'navigate', 'sec-fetch-site': 'same-site',
'service-worker-navigation-preload': 'true'},
)
print(r.text)
return r.json()
#property
def get_channel_id(self) -> str:
return self.__CHANNEL_ID
#property
def get_api_key(self) -> str:
return self.__API_KEY
#property
def get_sapisidhash(self) -> str:
return self.__SAPISIDHASH
if __name__ == '__main__':
cookie = {'__Secure-1PSID': '-',
'__Secure-1PAPISID': '-'
}
client = YouTube(cookie)
client.set_data()
response = client.get_data_of_monetization()

Scrape multiple pages with json

I am trying to scrape multiple pages with json but they will provide me error
import requests
import json
import pandas as pd
headers = {
'Accept-Language': 'en-GB,en-US;q=0.9,en;q=0.8,pt;q=0.7',
'Connection': 'keep-alive',
'Origin': 'https://www.nationalhardwareshow.com',
'Referer': 'https://www.nationalhardwareshow.com/',
'Sec-Fetch-Dest': 'empty',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'cross-site',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36',
'accept': 'application/json',
'content-type': 'application/x-www-form-urlencoded',
'sec-ch-ua': '".Not/A)Brand";v="99", "Google Chrome";v="103", "Chromium";v="103"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"',
}
params = {
'x-algolia-agent': 'Algolia for vanilla JavaScript 3.27.1',
'x-algolia-application-id': 'XD0U5M6Y4R',
'x-algolia-api-key': 'd5cd7d4ec26134ff4a34d736a7f9ad47',
}
for i in range(0,4):
data = '{"params":"query=&page={i}&facetFilters=&optionalFilters=%5B%5D"}'
resp = requests.post('https://xd0u5m6y4r-dsn.algolia.net/1/indexes/event-edition-eve-e6b1ae25-5b9f-457b-83b3-335667332366_en-us/query', params=params, headers=headers, data=data).json()
req_json=resp
df = pd.DataFrame(req_json['hits'])
f = pd.DataFrame(df[['name','representedBrands','description']])
print(f)
the error :
Traceback (most recent call last):
File "e:\ScriptScraping\Extract data from json\uk.py", line 31, in <module>
df = pd.DataFrame(req_json['hits']) KeyError: 'hits'

Try to concatenate the variable i with data parameter
import requests
import json
import pandas as pd
headers = {
'Accept-Language': 'en-GB,en-US;q=0.9,en;q=0.8,pt;q=0.7',
'Connection': 'keep-alive',
'Origin': 'https://www.nationalhardwareshow.com',
'Referer': 'https://www.nationalhardwareshow.com/',
'Sec-Fetch-Dest': 'empty',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'cross-site',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36',
'accept': 'application/json',
'content-type': 'application/x-www-form-urlencoded',
'sec-ch-ua': '".Not/A)Brand";v="99", "Google Chrome";v="103", "Chromium";v="103"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"'
}
params = {
'x-algolia-agent': 'Algolia for vanilla JavaScript 3.27.1',
'x-algolia-application-id': 'XD0U5M6Y4R',
'x-algolia-api-key': 'd5cd7d4ec26134ff4a34d736a7f9ad47'
}
lst=[]
for i in range(0,4):
data = '{"params":"query=&page='+str(i)+'&facetFilters=&optionalFilters=%5B%5D"}'
resp = requests.post('https://xd0u5m6y4r-dsn.algolia.net/1/indexes/event-edition-eve-e6b1ae25-5b9f-457b-83b3-335667332366_en-us/query', params=params, headers=headers, data=data).json()
req_json=resp
df = pd.DataFrame(req_json['hits'])
f = pd.DataFrame(df[['name','representedBrands','description']])
lst.append(f)
#print(f)
d=pd.concat(lst)
print(d)

It is returning status_code 400 as the request is bad. You are sending wrongly formatted data. Change:
data = '{"params":"query=&page={i}&facetFilters=&optionalFilters=%5B%5D"}'
To
data = '{"params":"query=&page='+str(i)+'&facetFilters=&optionalFilters=%5B%5D"}'
For it to work. Hope I could help.

python request trying to login session barchart

I am trying to create a session login into barchart with no luck using requests but i am not sure what i'm missing.
I am always geting 500 error
code:
import requests
def main():
site_url = "https://www.barchart.com/login"
payload = {
'email': 'user',
'password': 'pass',
}
headers = {
'authority': 'www.barchart.com',
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'accept-language': 'es-ES,es;q=0.9,en;q=0.8,gl;q=0.7',
'cache-control': 'no-cache',
'cookie': '_gcl_au=1.1.1986418753.1652987508; _fbp=fb.1.1652987508275.1121978397; _ga=GA1.2.581266708.1652987508; __gads=ID=4850c10bd629ae1e-227a8d8bb6d30042:T=1652987552:RT=1652987552:S=ALNI_MYiYqDwr6eWdC-6Q67HlsmfGR9TFQ; _admrla=2.2-8105254c8da36a72-ab5313fc-d7a7-11ec-8803-06c18b5dfeba; pubcv=%7B%7D; tcf2cookie=CPZN1UAPZN1UAAJAEBENCQCsAP_AAEPAACiQIxNd_X__bX9j-_5_bft0eY1P9_r3_-QzjhfNs-8F3L_W_L0Xw2E7NF36pq4KuR4Eu3LBIQNlHMHUTUmwaokVrzHsak2cpyNKJ7LEknMZO2dYGH9Pn9lDuYKY7_5___bx3D-v_t_-39T378Xf3_d5_2_--vCfV599jbn9fV_7_9nP___9v-_8_________wRgAJMNS8gC7MscGTSMIoUQIwrCQqgUAFFAMLRFYAODgp2VgEuoIWACAVIRgRAgxBRgwCAAQSAJCIgJACwQCIAiAQAAgARAIQAETAILACwMAgAFANCxACgAECQgyICI5TAgKkSiglsrEEoK9jTCAOs8AKBRGRUACJJIQSAgJCwcxwBICXiyQNMUL5ACMEKAAAAA.f_gACHgAAAAA; webinar107WebinarClosed=true; _gid=GA1.2.1453697314.1653243141; _hjid=e2d759d4-08f2-4e97-b4bf-15667795cb0e; _hjSessionUser_2563157=eyJpZCI6ImMyZWQyMTQ2LTZmZGItNTViNi1hMzljLTlkODIyOTAyYTlkYSIsImNyZWF0ZWQiOjE2NTMyNDMxNTkwMTAsImV4aXN0aW5nIjp0cnVlfQ==; __aaxsc=2; _hjSession_2563157=eyJpZCI6Ijc2MDE2ZDE3LTRlN2MtNGFiNS05OTgzLTRjNjY5YTg3ODM0YyIsImNyZWF0ZWQiOjE2NTMyNTA4MDE3MzQsImluU2FtcGxlIjpmYWxzZX0=; _hjAbsoluteSessionInProgress=0; _hjIncludedInPageviewSample=1; _hjIncludedInSessionSample=0; market=eyJpdiI6ImdJTllrNEpHMnF6U3B3THRoQ0dZTkE9PSIsInZhbHVlIjoibjkwM3lrYkNORXU0cDNhV25VUHNYUTZ6eFlCOHVQRC9GOEJhM2VJK0RtN21IYjFWQVZMVlRTYXZpZk5idWNLSCIsIm1hYyI6IjE5NmY0MGI3OGNjNjVhZjY5ZWU5N2FkZjY1NWVlYzVjZThmMGM3Mjk0YjljNWEwZjI0YzBjMjQwOThmYTAyN2EifQ%3D%3D; bcFreeUserPageView=0; cto_bundle=JsBghF84Rm9rTThnUWNFdEM0blV1Q1lFUUVha3huMEY1NkpnZFVjblpsNyUyRk8zUFBZNUM2dGp1Q054bkElMkYyR09aaUtRckpUMHViJTJCQjJ2cEg0OGt2c3B6QllxWUczeWRmZEJVUnUlMkZ6MnRrT0xvakxnWXIxeGJtRUdYMXJVVFglMkZ3RWJDSUFEeFFqZDZIN3pSemtZRjZrdndmazNnJTNEJTNE; laravel_token=eyJpdiI6IkNjWW9EUkI1OGdkT1duRVNEMlU1U2c9PSIsInZhbHVlIjoiRXMyRXlsRnpzbFlvdkpRL0RSU0lPeC85Zkx0MGJkdi9mczQ1Nk9WUFlNbGorTlVEUDBGd0VhTysrTHhUWGxRNTVaa3lzMVFOZ0pMRjFIYklFQW9TUlBFT0pZN1BjOUU0TldYVEZjbmZBcFBBWWViRVFHTzFVazFHMHZ0bUlSbEhndzdRNEs0L0xMUjc3cHlKL3FEdGJuTDN5VktaRlVhTTdtYlpLVWM3TDlpWGlBWEtKa3p4Rjh4Ty9zOXVtSGF5djRTVHpPQWZZRFNQQTlpNGNnQURNclpucjlVMG8xaUc0U2NRejdjU25Td0hIb3pLNkxwS1IzcG9KU3p2TUYybmMyajM5cmFsWlhOM0xhQS9tR0xDNktPdHcxK0lKR0JHNE5qUjZPQnlTZUNndkFvQ0l6QjhaVWxlbEJoVWVOKzAiLCJtYWMiOiJhMDc0ZmVmN2I4NGMxNjE2ZWRiM2IwMjY3YmNhNzY3MjZjNjA4ODU5MTQ0MmY2YjMwNWVjZDA2NDIwM2E1MTljIn0%3D; XSRF-TOKEN=eyJpdiI6IlUzSzRkTExjZElxY2FGNGlCVWlNQ3c9PSIsInZhbHVlIjoiM2Y3QllmVGViMEJEOEdjOHNXR1lBdHd0enQxRnp4YlJmVXBiTDhSNjZNYTRYK3lLajVESWg0QTlxcHpLS2pHdDBKYjkrWEV0eHcvQzE0U2J2QnpwR2dQMWVVN1RlNTlhVlJ1M2FlSjhBd2hRd09zVC9YbG8rN1ZVcGQ1OWkwNXMiLCJtYWMiOiI2ZTkzOWMwY2E4OGQyYmU0ZTI0NDc3NjM0NDhmZjAwOGFjYzhiMWQxMGU0ZTdmMDM5YmJmMmUwZmViNDFiODRlIn0%3D; laravel_session=eyJpdiI6Ijk4R1Z1c0U4L0R5cTU0TjBFTWdqY3c9PSIsInZhbHVlIjoiR1lHTVI0c1JPblc2VENtY0thMy9ROXhMYXE2VDZwU3BKNGhpZUUxc2QzOFJySlNhTWVwWnh1RzdTaitDYUdpRXlIckszV0NEL1JCbkpXN3YzamtOWEIvUFJYZGhDMzFVaysrSUJvL3NTQ2NaYndiQjIwbWIxcVZmTGR4Uk5lZVoiLCJtYWMiOiI1YjczYzczZmIyMTQ4NzMxODIzMGIwZjk2MTdkNzU2YjU0N2NjZjkxMDViOGI1YTIzMzA5ZGY1OGY1OWQyYjM0In0%3D; _gat_UA-2009749-51=1; IC_ViewCounter_www.barchart.com=10; aasd=15%7C1653252462025; _awl=2.1653252699.0.5-06f6ddfaf139e746127acfcfca00de3c-6763652d6575726f70652d7765737431-0',
'origin': 'https://www.barchart.com',
'pragma': 'no-cache',
'referer': 'https://www.barchart.com/',
'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="101", "Google Chrome";v="101"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"',
'sec-fetch-dest': 'document',
'sec-fetch-mode': 'navigate',
'sec-fetch-site': 'same-origin',
'sec-fetch-user': '?1',
'upgrade-insecure-requests': '1',
'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.67 Safari/537.36',
}
with requests.Session() as req:
req.headers.update(headers)
res=req.post(site_url,data=payload)
print(res)
if __name__ == '__main__':
main()

Web scraping - his application is out of date, please click the refresh button on your browser return message

I would like to scrape some data from the following web site: https://oss.uredjenazemlja.hr/public/lrServices.jsp?action=publicLdbExtract
Steps I would like to automate are:
Choose "Opcinski sud/ZK odjel". For example choose "Zemljišnoknjižni odjel Benkovac".
Choose "Glavna knjiga". For example choose "BENKOVAC"
Enter "Broj kat. čestice:". For example, enter 576/2.
Select "Da" in "Povijesni pregled" (the last row, leave "Broj ZK uloska empty").
Click "Pregledaj" and solve the captcha.
Scrape html that appers.
I have tried to follow above steps using plain requests in python by following network, after opening inspector in the web browser.
There are lots of requests on the page. I will divide my code in several steps:
Start session and make requests that on the start of the page
import requests
import re
import shutil
from twocaptcha import TwoCaptcha
import pandas as pd
import numpy as np
import os
from pathlib import Path
import json
import uuid
# start session
url = 'https://oss.uredjenazemlja.hr/public/lrServices.jsp?action=publicLdbExtract'
session = requests.Session()
session.get(url)
jid = session.cookies.get_dict()['JSESSIONID']
# some requests on the start of the page (probabbly redundandt)
headers = {
'Referer': 'https://oss.uredjenazemlja.hr/public/lrServices.jsp?action=publicLdbExtract',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36',
}
session.get("https://oss.uredjenazemlja.hr/public/js/libs/modernizr-2.5.3.min.js", headers = headers) #
session.get("https://oss.uredjenazemlja.hr/public/js/libs/jquery-1.7.1.min.js", headers = headers) #
session.get("https://oss.uredjenazemlja.hr/public/js/script.js", headers = headers) # script.json
# no cache json
headers = {
'Cookie': 'ossprivatelang=hr_HR; gxtTheme=m%3Aid%7Cs%3Agray%2Cfile%7Cs%3Axtheme-gray.css; JSESSIONID=' + jid,
"Connection": "keep-alive",
'Host': 'oss.uredjenazemlja.hr',
'Referer': 'https://oss.uredjenazemlja.hr/public/lrServices.jsp?action=publicLdbExtract',
"sec-ch-ua": '"Google Chrome";v="89", "Chromium";v="89", ";Not A Brand";v="99"',
"sec-ch-ua-mobile": "?0",
"Sec-Fetch-Dest": "script",
"Sec-Fetch-Mode": "no-cors",
"Sec-Fetch-Site": "same-origin",
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36'
}
res = session.get('https://oss.uredjenazemlja.hr/public/gwt/hr.ericsson.oss.ui.pia.OssPiaModule.nocache.js', headers = headers)
cache_html = re.findall(r'bc=\'(.*\.cache.html)\',C', res.text)[0]
# cache_html = "1F6C776DEF6D55F56C900B938F84D726.cache.html"
# some more requests on the start of the page (probabbly redundandt)
headers = {
'Referer': 'https://oss.uredjenazemlja.hr/public/lrServices.jsp?action=publicLdbExtract',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36',
}
session.get("https://oss.uredjenazemlja.hr/public/gwt/tiny_mce_editor/tiny_mce_src.js", headers = headers) # tiny_mce_src.js
session.get("https://oss.uredjenazemlja.hr/public/gwt/js/common.js", headers = headers)
session.get("https://oss.uredjenazemlja.hr/public/gwt/js/blueimp_tmpl.js", headers = headers) # blueimp_tmpl.js
# cache json
headers = {
"DNT": "1",
'Referer': 'https://oss.uredjenazemlja.hr/public/lrServices.jsp?action=publicLdbExtract',
"sec-ch-ua": '"Google Chrome";v="89", "Chromium";v="89", ";Not A Brand";v="99"',
'sec-ch-ua-mobile': '?0',
'Sec-Fetch-Dest': 'iframe',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36'
}
session.get('https://oss.uredjenazemlja.hr/public/gwt/' + cache_html, headers = headers)
Then, I made requests for steps 1 and 2 above:
# commonRPCService opcinski sud 1
headers = {
'Accept': '*/*',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'hr-HR,hr;q=0.9,en-US;q=0.8,en;q=0.7',
'Connection': 'keep-alive',
# 'Content-Length': '166',
'Content-Type': 'text/x-gwt-rpc; charset=UTF-8',
'Cookie': 'gxtTheme=m%3Aid%7Cs%3Agray%2Cfile%7Cs%3Axtheme-gray.css; ossprivatelang=hr_HR; __utma=79801043.802441445.1616788486.1616788486.1616788486.1; __utmz=79801043.1616788486.1.1.utmcsr=google|utmccn=(organic)|utmcmd=organic|utmctr=(not%20provided); x-auto-31=m%3Acollapsed%7Cb%3Atrue; JSESSIONID=' + jid,
"DNT": "1",
'Host': 'oss.uredjenazemlja.hr',
'Origin': 'https://oss.uredjenazemlja.hr',
'Referer': 'https://oss.uredjenazemlja.hr/public/gwt/' + cache_html,
'Sec-Fetch-Dest': 'empty',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'same-origin',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36'
}
payload = '5|0|4|https://oss.uredjenazemlja.hr/public/gwt/|957F3F03E95E97ABBDE314DFFCCEF4BC|hr.ericsson.oss.ui.common.client.core.rpc.ICommonRPCService|getMainBook|1|2|3|4|0|'
res = session.post(
'https://oss.uredjenazemlja.hr/rpc/commonRPCService',
headers = headers,
data=payload
)
print(res.text)
# commonRPCService opcinski sud 2
payload = '5|0|18|https://oss.uredjenazemlja.hr/public/gwt/|957F3F03E95E97ABBDE314DFFCCEF4BC|hr.ericsson.oss.ui.common.client.core.rpc.ICommonRPCService|getLrInstitutions|com.extjs.gxt.ui.client.data.BaseModel|hr.ericsson.oss.ui.common.client.core.data.RpcModel/2891266824|dirty|java.lang.Boolean/476441737|new|deleted|resourceCode|java.lang.Integer/3438268394|elementSelected|class|java.lang.String/2004016611|hr.ericsson.jis.domain.admin.Institution|name||1|2|3|4|1|5|6|7|7|8|0|9|-2|10|-2|11|12|0|13|-2|14|15|16|17|15|18|'
res = session.post(
'https://oss.uredjenazemlja.hr/rpc/commonRPCService',
data=payload,
headers=headers
)
# print(res.text)
# commonRPCService glavna knjiga 1
payload = '5|0|4|https://oss.uredjenazemlja.hr/public/gwt/|957F3F03E95E97ABBDE314DFFCCEF4BC|hr.ericsson.oss.ui.common.client.core.rpc.ICommonRPCService|getMainBook|1|2|3|4|0|'
res = session.post(
'https://oss.uredjenazemlja.hr/rpc/commonRPCService',
data=payload,
headers=headers
)
print(res.text)
# commonRPCService glavna knjiga 2
payload = ('5|0|34|https://oss.uredjenazemlja.hr/public/gwt/|957F3F03E95E97ABBDE314DFFCCEF4BC|hr.ericsson.oss.ui.common.client.core.rpc.ICommonRPCService|getMainBooks|com.extjs.gxt.ui.client.data.BaseModel|hr.ericsson.oss.ui.common.client.core.data.RpcModel/2891266824|dirty|java.lang.Boolean/476441737|new|deleted|resourceCode|java.lang.Integer/3438268394|elementSelected|cadastralMunicipality|class|java.lang.String/2004016611|hr.ericsson.jis.domain.admin.CadastralMunicipality|hr.ericsson.jis.domain.admin.MainBook|institution|institutionId|parentInstitution|name|Općinski sud u Zadru|hr.ericsson.jis.domain.admin.Institution|institutionType|institutionTypeId|hr.ericsson.jis.domain.admin.InstitutionType|source|superviseInstitutionId|Zemljišnoknjižni odjel Benkovac|place|BENKOVAC|preconditionsRequired||1|2|3|4|1|5|6|10|7|8|0|9|-2|10|-2|11|12|0|13|-2|14|6|1|15|16|17|15|16|18|19|6|13|7|-2|9|-2|20|12|500|21|6|8|7|-2|9|-2|10|-2|20|12|605|11|12|0|13|-2|22|16|23|15|16|24|25|6|7|7|-2|9|-2|10|-2|26|12|14|11|-11|13|-2|15|16|27|28|12|1|10|-2|29|-10|11|-11|13|-2|22|16|30|31|16|32|15|-13|33|-2|22|16|34|').encode("utf-8")
res = session.post(
'https://oss.uredjenazemlja.hr/rpc/commonRPCService',
data=payload,
headers=headers
)
Than I solve the captcha:
# some captcha post
payload = ('5|0|4|https://oss.uredjenazemlja.hr/public/gwt/|957F3F03E95E97ABBDE314DFFCCEF4BC|hr.ericsson.oss.ui.common.client.core.rpc.ICommonRPCService|isCaptchaDisabled|1|2|3|4|0|').encode('utf-8')
res = session.post(
'https://oss.uredjenazemlja.hr/rpc/commonRPCService',
data=payload,
headers=headers
)
print(res.text)
# get and save captcha
TWO_CAPTCHA_APY_KEY = "myapikey"
solver = TwoCaptcha(TWO_CAPTCHA_APY_KEY)
save_path = 'D:/zkrh/captchas'
p = session.get('https://oss.uredjenazemlja.hr/servlets/kaptcha.jpg?1617088523212',
headers=headers,
stream=True)
captcha_path = os.path.join(Path(save_path), 'captcha' + ".jpg")
with open(captcha_path, 'wb') as out_file:
shutil.copyfileobj(p.raw, out_file)
# solve captcha
result = solver.normal(captcha_path, minLength=5, maxLength=5)
payload = ('5|0|6|https://oss.uredjenazemlja.hr/public/gwt/|957F3F03E95E97ABBDE314DFFCCEF4BC|hr.ericsson.oss.ui.common.client.core.rpc.ICommonRPCService|validateCaptcha|java.lang.String|' +
result['code'] + '|1|2|3|4|1|5|6|').encode('utf-8')
res = requests.post(
'https://oss.uredjenazemlja.hr/rpc/commonRPCService',
data=payload,
headers=headers
)
if res.text.startswith("//OK"):
os.rename(captcha_path, os.path.join(Path(save_path), result['code'] + ".jpg"))
else:
print("Kriva captcha. Rijesi!")
Now, here is the most important request and I can't get the right output from it. It should return lots of numbers where the most important number is one with 7 digits (\d{7}. the should be 1 or more of such numbers). I can use that number in the last step, to get html Here is my try:
payload = ('5|0|40|https://oss.uredjenazemlja.hr/public/gwt/|0EAC9F40996251FDB21FF254E1600E83|hr.ericsson.oss.ui.pia.client.rpc.IOssPublicRPCService|getLrUnitsByMainBookAndParcel|com.extjs.gxt.ui.client.data.BaseModel|java.lang.String|hr.ericsson.oss.ui.common.client.core.data.RpcModel/2891266824|date|java.sql.Date/3996530531|dirty|java.lang.Boolean/476441737|new|cadastralMunicipality|id|java.lang.Integer/3438268394|class|java.lang.String/2004016611|hr.ericsson.jis.domain.admin.CadastralMunicipality|cadastralMunicipalityId|source|creationDate|formatedName|BENKOVAC|userId|cadInstitution|deleted|institutionId|resourceCode|elementSelected|name|Odjel za katastar nekretnina Benkovac|hr.ericsson.jis.domain.admin.Institution|institution|Zemljišnoknjižni odjel Benkovac|place|sidMainBook|java.lang.Long/4227064769|hr.ericsson.jis.domain.admin.MainBook|status|576/2|1|2|3|4|2|5|6|7|18|8|9|115|10|21|10|11|0|12|-3|13|7|3|14|15|98|16|17|18|19|-5|20|15|1|21|9|116|0|1|22|17|23|24|15|-9999|25|7|8|10|-3|12|-3|26|-3|27|15|117|28|15|0|29|-3|30|17|31|16|17|32|33|7|9|10|-3|12|-3|26|-3|27|15|500|28|-13|29|-3|30|17|34|35|-9|16|-15|26|-3|28|15|0|29|-3|30|-9|36|37|4730091|0|14|15|30857|16|17|38|39|-19|40|').encode('utf-8')
res = session.post(
'https://oss.uredjenazemlja.hr/rpc/commonRPCService',
data=payload,
headers=headers
)
print(res.text)
It returns:
"//EX[2,1,["com.google.gwt.user.client.rpc.IncompatibleRemoteServiceException/3936916533","This application is out of date, please click the refresh button on your browser. ( Blocked attempt to access interface 'hr.ericsson.oss.ui.pia.client.rpc.IOssPublicRPCService', which is not implemented by 'hr.ericsson.oss.ui.common.server.core.rpc.CommonRPCService'; this is either misconfiguration or a hack attempt )"],0,5]"
instead of numbers as I explained before.
Then, in the last step, I should use 7 digit number as lrUnitNumber parameter
# Publicreportservlet
headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'hr-HR,hr;q=0.9,en-US;q=0.8,en;q=0.7',
'Cache-Control': 'max-age=0',
'Connection': 'keep-alive',
'Content-Length': '169',
'Content-Type': 'application/x-www-form-urlencoded',
'Cookie': 'ossprivatelang=hr_HR; gxtTheme=m%3Aid%7Cs%3Agray%2Cfile%7Cs%3Axtheme-gray.css; JSESSIONID=' + jid,
'Host': 'oss.uredjenazemlja.hr',
'Origin': 'https://oss.uredjenazemlja.hr',
'Referer': 'https://oss.uredjenazemlja.hr/public/lrServices.jsp?action=publicLdbExtract',
'Sec-Fetch-Dest': 'iframe',
'Sec-Fetch-Mode': 'navigate',
'Sec-Fetch-Site': 'same-origin',
'Sec-Fetch-User': '?1',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36'
}
dataFrom = {
'pia': 1,
'report_type_id': 4,
'report_type_name': 'bzp_izvadak_oss',
'source': 1,
'institutionID': 500,
'mainBookId': 30857,
'lrUnitNumber': 5509665,
'lrunitID': 5799992,
'status': '0,1',
'footer': '',
'export_type': 'html'
}
res = session.post(
'https://oss.uredjenazemlja.hr/servlets/PublicReportServlet',
data=dataFrom,
headers=headers
)
res
I am providing the R ode too. Maybe someone with R and web scraping knowledge can help:
library(httr)
library(rvest)
library(stringr)
library(reticulate)
twocaptcha <- reticulate::import("twocaptcha")
# captcha python library
TWO_CAPTCHA_APY_KEY = ".."
solver = twocaptcha$TwoCaptcha(TWO_CAPTCHA_APY_KEY)
#
url = 'https://oss.uredjenazemlja.hr/public/lrServices.jsp?action=publicLdbExtract'
session = GET(url)
jid <- cookies(session)$value
headers_cache = c(
'Referer'= 'https://oss.uredjenazemlja.hr/public/lrServices.jsp?action=publicLdbExtract',
'User-Agent'= 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36'
)
session <- rvest:::request_GET(content(session), 'https://oss.uredjenazemlja.hr/public/gwt/hr.ericsson.oss.ui.pia.OssPiaModule.nocache.js',
add_headers(headers_cache))
cache_html <- str_extract(session$response, "bc=\\'(.*\\.cache.html)\\',C")
cache_html <- gsub(".*=\\'|\\'.C", "", cache_html)
headers_cache = c(
'Referer'= 'https://oss.uredjenazemlja.hr/public/lrServices.jsp?action=publicLdbExtract',
'User-Agent'= 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36'
)
session <- rvest:::request_GET(session, paste0('https://oss.uredjenazemlja.hr/public/gwt/', cache_html), add_headers(headers_cache))
# meta
commonRPCServiceUrl <- "https://oss.uredjenazemlja.hr/rpc/commonRPCService"
headers = c(
'Accept'= '*/*',
'Accept-Encoding'= 'gzip, deflate, br',
'Accept-Language'= 'hr-HR,hr;q=0.9,en-US;q=0.8,en;q=0.7',
'Connection'= 'keep-alive',
# 'Content-Length'= '166',
'Content-Type'= 'text/x-gwt-rpc; charset=UTF-8',
'Cookie'= paste0('gxtTheme=m%3Aid%7Cs%3Agray%2Cfile%7Cs%3Axtheme-gray.css; ossprivatelang=hr_HR; x-auto-31=m%3Acollapsed%7Cb%3Atrue; JSESSIONID=', jid),
'Host'= 'oss.uredjenazemlja.hr',
'Origin'= 'https://oss.uredjenazemlja.hr',
'Referer'= paste0('https://oss.uredjenazemlja.hr/public/gwt/', cache_html),
'Sec-Fetch-Dest'= 'empty',
'Sec-Fetch-Mode'= 'cors',
'Sec-Fetch-Site'= 'same-origin',
'User-Agent'= 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36'
)
payload <- "5|0|4|https://oss.uredjenazemlja.hr/public/gwt/|957F3F03E95E97ABBDE314DFFCCEF4BC|hr.ericsson.oss.ui.common.client.core.rpc.ICommonRPCService|getMainBook|1|2|3|4|0|"
session <- rvest:::request_POST(session, commonRPCServiceUrl, body = payload, add_headers(headers))
session$response$content
readBin(session$response$content, character(), endian = "little")
payload <- "5|0|22|https://oss.uredjenazemlja.hr/public/gwt/|957F3F03E95E97ABBDE314DFFCCEF4BC|hr.ericsson.oss.ui.common.client.core.rpc.ICommonRPCService|getMainBooks|com.extjs.gxt.ui.client.data.BaseModel|hr.ericsson.oss.ui.common.client.core.data.RpcModel/2891266824|dirty|java.lang.Boolean/476441737|new|deleted|resourceCode|java.lang.Integer/3438268394|elementSelected|cadastralMunicipality|class|java.lang.String/2004016611|hr.ericsson.jis.domain.admin.CadastralMunicipality|hr.ericsson.jis.domain.admin.MainBook|institution|preconditionsRequired|name|VELIKA GORICA|1|2|3|4|1|5|6|10|7|8|0|9|-2|10|-2|11|12|0|13|-2|14|6|1|15|16|17|15|16|18|19|0|20|-2|21|16|22|"
session <- rvest:::request_POST(session, commonRPCServiceUrl, body = payload, add_headers(headers))
session$response$content
readBin(session$response$content, character(), endian = "little")
# captcha
payload <- "5|0|4|https://oss.uredjenazemlja.hr/public/gwt/|957F3F03E95E97ABBDE314DFFCCEF4BC|hr.ericsson.oss.ui.common.client.core.rpc.ICommonRPCService|isCaptchaDisabled|1|2|3|4|0|"
session <- rvest:::request_POST(session, commonRPCServiceUrl, body = payload, add_headers(headers))
session$response$content
readBin(session$response$content, character(), endian = "little")
headers_captcha <- c(
"Accept"= "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8",
"Accept-Encoding"= "gzip, deflate, br",
"Accept-Language"=" hr-HR,hr;q=0.9,en-US;q=0.8,en;q=0.7",
"Connection"= "keep-alive",
"Cookie"= paste0("gxtTheme=m%3Aid%7Cs%3Agray%2Cfile%7Cs%3Axtheme-gray.css; ossprivatelang=hr_HR; x-auto-31=m%3Acollapsed%7Cb%3Atrue; JSESSIONID=", jid),
"DNT"= "1",
"Host"= "oss.uredjenazemlja.hr",
"Referer"= "https://oss.uredjenazemlja.hr/public/lrServices.jsp?action=publicLdbExtract",
"sec-ch-ua"= '"Google Chrome";v="89", "Chromium";v="89", ";Not A Brand";v="99"',
"sec-ch-ua-mobile"= "?0",
"Sec-Fetch-Dest"= "image",
"Sec-Fetch-Mode"= "no-cors",
"Sec-Fetch-Site"= "same-origin",
"User-Agent"= "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36"
)
captcha <- GET("https://oss.uredjenazemlja.hr/servlets/kaptcha.jpg?1617286122160", add_headers(headers_captcha))
# session <- rvest:::request_GET(session, "https://oss.uredjenazemlja.hr/servlets/kaptcha.jpg?1617286122160", add_headers(headers_captcha))
captcha$content
captcha$response$content
writeBin(captcha$content, "D:/zkrh/captchas/test.jpg")
result = solver$normal("D:/zkrh/captchas/test.jpg", minLength=5, maxLength=5)
payload <- paste0("5|0|6|https://oss.uredjenazemlja.hr/public/gwt/|957F3F03E95E97ABBDE314DFFCCEF4BC|hr.ericsson.oss.ui.common.client.core.rpc.ICommonRPCService|validateCaptcha|java.lang.String|",
result$code, "|1|2|3|4|1|5|6|")
session <- rvest:::request_POST(session, commonRPCServiceUrl, body = payload, add_headers(headers))
session$response$content
readBin(p$response$content, character(), endian = "little")
# ID!!!!!!
headers = c(
'Accept'= '*/*',
'Accept-Encoding'= 'gzip, deflate, br',
'Accept-Language'= 'hr-HR,hr;q=0.9,en-US;q=0.8,en;q=0.7',
'Connection'= 'keep-alive',
# 'Content-Length'= '166',
'Content-Type'= 'text/x-gwt-rpc; charset=UTF-8',
'Cookie'= paste0('gxtTheme=m%3Aid%7Cs%3Agray%2Cfile%7Cs%3Axtheme-gray.css; ossprivatelang=hr_HR; x-auto-31=m%3Acollapsed%7Cb%3Atrue; JSESSIONID=', jid),
'DNT' = '1',
'Host'= 'oss.uredjenazemlja.hr',
'Origin'= 'https://oss.uredjenazemlja.hr',
'Referer'= paste0('https://oss.uredjenazemlja.hr/public/gwt/', cache_html),
'sec-ch-ua' = '"Google Chrome";v="89", "Chromium";v="89", ";Not A Brand";v="99"',
'sec-ch-ua-mobile' = "?0",
'Sec-Fetch-Dest'= 'empty',
'Sec-Fetch-Mode'= 'cors',
'Sec-Fetch-Site'= 'same-origin',
'User-Agent'= 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36'
)
payload <- paste0("5|0|40|https://oss.uredjenazemlja.hr/public/gwt/|0EAC9F40996251FDB21FF254E1600E83|hr.ericsson.oss.ui.pia.client.rpc.IOssPublicRPCService|getLrUnitByMainBook|com.extjs.gxt.ui.client.data.BaseModel|java.lang.String|hr.ericsson.oss.ui.common.client.core.data.RpcModel/2891266824|date|java.sql.Date/3996530531|dirty|java.lang.Boolean/476441737|new|cadastralMunicipality|id|java.lang.Integer/3438268394|class|java.lang.String/2004016611|hr.ericsson.jis.domain.admin.CadastralMunicipality|cadastralMunicipalityId|source|creationDate|formatedName|VELIKA GORICA|userId|cadInstitution|deleted|institutionId|resourceCode|elementSelected|name|Odjel za katastar nekretnina Velika Gorica|hr.ericsson.jis.domain.admin.Institution|institution|Zemljišnoknjižni odjel Velika Gorica|place|sidMainBook|java.lang.Long/4227064769|hr.ericsson.jis.domain.admin.MainBook|status|1|1|2|3|4|2|5|6|7|18|8|9|114|1|21|10|11|0|12|-3|13|7|3|14|15|102844|16|17|18|19|-5|20|15|1|21|9|116|0|1|22|17|23|24|15|-20|25|7|8|10|-3|12|-3|26|-3|27|15|32|28|15|0|29|-3|30|17|31|16|17|32|33|7|9|10|-3|12|-3|26|-3|27|15|277|28|-13|29|-3|30|17|34|35|-9|16|-15|26|-3|28|-7|29|-3|30|-9|36|37|286610893|17179869184|14|15|21921|16|17|38|39|15|0|40|")
# Encoding(payload) <- "UTF-8"
# payload <- RCurl::curlEscape(payload)
session <- rvest:::request_POST(session, commonRPCServiceUrl, body = payload, add_headers(headers))
session$response$content
readBin(session$response$content, character())

I have found the error. The problem was in wrong url argument in one request.

You should really look into selenium and beautifulsoup4 to automate this - it's like requests on steroids.
see an example on my github: https://github.com/stevenhurwitt/reliant-scrape/blob/master/reliant_scrape.py

Python Requests AJAX Response Different from Browser Due to Cookie Handling

My request:
# python 3.7.3
import requests
from requests import Session
session = Session()
session.head('https://www.basspro.com/shop/en/blazer-brass-handgun-ammo')
cookies = requests.utils.cookiejar_from_dict(requests.utils.dict_from_cookiejar(session.cookies))
response = session.post(
url='https://www.basspro.com/shop/BPSGetInventoryStatusByIDView',
data={
'productId': '3074457345616736172',
'itemId': '3074457345616736949',
'isGunFlag': 'false',
},
cookies=cookies,
headers={
'accept': '*/*',
'accept-encoding': 'gzip, deflate, br',
'accept-language': 'en-US,en;q=0.9',
'content-length': '72',
'content-type': 'application/x-www-form-urlencoded',
'origin': 'https://www.basspro.com',
'referer': 'https://www.basspro.com/shop/en/blazer-brass-handgun-ammo',
'sec-fetch-mode': 'cors',
'sec-fetch-site': 'same-origin',
'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.92 Safari/537.36 Vivaldi/2.9.1705.38',
'x-requested-with': 'XMLHttpRequest',
},
)
print(response.text)
Output:
<input type="hidden" class="relativeToAbsolute" value="true" />
/*
{
"onlineInventory": {
"status": "Status Not Available",
"image": "widget_product_info/outofstock_icon.svg",
"altText": "Status Not Available",
"isDropShip": false,
"availableDate":""
},
"inStoreInventory": {
"stores": [],
"checkStoreText": "Check Store Availability",
"isInStoreInventory": true,
"isPickupInventory": false
}
}
*/
My output when inspecting and running the same AJAX request via browser:
/*
{
"onlineInventory": {
"status": "Backordered",
"image": "widget_product_info/backordered_icon.svg",
"altText": "Backordered",
"isDropShip": false,
"quantity": 0,
"availableDate":"May 1-8"
},
"inStoreInventory": {
"stores": [{
id: '715839555',
name: '83',
gunRestricted: 'false',
dsName: 'TX - Round Rock',
status: 'Unavailable',
statusText: 'Out of Stock',
image: 'widget_product_info/outofstock_icon.svg',
altText: 'Out of Stock',
availableDate: '',
availableQuantity: '',
availableQuantityDisplay: 'false',
cityState: 'Round Rock, TX',
ISPavailableDate: '',
ISPavailableQuantity: '',
pickupTime: 'by 2:00pm',
offerISPOnBPS: 'Yes',
offerISPOnCAB: 'No'}],
"checkStoreText": "Change Store",
"isInStoreInventory": true,
"isPickupInventory": true
}
}
*/
I tried assigning cookies this way as well:
url = "https://www.basspro.com/shop/en/blazer-brass-handgun-ammo"
r = requests.get(url)
cookies = r.cookies
# fails to pass the right cookie
If I instead copy the cookie verbatim from an inspected GET request at https://www.basspro.com/shop/en/blazer-brass-handgun-ammo and put that into the POST headers, it works. How do I get cookies to work properly programatically?
EDIT:
Here's my attempt at just using Session() for cookies:
# python 3.7.3
import requests
from requests import Session
session = Session()
session.get("https://www.basspro.com/shop/en/blazer-brass-handgun-ammo")
# session.head('https://www.basspro.com/shop/en/blazer-brass-handgun-ammo')
response = session.post(
url='https://www.basspro.com/shop/BPSGetInventoryStatusByIDView',
data={
'productId': '3074457345616736172',
'itemId': '3074457345616736949',
'isGunFlag': 'false',
},
headers={
'accept': '*/*',
'accept-encoding': 'gzip, deflate, br',
'accept-language': 'en-US,en;q=0.9',
'content-length': '72',
'content-type': 'application/x-www-form-urlencoded',
'origin': 'https://www.basspro.com',
'referer': 'https://www.basspro.com/shop/en/blazer-brass-handgun-ammo',
'sec-fetch-mode': 'cors',
'sec-fetch-site': 'same-origin',
'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.92 Safari/537.36 Vivaldi/2.9.1705.38',
'x-requested-with': 'XMLHttpRequest',
},
)
print(response.text)
I get the same result as before ("status": "Status Not Available", etc.)
Here's my attempt at the second solution:
# python 3.7.3
import requests
from requests import Session
url = "https://www.basspro.com/shop/en/blazer-brass-handgun-ammo"
r = requests.get(url)
cookies = r.cookies # the type is RequestsCookieJar
response = requests.post(
url='https://www.basspro.com/shop/BPSGetInventoryStatusByIDView',
data={
'productId': '3074457345616736172',
'itemId': '3074457345616736949',
'isGunFlag': 'false',
},
cookies=cookies,
headers={
'accept': '*/*',
'accept-encoding': 'gzip, deflate, br',
'accept-language': 'en-US,en;q=0.9',
'content-length': '72',
'content-type': 'application/x-www-form-urlencoded',
'origin': 'https://www.basspro.com',
'referer': 'https://www.basspro.com/shop/en/blazer-brass-handgun-ammo',
'sec-fetch-mode': 'cors',
'sec-fetch-site': 'same-origin',
'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.92 Safari/537.36 Vivaldi/2.9.1705.38',
'x-requested-with': 'XMLHttpRequest',
},
)
print(response.text)
Again, I get the same result as before. What am I doing wrong?

can you try like this
session = Session()
session.get("https://www.basspro.com/shop/en/blazer-brass-handgun-ammo")
Then all the following calls with
session.xxx
donot use cookies parameter in it
another way I have tested,
cookies = r.cookies # the type is RequestsCookieJar
requests.post(.... cookies=cookies...)
at last ,I tested this works:
Please compare carefully
from requests import Session
session = Session()
agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36'
r1 = session.get("https://www.basspro.com/shop/en/blazer-brass-handgun-ammo",headers={'user-agent': agent})
response = session.post(
url='https://www.basspro.com/shop/BPSGetOnlineInventoryStatusByIDView',
data={
'productId': '3074457345616736172',
'catalogId': '3074457345616676768',
'storeId': '715838534',
'langId':-1
},
headers={
'user-agent': agent,
'x-requested-with': 'XMLHttpRequest',
},
cookies=r1.cookies
)
print(response.text)

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

i have give all the payload data but not working - python

Related

how to youtube python a post request to change account to another one?

Scrape multiple pages with json

python request trying to login session barchart

Web scraping - his application is out of date, please click the refresh button on your browser return message

Python Requests AJAX Response Different from Browser Due to Cookie Handling

Categories

Resources