import requests
# Add one item to the basket through the shop's AJAX endpoint, then fetch the
# basket as JSON and print it. A single session is used for both requests so
# cookies set by the first response are sent with the second.
add_url = 'https://supremenewyork.com/shop/304070/add'
cart_url = 'https://supremenewyork.com/shop/cart.json'

# Headers sent with both requests.
# NOTE(review): 'Origin' uses the "www." host while both URLs use the bare
# host -- confirm that this mismatch is intended.
ajax_headers = {
    'Accept': '*/*;q=0.5, text/javascript, application/javascript, application/ecmascript, application/x-ecmascript',
    'Origin': 'https://www.supremenewyork.com',
    'X-CSRF-Token': 'cGh34LIXA5O75UEl+ArjyIQA/CS6BGY9mFleXXZ5GnznS4t8y2rGTpUTumG93EHNwSfnkDDtsYLvbEGbmMymRQ==',
    'X-Requested-With': 'XMLHttpRequest',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36',
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
}

# Form fields submitted with the add-to-basket request.
form_fields = {
    'commit': 'add to basket',
    'size': '53133',
    'style': '25229',
    'utf8': '✓',
}

http = requests.Session()
http.post(add_url, data=form_fields, headers=ajax_headers, timeout=1)

cart = http.get(cart_url, headers=ajax_headers)
print(cart.text)
The POST data is correct (I copied it from Google Chrome), but every time the code returns nothing, because the basket is empty. How do I make the POST request correctly?
Related
I'm trying to create a script using scrapy to grab json content from this webpage. I've used headers within the script accordingly but when I run it, I always end up getting JSONDecodeError. The site sometimes throws captcha but not always. However, I've never got any success using the script below even when I used vpn. How can I fix it?
This is how I've tried:
import scrapy
import urllib
class ImmobilienScoutSpider(scrapy.Spider):
    """Spider that requests one search-results page and yields its JSON body.

    The request URL is ``start_url`` followed by the URL-encoded ``params``;
    the request is sent with the browser-like ``headers`` defined below.
    """

    name = "immobilienscout"
    start_url = "https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/wohnung-kaufen"

    # Request headers asking for a JSON response, as an XMLHttpRequest would.
    headers = {
        'accept': 'application/json; charset=utf-8',
        'accept-encoding': 'gzip, deflate, br',
        'accept-language': 'en-US,en;q=0.9',
        'x-requested-with': 'XMLHttpRequest',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36',
    }

    # Query-string parameters appended to ``start_url``.
    params = {
        'price': '1000.0-',
        'constructionyear': '-2000',
        'pagenumber': '1',
    }

    def start_requests(self):
        """Yield the single search request, with ``params`` as its query string."""
        # Import the submodule explicitly: a bare ``import urllib`` does not
        # guarantee that ``urllib.parse`` has been loaded.
        from urllib.parse import urlencode

        req_url = f'{self.start_url}?{urlencode(self.params)}'
        yield scrapy.Request(
            url=req_url,
            headers=self.headers,
            callback=self.parse,
        )

    def parse(self, response):
        """Yield one item holding the JSON-decoded body of ``response``."""
        yield {"response": response.json()}
This is what the output should look like (truncated):
{"searchResponseModel":{"additional":{"lastSearchApiUrl":"/region?realestatetype=apartmentbuy&price=1000.0-&constructionyear=-2000&pagesize=20&geocodes=1276010&pagenumber=1","title":"Eigentumswohnung in Nordrhein-Westfalen - ImmoScout24","sortingOptions":[{"description":"Standardsortierung","code":0},{"description":"Kaufpreis (höchste zuerst)","code":3},{"description":"Kaufpreis (niedrigste zuerst)","code":4},{"description":"Zimmeranzahl (höchste zuerst)","code":5},{"description":"Zimmeranzahl (niedrigste zuerst)","code":6},{"description":"Wohnfläche (größte zuerst)","code":7},{"description":"Wohnfläche (kleinste zuerst)","code":8},{"description":"Neubau-Projekte (Projekte zuerst)","code":31},{"description":"Aktualität (neueste zuerst)","code":2}],"pagerTemplate":"|Suche|de|nordrhein-westfalen|wohnung-kaufen?price=1000.0-&constructionyear=-2000&pagenumber=%page%","sortingTemplate":"|Suche|de|nordrhein-westfalen|wohnung-kaufen?price=1000.0-&constructionyear=-2000&sorting=%sorting%","world":"LIVING","international":false,"device":{"deviceType":"NORMAL","devicePlatform":"UNKNOWN","tablet":false,"mobile":false,"normal":true}
EDIT:
This is what the script built on the requests module looks like:
import requests
# Fetch one page of search results with a requests session and print the
# JSON-decoded response body.
search_url = 'https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/wohnung-kaufen'

# Query-string parameters for the search.
params = {
    'price': '1000.0-',
    'constructionyear': '-2000',
    'pagenumber': '2',
}

# Default headers installed on the session before the request is sent.
headers = {
    'accept': 'application/json; charset=utf-8',
    'accept-encoding': 'gzip, deflate, br',
    'accept-language': 'en-US,en;q=0.9',
    'x-requested-with': 'XMLHttpRequest',
    'content-type': 'application/json; charset=utf-8',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36',
    'referer': 'https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/wohnung-kaufen?price=1000.0-&constructionyear=-2000&pagenumber=1',
    # 'cookie': 'hardcoded cookies'
}

session = requests.Session()
session.headers.update(headers)

response = session.get(search_url, params=params)
print(response.json())
Scrapy's CookiesMiddleware disregards 'cookie' passed in headers.
Reference: scrapy/scrapy#1992
Pass cookies explicitly:
yield scrapy.Request(
url=req_url,
headers=self.headers,
callback=self.parse,
# Add the following line:
cookies={k: v.value for k, v in http.cookies.SimpleCookie(self.headers.get('cookie', '')).items()},
),
Note: That site uses GeeTest CAPTCHA, which cannot be solved by simply rendering the page or using Selenium, so you still need to periodically update the hardcoded cookie (cookie name: reese84) taken from the browser, or use a service like 2Captcha.
What should I do about this error? At first the request returned a 403 and I could not get in, so I decided to set a User-Agent header!
import requests
# Request the iPhone category page with custom headers and print the
# resulting Response object (its repr shows the status code).
# NOTE(review): 'Host' is 'mvideo.ru' while the URL targets 'www.mvideo.ru'
# -- confirm that this mismatch is intended.
category_url = 'https://www.mvideo.ru/smartfony-i-svyaz-10/smartfony-205/f/category=iphone-914'

headers = {
    'Host': 'mvideo.ru',
    'User-Agent': 'Safari',
    'Accept': '*/*',
    'Accept-Encoding': 'gzip, deflate, br',
    'Connection': 'keep-alive',
}

mvideo_requests = requests.get(category_url, headers=headers)
print(mvideo_requests)
Try using different headers.
# Send only a complete mobile-browser User-Agent string.
headers = {
    "User-Agent": 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Mobile Safari/537.36',
}
category_url = 'https://www.mvideo.ru/smartfony-i-svyaz-10/smartfony-205/f/category=iphone-914'
mvideo_requests = requests.get(category_url, headers=headers)
Context:
I'm making GET requests to an API, and the API sometimes returns data that is up to 5 minutes old. However, when making the same request in Chrome, the data is always up to date. The server is nginx.
This is the API request made when the page is loaded in Chrome:
https://buff.163.com/api/market/goods/sell_order?game=csgo&goods_id=781660&_=1604808126524
Relevant Code:
def epochTimestamp():
    """Return the current time as integer milliseconds since the Unix epoch.

    The POSIX timestamp of ``datetime.now()`` is scaled to milliseconds and
    rounded to the nearest integer.
    """
    return int(round(datetime.now().timestamp() * 1000))
def getProxies():
    """Return a requests-style proxies mapping for one randomly chosen proxy.

    One entry is drawn with ``random.choice`` from ``proxies`` (defined
    elsewhere) and used, with the ``socks5h://`` scheme, for both the
    ``http`` and ``https`` keys.
    """
    proxy_url = f'socks5h://{random.choice(proxies)}'
    return {'http': proxy_url, 'https': proxy_url}
# Headers sent with the sell-order API request.
get_purchase_headers = {
    'Host': 'buff.163.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.183 Safari/537.36',
    'X-Requested-With': 'XMLHttpRequest',
    'Cache-Control': 'max-age=0',
}

# The trailing "_" query parameter carries the current millisecond timestamp.
url = f"https://buff.163.com/api/market/goods/sell_order?game=csgo&goods_id=781660&_={epochTimestamp()}"

# Issue the GET through a randomly selected proxy, with a 10-second timeout.
source = requests.get(
    url,
    timeout=10,
    proxies=getProxies(),
    headers=get_purchase_headers,
)
What I have tried:
Including User-Agent headers
'Cache-Control': 'max-age=0'
Including timestamp in the URL
I am using this code to update some items in my list of products:
# Form data sent in the body of the POST request.
payload = {"EPrincipal": "888407233616", "SiteId": 106}

# Submit the form through the existing session (`session_req`).
adicionar_url = "MY URL"
post = session_req.post(adicionar_url, data=payload)
When I debug it, the status code returned is 200, but when I parse the result with BeautifulSoup, I get:
soup = BeautifulSoup(result.text, 'html.parser')
#Return
{"success": false, "site_id":"" }
and the items are not updated in my account. Can someone help me with this?
I found a solution by using the request that Postman exported for the Python requests library.
import requests
# POST a pre-encoded form body with browser-like AJAX headers and print the
# response text encoded as UTF-8 bytes.
url = "MY SITE"

# Body is already in application/x-www-form-urlencoded form.
payload = "Principal=9999&Site=999&g-recaptcha-response="

headers = {
    'authority': 'ROOT-SITE',
    'x-sec-clge-req-type': 'ajax',
    'accept': 'application/json, text/javascript, */*; q=0.01',
    'x-requested-with': 'XMLHttpRequest',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36',
    'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'origin': 'https://ROOT-SITE',
    'sec-fetch-site': 'same-origin',
    'sec-fetch-mode': 'cors',
    'sec-fetch-dest': 'empty',
    'referer': 'https://ROOT-SITE',
    'accept-language': 'pt-BR,pt;q=0.9,en-US;q=0.8,en;q=0.7,fr;q=0.6,es;q=0.5',
}

response = requests.post(url, headers=headers, data=payload)
print(response.text.encode('utf8'))
This code posts form data:
# Headers passed to the POST request; 'authority' is sent as an ordinary
# header field here.
headers = {
    'authority': 'ec.ef.com.cn',
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36',
    'accept': '*/*',
    'accept-language': 'en-US,en;q=0.9,ru-RU;q=0.8,ru;q=0.7,uk;q=0.6,en-GB;q=0.5',
}

# POST to `url` through a fresh session.
s = requests.Session()
response = s.post(url, headers=headers)
which seems different to what Chrome does
I understand :authority is a kind of HTTP/2 Headers. How do I send it with Python requests?
You could use hyper.contrib.HTTP20Adapter and attach it to the session with mount(), like this:
from hyper.contrib import HTTP20Adapter
import requests
def getHeaders():
    """Return a new dict of request headers, including HTTP/2 pseudo-headers.

    The keys starting with ":" (``:authority``, ``:method``, ``:path``,
    ``:scheme``) are the HTTP/2 pseudo-header fields; the remaining entries
    are ordinary headers.
    """
    return {
        ":authority": "xxx",
        ":method": "POST",
        ":path": "/login/secure.ashx",
        ":scheme": "https",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36",
        "X-Requested-With": "XMLHttpRequest",
    }
# Route every request whose URL starts with the mounted prefix through the
# HTTP/2 adapter, then send the POST with the headers built above.
sessions = requests.Session()
sessions.mount('https://xxxx.com', HTTP20Adapter())
r = sessions.post(url_search, headers=getHeaders(), data=playload)
Refer to a Chinese blog