I want to get the source page of 'g2.com' website.
I included all the headers in the request and still get 403 response.
headers = {
'Authority': 'www.g2.com',
'Method': 'GET',
'Scheme': 'https',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'en-US, en; q=0.9',
'Referer': 'https://www.g2.com/products/adobe-marketo-engage/reviews?__cf_chl_tk=Fo5h38ejOyOQlLESaOeFuwQwgN_UhEf3GEk.D8oGJUI-1657274049-0-gaNycGzNCmU',
'Pragma': 'no-cache',
'Connection': 'keep-alive',
'Cache-Control': 'max-age=0',
'Upgrade-Insecure-Requests': '1',
'sec-ch-ua': '" Not;A Brand";v="99", "Google Chrome";v="103", "Chromium";v="103"',
'sec-ch-ua-mobile': '?0',
'Sec-Fetch-Site': 'same-origin',
'Sec-Fetch-Mode': 'navigate',
'Sec-Fetch-Dest': 'document',
'Sec-Fetch-User': '?1',
'cookie': 'events_distinct_id=c2b8f12b-8533-4f3b-8116-5d1697f9fb9a; _ga=GA1.2.1805609855.1656582972; ajs_anonymous_id=c2b8f12b-8533-4f3b-8116-5d1697f9fb9a; intercom-id-rzpwcktf=ca9542d9-48e1-43e9-9c03-2e6f764bd313; _gcl_au=1.1.1117117031.1656582974; _delighted_web={"h7nzI49oCCJbJbS4":{"_delighted_fst":{"t":"1656582990103"}}}; __adroll_fpc=be0a43b909a1ee55f7fbbe0ff435677b-1656582993099; intercom-session-rzpwcktf=; cf_clearance=bZFBSAXN._7HrbMYKIHlwGhxjEpv1LcxhdgUZJvANuA-1657274052-0-150; __ar_v4=|C6MKFN32KVBHZAS4DKYVVW:20220707:1|EEPCTRZ5RNC6ZCBB2PJM4J:20220707:1|NBMTYK27EJFT3GYAV7FM56:20220707:1; _g2_session_id=741c7ae825402c34f500c9e045907672; _gid=GA1.2.923101367.1657517161; AWSALB=OS0LwCLYUCHeMYMSmWfhqAWaatiLtCuQhpwkGLfmjOb8/p6Lc/wFvC/DJ0MD7qnMTML3BbUpq9caoDwAQuU/EStHqVpw+cO7juiwSVs551sfSWPPYgiP8lwLahU+; AWSALBCORS=OS0LwCLYUCHeMYMSmWfhqAWaatiLtCuQhpwkGLfmjOb8/p6Lc/wFvC/DJ0MD7qnMTML3BbUpq9caoDwAQuU/EStHqVpw+cO7juiwSVs551sfSWPPYgiP8lwLahU+',
'User-Agent': 'Mozilla/5.0(Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) '
'Chrome/103.0.0.0 Safari/537.36',
'Content-Type': 'text/html; charset=UTF-8',
}
response = requests.get(url, headers=headers)
I tried with pytor package tho but I got the same problem.
I can view the website in all browsers but when I try to get the source page with requests I get 403.
Related
i have this code
import requests
import time
import re
import json
from datetime import datetime
url = 'https://www.cryptocommando.io/mio-account/'
headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Accept-Language': 'it-IT,it;q=0.9,en-US;q=0.8,en;q=0.7',
'Cache-Control': 'max-age=0',
'Connection': 'keep-alive',
'Content-Type': 'application/x-www-form-urlencoded',
'Origin': 'https://www.cryptocommando.io',
'Referer': 'https://www.cryptocommando.io/mio-account/',
'Sec-Fetch-Dest': 'document',
'Sec-Fetch-Mode': 'navigate',
'Sec-Fetch-Site': 'same-origin',
'Sec-Fetch-User': '?1',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
'sec-ch-ua': '"Not_A Brand";v="99", "Google Chrome";v="109", "Chromium";v="109"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Linux"'
}
data = {
'username': "mylogin",
'password': '123456',
'woocommerce-login-nonce': '030f97cc6f',
'_wp_http_referer': '/mio-account/',
'login': 'Accedi'
}
response = requests.post(url, headers=headers, data=data)
#response_headers = response.headers
#print(response.status_code)
#print(response.text)
response = requests.get(url, headers=headers,data=data)
print(response.headers)
when i do a post after in browser i saw the site reply with GET if i copy headers i saw my wordpress_logged_in_....etc.... but if i call by pyhton i dont have why ?
i try to:
response = requests.post(url, headers=headers, data=data)
response_headers = response.headers
I am trying to do some web scraping from here but I am struggling to get the access token automaticaly. Everytime I do the web scraping, I need to manually update the Bearer token. Is there a way to do this automaticaly?
Let me show you how I do it manually:
url_WiZink = 'https://www.creditopessoal.wizink.pt/gravitee/gateway/api-chn-loans/v1/loans/quotation'
headers_WiZink = {'Accept': 'application/json',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'pt-PT',
'Authorization': 'Bearer de6ea490-381e-417f-ab77-3aad0d7eb63c',
'Connection': 'keep-alive',
'Content-Length': '266',
'Content-Type': 'application/json;charset=UTF-8',
'Host': 'www.creditopessoal.wizink.pt',
'Origin': 'https://www.wizink.pt',
'Referer': 'https://www.wizink.pt/',
'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="90", "Google Chrome";v="90"',
'sec-ch-ua-mobile': '?0',
'Sec-Fetch-Dest': 'empty',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'same-site',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36',
'X-Channel-Id': 'LOANSIMULATOR',
'X-Client-Id': 'simWzkPt',
'X-Country-Id': 'PRT',
'X-Device-UUID': 'd14e9b629804cbba1ac7c3e78ab39a56'}
payload_WiZink = {"productCode":"WZP01","fixedTermLoanId":84,"impositionAmount":{"amount":10000,"currency":"EUR"},"settlementDay":"5","dueOrAdvanceInterestIndicator":"3","nominalInterest":"8.0000000","feeRateId":"05","settlementFrequencyId":"0001","deprecationFrequencyId":"0001"}
response_WiZink = requests.post(url_WiZink, headers=headers_WiZink, json=payload_WiZink, verify=False).json()
For that website, you can get an access token by calling their oauth/token endpoint:
import requests
access_token = requests.post(
'https://www.creditopessoal.wizink.pt/gravitee/gateway/api-chn-auth-server/v1/oauth/token',
headers={'Authorization': 'Basic c2ltV3prUHQ6YmllZTktZmR6dzAzLXBvZWpuY2Q='},
data={'grant_type': 'client_credentials'},
).json()['access_token']
print(access_token)
I am trying to send a request to a website but I am getting a 503 status code. It seems like the website is protected by Cloudflare. Is it possible to send a request to the Cloudflare protected website with the python-requests library? I have sent cookies and headers along with the request but it didn't get through.
Below is my code.
import requests
cookies = {
'SSPV_C': 'BPwAAAAAAAAAAAAAAAAAAAAAAAEAAAAAAAAAAAAA',
'locale': 'en',
'cookieID': '390778282271656143963365',
'uui': '800.606.6969|',
'cartId': '42951851197',
'mapp': '0',
'__cfruid': '7f4badea550ab7327454d1e2bac7cdec7c0701cf-1656593179',
'__cf_bm': 'gn_yevoOR3SIcU9B8oDZQ.r_e9300kA61vY264Hls64-1656593179-0-AaRJibHSFeN0Z7jkQTvjq+HQMc3lRPlhM850slZTqy7uy5QzXhmRx3M6rxlwk78kIU+zC8Vb7eDsPpuhdnNOhAkil4ZdBSaZW4pRvSMX53Xd',
'SSLB_C': '0',
'SSID_C': 'CQD72x0AAAAAAABbwLZiApVDB1vAtmICAAAAAAAAAAAAHJu9YgANyA',
'SSSC_C': '333.G7113084158674703618.2|0.0',
'SSRT_C': 'HJu9YgAAAA',
'lpi': 'cat=2,cur=USD,app=D,lang=E,view=L,lgdin=N,cache=release-WEB-20220629v10-BHJ-DVB31150-11,ipp=24,view=L,sort=BS,priv=N,state=',
'sessionKey': 'f6fbd948-2fed-41f9-bcf3-7defa626f36a',
'dpi': 'cat=2,cur=USD,app=D,lang=E,view=L,lgdin=N,cache=release-WEB-20220629v10-BHJ-DVB31150-11',
'utkn': '97655ab6781ce66340f0d2aa809c3f68',
'build': '20220629v10-20220629v10',
'aperture-be-commit-id': 'n/a',
'JSESSIONID': 'SUq0pea5K0bNUyQEJscyjUnJFvvEGjW7!622981770',
'sshc': '61f5b3f36d4907c548b3efc82cfcecd9',
'_pxhd': '53bIlMthB4XG3X644UXFOgn-jRSXY56BvM49fjHfOdSg53A7NqKSOXYc0jBByweKQ4NgEZR/R61UG9ouHxGSUw==:Kfr40D-EuMhLJ4qxdatLAMna184C2zbBIJV3xlOVy2hTdUEI3sN3kCGBQV73oDxdiOoVZAKilYlJZn--t492StGQHTm21i-GiwB5xxziLd8=',
'cf-colo': 'KHI',
'aperture-be-commit-id': 'n/a',
'TS0188dba5': '01ec39615f5a1331c083e7ac7ff7f2895322c069326ea3e7a0fb426c2906479f8fdba41c2cbebcf1669847d2488313d23495cf506ce0991eb9af796b9032458b1a715a28e71e7a31b64b6791644a6f092364bff1d8e79d027277b851adf5faa365dd8e2609',
'TS01d628c4': '01ec39615f8b9833712bd8ae68ec8c0798bd1df2e408a949a17c3772b8419cc7bbfe911b2b2798bd33f09b9e2fa7d6837ec5814f8ca97bd51f8eccc8779214eac7cd387b8f1f1d5097bca3b926c8d264dd80d59d7e4879197618d3a0ef6777bdb5902263106d9d95ac8fd7d92cd8458f02fb7c1409230f71f6b3a638107bbd8a73aa1629da3456ce69fd32f210cf1826979006e713',
'TopBarCart': '0|0',
'dlc': '%43%4D%5F%4D%4D%43%3D%7C%54%59%50%45%44%56%41%4C%55%45%3D%7C%45%4D%4C%45%3D%7C%55%4E%42%49%3D%6E%75%6C%6C%7C%4C%4F%4E%47%3D%37%30%2E%30%30%30%30%30%7C%4C%41%54%3D%33%30%2E%30%30%30%30%30%7C',
'app_cookie': '1656593927',
'TS01e1f1fd': '01ec39615fa2aeefd67a3c8e74158e94069993ea3308a949a17c3772b8419cc7bbfe911b2b55db8474b97fd606a862d187b6fdf539dfd177a32a93169e75a1c8599fc7428443914075f1081235d9564cc0fc8b69460d7a08aef755b5c296a42cf6b735f4953465ca238a6965b0625b2de8e4934e04',
'forterToken': 'a1ba6a2e88e74edb91df3bcf567bdd45_1656593924573_588_dUAL43-mnts-ants_13ck',
}
headers = {
'authority': 'www.bhphotovideo.com',
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'accept-language': 'en-PK,en;q=0.9,ur-PK;q=0.8,ur;q=0.7,en-GB;q=0.6,en-US;q=0.5,sv;q=0.4,it;q=0.3',
'cache-control': 'no-cache',
# Requests sorts cookies= alphabetically
# 'cookie': 'SSPV_C=BPwAAAAAAAAAAAAAAAAAAAAAAAEAAAAAAAAAAAAA; locale=en; cookieID=390778282271656143963365; uui=800.606.6969|; cartId=42951851197; mapp=0; __cfruid=7f4badea550ab7327454d1e2bac7cdec7c0701cf-1656593179; __cf_bm=gn_yevoOR3SIcU9B8oDZQ.r_e9300kA61vY264Hls64-1656593179-0-AaRJibHSFeN0Z7jkQTvjq+HQMc3lRPlhM850slZTqy7uy5QzXhmRx3M6rxlwk78kIU+zC8Vb7eDsPpuhdnNOhAkil4ZdBSaZW4pRvSMX53Xd; SSLB_C=0; SSID_C=CQD72x0AAAAAAABbwLZiApVDB1vAtmICAAAAAAAAAAAAHJu9YgANyA; SSSC_C=333.G7113084158674703618.2|0.0; SSRT_C=HJu9YgAAAA; lpi=cat=2,cur=USD,app=D,lang=E,view=L,lgdin=N,cache=release-WEB-20220629v10-BHJ-DVB31150-11,ipp=24,view=L,sort=BS,priv=N,state=; sessionKey=f6fbd948-2fed-41f9-bcf3-7defa626f36a; dpi=cat=2,cur=USD,app=D,lang=E,view=L,lgdin=N,cache=release-WEB-20220629v10-BHJ-DVB31150-11; utkn=97655ab6781ce66340f0d2aa809c3f68; build=20220629v10-20220629v10; aperture-be-commit-id=n/a; JSESSIONID=SUq0pea5K0bNUyQEJscyjUnJFvvEGjW7!622981770; sshc=61f5b3f36d4907c548b3efc82cfcecd9; _pxhd=53bIlMthB4XG3X644UXFOgn-jRSXY56BvM49fjHfOdSg53A7NqKSOXYc0jBByweKQ4NgEZR/R61UG9ouHxGSUw==:Kfr40D-EuMhLJ4qxdatLAMna184C2zbBIJV3xlOVy2hTdUEI3sN3kCGBQV73oDxdiOoVZAKilYlJZn--t492StGQHTm21i-GiwB5xxziLd8=; cf-colo=KHI; aperture-be-commit-id=n/a; TS0188dba5=01ec39615f5a1331c083e7ac7ff7f2895322c069326ea3e7a0fb426c2906479f8fdba41c2cbebcf1669847d2488313d23495cf506ce0991eb9af796b9032458b1a715a28e71e7a31b64b6791644a6f092364bff1d8e79d027277b851adf5faa365dd8e2609; TS01d628c4=01ec39615f8b9833712bd8ae68ec8c0798bd1df2e408a949a17c3772b8419cc7bbfe911b2b2798bd33f09b9e2fa7d6837ec5814f8ca97bd51f8eccc8779214eac7cd387b8f1f1d5097bca3b926c8d264dd80d59d7e4879197618d3a0ef6777bdb5902263106d9d95ac8fd7d92cd8458f02fb7c1409230f71f6b3a638107bbd8a73aa1629da3456ce69fd32f210cf1826979006e713; TopBarCart=0|0; dlc=%43%4D%5F%4D%4D%43%3D%7C%54%59%50%45%44%56%41%4C%55%45%3D%7C%45%4D%4C%45%3D%7C%55%4E%42%49%3D%6E%75%6C%6C%7C%4C%4F%4E%47%3D%37%30%2E%30%30%30%30%30%7C%4C%41%54%3D%33%30%2E%30%30%30%30%30%7C; app_cookie=1656593927; TS01e1f1fd=01ec39615fa2aeefd67a3c8e74158e94069993ea3308a949a17c3772b8419cc7bbfe911b2b55db8474b97fd606a862d187b6fdf539dfd177a32a93169e75a1c8599fc7428443914075f1081235d9564cc0fc8b69460d7a08aef755b5c296a42cf6b735f4953465ca238a6965b0625b2de8e4934e04; forterToken=a1ba6a2e88e74edb91df3bcf567bdd45_1656593924573_588_dUAL43-mnts-ants_13ck',
'pragma': 'no-cache',
'referer': 'https://www.bhphotovideo.com/c/buy/Notebooks/ci/6782/N/4110474287',
'sec-ch-ua': '".Not/A)Brand";v="99", "Google Chrome";v="103", "Chromium";v="103"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"',
'sec-fetch-dest': 'document',
'sec-fetch-mode': 'navigate',
'sec-fetch-site': 'same-origin',
'sec-fetch-user': '?1',
'upgrade-insecure-requests': '1',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36',
}
response = requests.get('https://www.bhphotovideo.com/c/product/1663923-REG/lenovo_82h801ekus_ip3_15itl6_i3_1115g4_8gb.html', cookies=cookies, headers=headers)
print(response)
You can use cloudscraper to get rid of cloudflare protection
from bs4 import BeautifulSoup
import cloudscraper
scraper = cloudscraper.create_scraper(delay=10, browser={'custom': 'ScraperBot/1.0',})
url = 'https://www.bhphotovideo.com/c/product/1663923-REG/lenovo_82h801ekus_ip3_15itl6_i3_1115g4_8gb.html'
req = scraper.get(url)
print(req)
Output:
<Response [200]>
cloudscraper
I'm trying to make a post request using the following converted from CURL bash:
headers = {
'authority': 'www.discoverecentral.com',
'accept': '*/*',
'accept-language': 'en-US,en;q=0.9',
'content-type': 'application/x-www-form-urlencoded',
# Requests sorts cookies= alphabetically
# 'cookie': 'COOKIE_SUPPORT=true; GUEST_LANGUAGE_ID=en_US; _ga=GA1.2.976004968.1651680892; _gid=GA1.2.1647177146.1652091069; ak_bmsc=376E16054B8CE1667585CF4B843B1281~000000000000000000000000000000~YAAQVJPIF87oQqyAAQAA5y8jrQ9DHy/4GZJUo1mSNg5U7s7R0A1ATGV+bFMIIp99MPTSGgwRJbLppQ33OtTnvp4dT1gF31OZ01N5b7SAvYbzGh6p1JHCPRkuLI7LI/yDQ/Y24KBTfsRYeTkILDOlI948yMwXay1lXdXMwVmiUOhfUV1TqPoS/kuHVjF+Pu5TYaGVoHmz2tARel9ydbLCv44P+yYkEssPPJanuEtdg3A3IYXH4SzSbaqhN+yV2OmwbYj9C4rHP3Vb1R7g2zQAKzS8Z+kwdV5Ns13EVuFPb+bVNxAKUIsnMKy7Lpxa05e+l38JktfKWtto7bBkfAzH7FyibI/6iyCvw/cghpDaE/PkXqXZDZh6GFWkVUABzngytkXRkS1aTG9VwhBJap2iJbWaVvA=; SAML_SP_SESSION_KEY=_bd42396230f077643c06f7bb75c60202169a8011748d5bf587745d054563; JSESSIONID=429078A672F540F1159490C033065E11.jvm6; _gat=1; bm_sv=237874D0F3F147A8B5E9FE30ABD61E37~YAAQVJPIFxrtQqyAAQAA/dqCrQ8Pjm4VHd954FLp0cvcoavAJFayiPFK25Q0lEeLQz4Ejuy7Q2GTzcT1DC0xhWkz2XAC6zLrqBc93TFAOG9zTjPZFqUTKfu9XplU5QowZlz76ekHhvprJpnen+rsaOPGScci0EPsUaU4LXyknJADa97lizWyy/1RpFDuSUnspML6cYGOBwVmpVs3EM13bfVQCuB7r4li7iMJ0toY6hl30+YIzwF7ESB1xrlwvl59Uvumf3j4w4UC1kw=~1; LFR_SESSION_STATE_4814701=1652178477701',
'origin': 'https://www.discoverecentral.com',
'referer': 'https://www.discoverecentral.com/group/merchant/my-reports',
'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="101", "Google Chrome";v="101"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"',
'sec-fetch-dest': 'empty',
'sec-fetch-mode': 'cors',
'sec-fetch-site': 'same-origin',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36',
'x-requested-with':'XMLHttpRequest',
}
params = {
'p_p_id': 'DiscoverMyReportPortlet_WAR_discovermyreportportlet',
'p_p_lifecycle': '2',
'p_p_state': 'normal',
'p_p_mode': 'view',
'p_p_resource_id': 'retreiveHierarchyList',
'p_p_cacheability': 'cacheLevelPage',
}
data = '&direction=des&orderBy=Default&selectedLocalEntityId=6011&gridPageSize=5000'
response = s.post('https://www.discoverecentral.com/group/merchant/my-reports', params=params, headers=headers, data=data)
The response is a 401 unauthorized. I know it has something to do with the structure of the data being passed into the request. Anyone come across a similar issue?
I'm trying to get into papara.com using Python. When I make a request it always gives 403 as a response. I got cookies from my browser. Here is my code:
import requests
headers = {
'authority': 'www.papara.com',
'upgrade-insecure-requests': '1',
'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36',
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'sec-fetch-site': 'none',
'sec-fetch-mode': 'navigate',
'sec-fetch-user': '?1',
'sec-fetch-dest': 'document',
'accept-language': 'en-US,en;q=0.9',
'cookie': '__cfruid=64370d0d06d80a1e1a701ae8bee5a4b85c1de1af-1610296629',
}
response = requests.get('https://www.papara.com/', headers=headers)
I tried different user agents, I tried removing the cookie from the headers but didn't work.