I am trying to write a Python script that sends a POST request with parameters and extracts the result.
With Fiddler, I captured the POST request that returns what I want. The website uses HTTPS only.
POST /Services/GetFromDataBaseVersionned HTTP/1.1
Host: www.mywbsite.fr
"Connection": "keep-alive",
"Content-Length": 129,
"Origin": "https://www.mywbsite.fr",
"X-Requested-With": "XMLHttpRequest",
"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.52 Safari/536.5",
"Content-Type": "application/json",
"Accept": "*/*",
"Referer": "https://www.mywbsite.fr/data/mult.aspx",
"Accept-Encoding": "gzip,deflate,sdch",
"Accept-Language": "fr-FR,fr;q=0.8,en-US;q=0.6,en;q=0.4",
"Accept-Charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.3",
"Cookie": "ASP.NET_SessionId=j1r1b2a2v2w245; GSFV=FirstVisit=; GSRef=https://www.google.fr/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&ved=0CHgQFjAA&url=https://www.mywbsite.fr/&ei=FZq_T4abNcak0QWZ0vnWCg&usg=AFQjCNHq90dwj5RiEfr1Pw; HelpRotatorCookie=HelpLayerWasSeen=0; NSC_GSPOUGS!TTM=ffffffff09f4f58455e445a4a423660; GS=Site=frfr; __utma=1.219229010.1337956889.1337956889.1337958824.2; __utmb=1.1.10.1337958824; __utmc=1; __utmz=1.1337956889.1.1.utmcsr=google|utmccn=(organic)|utmcmd=organic|utmctr=(not%20provided)"
{"isLeftColumn":false,"lID":-1,"userIpCountryCode":"FR","version":null,"languageCode":"fr","siteCode":"frfr","Quotation":"eu"}
And now my python script:
#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-
import string
import httplib
import urllib2
# Server to connect to.
# NOTE(review): this value carries a URL path after the host name, while
# httplib.HTTPConnection takes a bare host name — presumably the cause of the
# name-resolution error reported for this script; confirm.
host = "www.mywbsite.fr/sport/multiplex.aspx"
# Request body.
# NOTE(review): unlike the captured request body, this string has no enclosing
# braces, uses the key "liveID" instead of "lID", and quotes false / -1 / null
# as strings, so it is not the same JSON document.
params='"isLeftColumn":"false","liveID":"-1","userIpCountryCode":"FR","version":"null","languageCode":"fr","siteCode":"frfr","Quotation":"eu"'
# Request headers copied from the captured browser request.
# NOTE(review): the first entry uses the bare names Host and www rather than
# string literals, and Content-Length is hard-coded.
headers = { Host: www.mywbsite.fr,
"Connection": "keep-alive",
"Content-Length": 129,
"Origin": "https://www.mywbsite.fr",
"X-Requested-With": "XMLHttpRequest",
"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.52 Safari/536.5",
"Content-Type": "application/json",
"Accept": "*/*",
"Referer": "https://www.mywbsite.fr/data/mult.aspx",
"Accept-Encoding": "gzip,deflate,sdch",
"Accept-Language": "fr-FR,fr;q=0.8,en-US;q=0.6,en;q=0.4",
"Accept-Charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.3",
"Cookie": "ASP.NET_SessionId=j1r1b2a2v2w245; GSFV=FirstVisit=; GSRef=https://www.google.fr/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&ved=0CHgQFjAA&url=https://www.mywbsite.fr/&ei=FZq_T4abNcak0QWZ0vnWCg&usg=AFQjCNHq90dwj5RiEfr1Pw; HelpRotatorCookie=HelpLayerWasSeen=0; NSC_GSPOUGS!TTM=ffffffff09f4f58455e445a4a423660; GS=Site=frfr; __utma=1.219229010.1337956889.1337956889.1337958824.2; __utmb=1.1.10.1337958824; __utmc=1; __utmz=1.1337956889.1.1.utmcsr=google|utmccn=(organic)|utmcmd=organic|utmctr=(not%20provided)"
}
# Path of the service endpoint, relative to the server root.
url = "/Services/GetFromDataBaseVersionned"
# POST the request
# NOTE(review): HTTPConnection is the plain-HTTP class even when given port 443;
# the site is described as HTTPS-only, so HTTPSConnection is presumably needed —
# confirm.
conn = httplib.HTTPConnection(host,port=443)
conn.request("POST",url,params,headers)
# Read the whole response body and print it.
response = conn.getresponse()
data = response.read()
print data
But when I run my script, I have this error:
socket.gaierror: [Errno -2] Name or service not known
Thanks a lot for your link to the requests module. It's just perfect. Below is the solution to my problem.
import requests
import json
url = 'https://www.mywbsite.fr/Services/GetFromDataBaseVersionned'

# JSON document the service expects. This is the request body, so it is the
# only thing that is serialized and passed through `data=`.
payload = {
    "isLeftColumn": False,
    "lID": -1,
    "userIpCountryCode": "FR",
    "version": None,
    "languageCode": "fr",
    "siteCode": "frfr",
    "Quotation": "eu",
}

# HTTP headers travel in `headers=`, not inside the body. Host and
# Content-Length are left out on purpose: requests derives both from the URL
# and from the encoded body, and a hard-coded length would not match the
# string json.dumps produces.
headers = {
    "Connection": "keep-alive",
    "Origin": "https://www.mywbsite.fr",
    "X-Requested-With": "XMLHttpRequest",
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.52 Safari/536.5",
    "Content-Type": "application/json",
    "Accept": "*/*",
    "Referer": "https://www.mywbsite.fr/data/mult.aspx",
    "Accept-Encoding": "gzip,deflate,sdch",
    "Accept-Language": "fr-FR,fr;q=0.8,en-US;q=0.6,en;q=0.4",
    "Accept-Charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.3",
    "Cookie": "ASP.NET_SessionId=j1r1b2a2v2w245; GSFV=FirstVisit=; GSRef=https://www.google.fr/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&ved=0CHgQFjAA&url=https://www.mywbsite.fr/&ei=FZq_T4abNcak0QWZ0vnWCg&usg=AFQjCNHq90dwj5RiEfr1Pw; HelpRotatorCookie=HelpLayerWasSeen=0; NSC_GSPOUGS!TTM=ffffffff09f4f58455e445a4a423660; GS=Site=frfr; __utma=1.219229010.1337956889.1337956889.1337958824.2; __utmb=1.1.10.1337958824; __utmc=1; __utmz=1.1337956889.1.1.utmcsr=google|utmccn=(organic)|utmcmd=organic|utmctr=(not%20provided)",
}

# POST the JSON body with the browser-like headers and show the raw response.
r = requests.post(url, data=json.dumps(payload), headers=headers)
print(r.content)
If we want to add custom HTTP headers to a POST request, we must pass them through a dictionary to the headers parameter.
Here is an example with a non-empty body and headers:
import requests
import json
# Endpoint, JSON document to send, and the header announcing its media type.
url = 'https://somedomain.com'
body = {'name': 'Maryja'}
headers = {'content-type': 'application/json'}

# The body is serialized explicitly, so the Content-Type header above is what
# tells the server it is JSON.
r = requests.post(
    url=url,
    headers=headers,
    data=json.dumps(body),
)
Source
To make a POST request instead of a GET request with urllib2, you need to supply a data argument, even if it is empty. For example:
import urllib2
# Supplying data (even an empty string) is what makes urllib2 issue a POST
# rather than a GET; the custom header is passed at construction time.
req = urllib2.Request(
    "http://am.domain.com:8080/openam/json/realms/root/authenticate?authIndexType=Module&authIndexValue=LDAP",
    data='',
    headers={'X-OpenAM-Username': 'demo'},
)
r = urllib2.urlopen(req)
Related
I'm trying to log in to the site, but I have a problem!
Here is my code:
from requests_ntlm import HttpNtlmAuth
import requests
from main import username, password
# Despite its name, this dict holds HTTP request headers: it is passed as
# headers= to both session.get calls below.
data = {
"Accept": "*/*",
"Accept-Encoding": "gzip, deflate, br",
"Accept-Language": "ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7",
# Hand-copied NTLM Authorization value (truncated in the post).
"Authorization": "NTLM TlRMTVNT.......",
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"Cookie": "_ym_uid=1654686701790358885; _ym_d=1654686701; _ym_isad=2",
"Host": "...",
"Pragma": "no-cache",
"Referer": "https://...",
"sec-ch-ua": '" Not A;Brand";v="99", "Chromium";v="104", "Opera GX";v="90"',
"sec-ch-ua-mobile": "?0",
"sec-ch-ua-platform": "Windows",
"Sec-Fetch-Dest": "empty",
"Sec-Fetch-Mode": "cors",
"Sec-Fetch-Site": "same-origin",
# Two adjacent string literals; Python joins them into one header value.
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/104.0.5112.102 Safari/537.36 OPR/90.0.4480.117"
}
# NOTE(review): HttpNtlmAuth presumably performs the NTLM challenge/response
# itself — confirm whether the hand-copied Authorization header above
# interferes with it.
auth = HttpNtlmAuth(username, password)
with requests.Session() as session:
# First request: sent with the copied headers and the NTLM auth handler.
q1 = session.get("https://...", auth=auth, headers=data)
# Copy the server's WWW-Authenticate response header into the Authorization
# request header. .get() returns None when that header is absent.
data['Authorization'] = q1.headers.get("WWW-Authenticate")
q2 = session.get("https://...", auth=auth, headers=data)
# raise_for_status() raises on 4xx/5xx and otherwise returns None, so this
# prints None on success.
print(q2.raise_for_status())
You need to log in inside the site. I used to use HttpBaseAuth, but after searching in the site files I saw that it does a strange thing using NTLM.
It makes a GET request using my headers, receives a 401 with a "WWW-Authenticate" header in the response, and resends the request with the "Authorization" header set to the value of that "WWW-Authenticate" header. The "Authorization" header in the very first request is always the same; its value does not change (unfortunately I can't post it here). But if I send it myself, the response is still 401, and the "WWW-Authenticate" header is not visible via response.headers.get.
What should I do?
I can't log in to the site.
If you log in manually, in the browser, it makes a get request, receives the “WWW-authenticate” header in response, and makes a get request again, but with this header.
When I try to do the same thing through python, I get a 401 error.
I'm trying very hard to login to a website which uses Discord Oauth. To do this I have to make a request to
https://discord.com/api/v9/oauth2/authorize?client_id=896549597550358548&response_type=code&redirect_uri=https://www.monkeebot.xyz/oauth/discord&scope=identify guilds
However, Discord returns an error response:
<Response [200]>
{'location': 'https://www.monkeebot.xyz/oauth/discord?error=access_denied&error_description=The+resource+owner+or+authorization+server+denied+the+request'}
# OAuth2 authorize endpoint with the client, redirect URI and scopes in the
# query string. requests percent-encodes the space in the scope value.
url = "https://discord.com/api/v9/oauth2/authorize?client_id=896549597550358548&response_type=code&redirect_uri=https://www.monkeebot.xyz/oauth/discord&scope=identify guilds"

# Browser-like headers. Each key appears once (a repeated dict key silently
# overwrites the earlier entry). Content-Length is not set by hand because
# requests computes it from the encoded JSON body.
headers = {
    "authorization": "",  # user token goes here (left blank in the post)
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.5112.81 Safari/537.36 Edg/104.0.1293.47",
    "accept": "*/*",
    "accept-encoding": "gzip, deflate, br",
    "accept-language": "en-GB,en;q=0.9,en-US;q=0.8",
    "cache-control": "no-cache",
    "content-type": "application/json",
    "origin": "https://discord.com",
    "pragma": "no-cache",
    "referer": "https://discord.com/oauth2/authorize?client_id=896549597550358548&redirect_uri=https://www.monkeebot.xyz/oauth/discord&response_type=code&scope=identify%20guilds",
}

# POST an empty JSON object as the body and follow any redirects.
r = requests.post(url, headers=headers, json={}, allow_redirects=True)
Does anybody have any experience with doing this? I'm not entirely sure what I'm doing wrong. I have added the correct authorization etc, and I have tried it on different websites that use Discord oauth, but I get this error every time.
Any help or debugging tips would be greatly appreciated.
Before executing this script, please create your auth code and use it in below "Authorization" key. I don't want to share mine :)
import requests
import json
def generate_code():
    """POST an authorization approval to Discord and return the decoded JSON reply.

    The "Authorization" header below is a placeholder and must be replaced
    with a real value before the call can succeed.
    """
    endpoint = "https://discord.com/api/v9/oauth2/authorize?client_id=896549597550358548&response_type=code&redirect_uri=https://www.monkeebot.xyz/oauth/discord&code=0nNWo24HJQgwytTMlqkwgdil9fW24k&scope=identify guilds"

    # Browser-like headers sent along with the approval.
    request_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:103.0) Gecko/20100101 Firefox/103.0",
        "Accept": "*/*",
        "Accept-Language": "en-US,en;q=0.5",
        "Content-Type": "application/json",
        "Authorization": "USE YOUR AUTH CODE HERE. YOu need to CREATE it first",
        "X-Super-Properties": "eyJvcyI6IldpbmRvd3MiLCJicm93c2VyIjoiRmlyZWZveCIsImRldmljZSI6IiIsInN5c3RlbV9sb2NhbGUiOiJlbi1VUyIsImJyb3dzZXJfdXNlcl9hZ2VudCI6Ik1vemlsbGEvNS4wIChXaW5kb3dzIE5UIDEwLjA7IFdpbjY0OyB4NjQ7IHJ2OjEwMy4wKSBHZWNrby8yMDEwMDEwMSBGaXJlZm94LzEwMy4wIiwiYnJvd3Nlcl92ZXJzaW9uIjoiMTAzLjAiLCJvc192ZXJzaW9uIjoiMTAiLCJyZWZlcnJlciI6IiIsInJlZmVycmluZ19kb21haW4iOiIiLCJyZWZlcnJlcl9jdXJyZW50IjoiIiwicmVmZXJyaW5nX2RvbWFpbl9jdXJyZW50IjoiIiwicmVsZWFzZV9jaGFubmVsIjoic3RhYmxlIiwiY2xpZW50X2J1aWxkX251bWJlciI6MTQxMjY5LCJjbGllbnRfZXZlbnRfc291cmNlIjpudWxsfQ==",
        "X-Discord-Locale": "en-US",
        "X-Debug-Options": "bugReporterEnabled",
        "Alt-Used": "discord.com",
        "Cookie": "__dcfduid=7dc5606419a411ed9b1fee90ba2eef9f; __sdcfduid=7dc5606419a411ed9b1fee90ba2eef9f20120e864cf33496bedf659a7820954f7611454b15423f7553e3e85c08756a75",
    }

    # The approval itself, serialized to a JSON string for the request body.
    approval = {"permissions": "0", "authorize": True}
    body = json.dumps(approval)

    reply = requests.post(endpoint, headers=request_headers, data=body)
    return reply.json()
print(generate_code())
I am new to Python. I am sending a POST request using this line of code:
response = requests.post(url=API_ENDPOINT, headers=headers, data=payload)
The problem is that the header values are dynamic (they are different every time in the browser).
These are the headers in browser:
headers = {
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
"Accept-Encoding": "gzip, deflate, br",
"Accept-Language": "en-US,en;q=0.5",
"Connection": "keep-alive",
"Content-Length": "276",
"Content-Type": "application/x-www-form-urlencoded",
"Cookie": "acceptedCookie=%7B%22type%22%3A%22all%22%7D; TS01a14d32=01f893c9654ba8a49f70366efc3464fd76d4a461343cf44a7f074a5071b9818b6b196051effd669b784f691c8fab79bdc5a7efada418db04fc3cf8c3e43224fe186e64941eab43b5d9500201644abda7c0f5914ebb9ab95046ee2cb83c43f259ab0ed0e538fee3db50b2aa541ee5646d70634cea4cec54352547d3366c51e2ae5270756ee57bf78d915dcb8209c9c5771956c715bd75fb761bf42da6ba5cfa34ffbfee670e871ed33f8e25c09fdfc882953efd981f; ASLBSA=85b54f44c65f329c72b20a3ee7a9fc9a63d44001bc2c4e2c2b2f26fdaba7e0e3; ASLBSACORS=85b54f44c65f329c72b20a3ee7a9fc9a63d44001bc2c4e2c2b2f26fdaba7e0e3; utag_main=v_id:0179ddda773b0020fa6584d13ce40004e024f00d00978$_sn:2$_ss:1$_st:1623016566769$_pn:1%3Bexp-session$ses_id:1623014766769%3Bexp-session; s_cc=true; s_fid=3BE425806C624053-0396695F1870C86E; s_sq=luxmyluxottica%3D%2526pid%253DSite%25253APreLogin%25253ALogin%2526pidt%253D1%2526oid%253DLOGIN%2526oidt%253D3%2526ot%253DSUBMIT; todayVisit=true",
"Host": "mywebsite.com",
"Origin": "https://mywebsite.com",
"Referer": "https://mywebsite.com",
"TE": "Trailers",
"Upgrade-Insecure-Requests": "1",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0"
}
The values of Content-Length, Cookie, and Accept are different every time I hit the API in the browser, so I cannot just copy and paste the header values into the POST request. How can I generate these dynamic headers (Content-Length, cookies, etc.)? Please help.
I am trying to scrape German zip codes (PLZ) for a given street in a given city using Python's requests on this server. I am trying to apply what I learned here.
I want to return the PLZ of
Schanzäckerstr. in Nürnberg.
import requests
# Suggestion endpoint; the search term appears both in the path (percent-encoded
# once) and in the searchString query parameter (percent-encoded twice).
url = 'https://www.11880.com/ajax/getsuggestedcities/schanz%C3%A4ckerstra%C3%9Fe%20n%C3%BCrnberg?searchString=schanz%25C3%25A4ckerstra%25C3%259Fe%2520n%25C3%25BCrnberg'
# Same searchString pair as in the URL's query string, kept as a raw string.
data = 'searchString=schanz%25C3%25A4ckerstra%25C3%259Fe%2520n%25C3%25BCrnberg'
# Headers copied from the browser request.
# NOTE(review): "wwww.11880.com" has four w's, which looks like a typo, and
# Content-Length is hard-coded — confirm both.
headers = {"Authority": "wwww.11880.com",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:70.0) Gecko/20100101 Firefox/70.0",
"Accept": "application/json, text/javascript, */*; q=0.01",
"Accept-Language": "de-DE,de;q=0.9,en-US;q=0.8,en;q=0.7",
"Accept-Encoding": "gzip, deflate, br",
"X-Requested-With": "XMLHttpRequest",
"Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
"Content-Length": "400",
"Origin": "https://www.postleitzahlen.de",
"Sec-Fetch-Site": "cross-site",
"Fetch-Mode": "cors",
"DNT": "1",
"Connection": "keep-alive",
"Referer": "https://www.postleitzahlen.de",
}
# This literal is a set containing a single (None, data) tuple.
# NOTE(review): confirm this is the shape requests expects for files=.
multipart_data = {(None, data,)}
session = requests.Session()
# NOTE(review): this is a GET that also passes files= while the headers declare
# a form-urlencoded body — presumably not what the browser sends; confirm
# against the captured request.
response = session.get(url, files=multipart_data, headers=headers)
print(response.text)
The above code yields an empty response with status 200. I want to return:
'90443'
I was able to solve this problem using nominatim openstreetmap API. One can also add street numbers
import requests
city = 'Nürnberg'
street = 'Schanzäckerstr. 2'

# Structured Nominatim search. The whole street string is sent; indexing it
# (street[1]) would pass only its second character.
response = requests.get(
    'https://nominatim.openstreetmap.org/search',
    headers={'User-Agent': 'PLZ_scrape'},
    params={'city': city, 'street': street, 'format': 'json', 'addressdetails': '1'},
)

# One postcode per hit; tolerate hits without an address block and an empty
# result list instead of failing on the first index.
postcodes = [hit.get('address', {}).get('postcode') for hit in response.json()]
print(street, ',', postcodes[0] if postcodes else None)
Make sure to only send one request per second.
How can I send two consecutive requests, including following the redirect?
I tried to use Python requests to mimic the search function on the browser.
However, it's not as simple as other simple requests.
I opened the developer mode on Chrome browser and copied the two requests in Curl form then converted it into Python request form.
I can only get 500 error via Python, but I could get the correct response on the browser.
Current code; it only returns a 500 error:
cookies = {
'optimizelyEndUserId': 'oeu1454030467608r0.5841516454238445',
~~~
'_gat': '1',
}
headers = {
'Origin': 'https://m.flyscoot.com',
~~~~
}
data = 'origin=KHH&destination=KIX&departureDate=20160309&returnDate=&roundTrip=false&adults=1&children=0&infants=0&promoCode='
req = requests.session()
resp_1 = req.post('https://m.flyscoot.com/search', headers=headers, cookies=cookies, data=data)
headers = {
'Accept-Encoding': 'gzip, deflate, sdch',
~~~~
}
# Because the first request is redirected to an unknown status, I copied the Set-Cookie values from the first response for the second request to use.
resp_2 = req.get('https://m.flyscoot.com/select', headers=headers, cookies=resp_1.history[0].cookies)
It seems that this is the mobile URL. In most cases you should set a user agent. Try this (Python 3):
# Import the submodule explicitly: a bare `import urllib` does not guarantee
# that urllib.parse is loaded.
import urllib.parse
import requests

# Browser-like headers reused for every request in this session.
FF_USER_AGENT = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:21.0.0) '
                  'Gecko/20121011 Firefox/21.0.0',
    "Origin": "http://makeabooking.flyscoot.com",
    "Referer": "http://makeabooking.flyscoot.com",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Encoding": "gzip,deflate,sdch",
    "Accept-Language": "fr-FR,fr;q=0.8,en-US;q=0.6,en;q=0.4",
    "Cache-Control": "max-age=0",
    "Connection": "keep-alive",
}

# A session keeps the cookies set by the first GET for the following POST.
req = requests.session()
resp_1 = req.get('http://makeabooking.flyscoot.com/', headers=FF_USER_AGENT)

# form urlencoded data
raw_data = (
    "availabilitySearch.SearchInfo.SearchStations%5B0%5D.DepartureStationCode"
    "=ADL"
    "&availabilitySearch.SearchInfo.SearchStations%5B0%5D.ArrivalStationCode"
    "=SIN"
    "&availabilitySearch.SearchInfo.SearchStations%5B0%5D.DepartureDate=2%2F17"
    "%2F2016&availabilitySearch.SearchInfo.SearchStations%5B1%5D"
    ".DepartureStationCode=SIN&availabilitySearch.SearchInfo.SearchStations%5B1"
    "%5D.ArrivalStationCode=ADL&availabilitySearch.SearchInfo.SearchStations"
    "%5B1"
    "%5D.DepartureDate=3%2F17%2F2016&availabilitySearch.SearchInfo.Direction"
    "=Return&Singapore+%28SIN%29=Singapore+%28SIN%29&availabilitySearch"
    ".SearchInfo.AdultCount=1&availabilitySearch.SearchInfo.ChildrenCount=0"
    "&availabilitySearch.SearchInfo.InfantCount=0&availabilitySearch.SearchInfo"
    ".PromoCode=")

# keep_blank_values=True preserves fields submitted empty (PromoCode here);
# by default parse_qsl drops them, so the form would be posted incomplete.
dict_data = dict(urllib.parse.parse_qsl(raw_data, keep_blank_values=True))

# Submit the search form and report where the server finally landed.
final = req.post('http://makeabooking.flyscoot.com/',
                 headers=FF_USER_AGENT,
                 data=dict_data)
print(final.status_code)
print(final.url)
[MOBILE Version]
# Import the submodule explicitly: a bare `import urllib` does not guarantee
# that urllib.parse is loaded.
import urllib.parse
import requests

# debug request
# Turn on wire-level logging so every request and response header is printed.
import http.client
http.client.HTTPConnection.debuglevel = 1
import logging
logging.basicConfig()
logging.getLogger().setLevel(logging.DEBUG)
requests_log = logging.getLogger("requests.packages.urllib3")
requests_log.setLevel(logging.DEBUG)
requests_log.propagate = True

# Mobile-browser headers reused for every request in this session.
FF_USER_AGENT = {
    'User-Agent': "Mozilla/5.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/600.1.3 (KHTML, like Gecko) Version/8.0 Mobile/12A4345d Safari/600.1.4",
    "Origin": "https://m.flyscoot.com",
    "Referer": "https://m.flyscoot.com/search",
    "Host": "m.flyscoot.com",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Encoding": "gzip,deflate",
    "Accept-Language": "fr-FR,fr;q=0.8,en-US;q=0.6,en;q=0.4",
    "Cache-Control": "max-age=0",
    "Connection": "keep-alive",
    "X-Requested-With": "XMLHttpRequest",
    "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
}

# A session keeps the cookies set by the first GET for the following POST.
req = requests.session()
resp_1 = req.get('https://m.flyscoot.com', headers=FF_USER_AGENT)

# form urlencoded data
raw_data = (
    "origin=MEL&destination=CAN&departureDate=20160220&returnDate=20160227&roundTrip=true&adults=1&children=0&infants=0&promoCode=")

# keep_blank_values=True preserves fields submitted empty (promoCode here);
# by default parse_qsl drops them, so the form would be posted incomplete.
dict_data = dict(urllib.parse.parse_qsl(raw_data, keep_blank_values=True))

# Submit the search form.
final = req.post('https://m.flyscoot.com/search',
                 headers=FF_USER_AGENT,
                 data=dict_data)