import requests
import random

with requests.Session() as c:
    url = 'https://www.example.com/ajax/logon.php?t=login'
    USERNAME = 'login'
    PASSWORD = 'password'
    AGENT = {'User-Agent': 'Its me'}
    c.get(url)  # initial GET so the session picks up any cookies
    tokens = c.cookies
    login_data = (('l', USERNAME), ('ph', PASSWORD))
    c.post(url, data=login_data, headers=AGENT)
    url2 = 'http://second.example.com/engine'
    go_data = (('t', 'init'), ('value', str(random.random())), ('id', '6959025'))
    page = c.get(url2, params=go_data, headers=AGENT)
    print(page.text)
Am I doing everything all right? It says that I am not logged in, and in Wireshark there is no POST request. I am positive that this POST endpoint works, because it logs me off from my account.
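One way to debug this kind of flow is to print what each call actually did; a minimal sketch along those lines (same placeholder URL, username, and password as above):
import requests

with requests.Session() as c:
    url = 'https://www.example.com/ajax/logon.php?t=login'
    resp = c.post(url, data={'l': 'login', 'ph': 'password'},
                  headers={'User-Agent': 'Its me'})
    # Confirm a POST actually went out and where it ended up:
    print(resp.request.method, resp.request.url)  # method and URL of the final request
    print(resp.status_code, resp.history)         # any redirects show up in .history
    print(c.cookies.get_dict())                   # cookies the session has collected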
I am trying to read out some of my stats from chess.com. I am struggling to log in with Python.
I am trying to follow the tutorial here.
I have identified the following tags on the login page: _username as the username, _password as the password, and _token as the hidden token.
My code:
from lxml import html
import requests

session_requests = requests.session()
login_url = 'https://www.chess.com/login'
result = session_requests.get(login_url)
tree = html.fromstring(result.text)
token = list(set(tree.xpath("//input[@name='_token']/@value")))[0]
payload = {
    "_username": "ChristianSloper",
    "_password": "mypasswordgoeshere",
    "_token": token
}
result = session_requests.post(login_url, data=payload, headers=dict(referer=login_url), verify=True)
Unfortunately, I just get sent back to the login page. I am very new to front-end/web development and would be very pleased to get any help.
Your payload isn't entirely correct (a few fields are missing from it), and it appears that you are sending the POST request to the wrong URL. Try this code:
from lxml import html
import requests

session_requests = requests.session()
login_url = 'https://www.chess.com/login'
result = session_requests.get(login_url)
tree = html.fromstring(result.text)
token = list(set(tree.xpath("//input[@name='_token']/@value")))[0]
payload = {
    "_username": "ChristianSloper",
    "_password": "mypasswordgoeshere",
    "login": '',
    "_target_path": "https://www.chess.com/home",
    "_token": token
}
session_requests.headers.update(dict(referer=login_url))
result = session_requests.post("https://www.chess.com:443/login_check", data=payload, verify=True)
Hope this helps!
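To check whether the login actually succeeded, you can reuse the same session and request a logged-in page; a small sketch (the assumption that chess.com bounces unauthenticated users back to /login is a guess, not verified):
home = session_requests.get("https://www.chess.com/home")
# A failed login is usually redirected back to the login form,
# so the final URL is a simple heuristic.
if "/login" in home.url:
    print("still logged out")
else:
    print("logged in, final URL:", home.url)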
I can't submit form data on a website through a Python script. It is a usual feedback form, which works properly when filled in manually, but nothing gets sent when the request comes from the script. What should I do?
The script that sends the request to the site:
import requests
import sys

URL = 'http://127.0.0.1:8000/'
client = requests.session()
client.get(URL)
csrftoken = client.cookies['csrftoken']
login_data = dict(lastname='Игин', name='Anton', middlename='Konst', birthday='2017-04-20',
                  telephone='(896) 097-29-02', csrfmiddlewaretoken=csrftoken, next='form_call/')
r = client.post(URL, data=login_data, headers=dict(Referer=URL))
views.py
def form_call(request):
    if request.method == 'POST':
        form = Call_Form(request.POST)
        name = request.POST.get('name', '')
        lastname = request.POST.get('lastname', '')
        middlename = request.POST.get('middlename', '')
        birthday = request.POST.get('birthday', '')
        telephone = request.POST.get('telephone', '')
        if form.is_valid():
            mail_host = SMTPMail.objects.all()[0]
            rec_list = RecMail.objects.all()
            recipients = []
            for mail in rec_list:
                recipients.append(mail.mail)  # list of recipients
            message = '''
            A new call request has appeared on your NL International site! Here is the data provided by the new consultant:
            Full name: {0} {1} {2}
            Date of birth: {3}
            Telephone: {4}'''.format(name, lastname, middlename, birthday, telephone)
            subject = 'Call request'
            send_mail(subject, message, mail_host.mail, recipients, fail_silently=False)
            return redirect('/thanks/')
        else:
            return redirect('/error/')
I have already solved my problem. Here is my code:
import requests
import sys

URL = 'http://127.0.0.1:8000/'              # address of the web page with the HTML form
URL2 = 'http://127.0.0.1:8000/form_call/'   # address of the view processing the form data
client = requests.session()
client.get(URL)
csrftoken = client.cookies['csrftoken']
login_data = dict(lastname='Игин', name='Anton', middlename='Konst testim', birthday='2017-01-20',
                  telephone='896-002-00-02', csrfmiddlewaretoken=csrftoken)
r = client.post(URL2, data=login_data)
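For reference, the Call_Form that the view validates against presumably looks something like this (a sketch reconstructed from the posted field names; the actual field types and validation rules are assumptions):
from django import forms

class Call_Form(forms.Form):
    # Field names match the keys posted by the script above;
    # the exact field types are guesses.
    name = forms.CharField()
    lastname = forms.CharField()
    middlename = forms.CharField()
    birthday = forms.DateField()
    telephone = forms.CharField()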
I am trying to scrape some emails from mdpi.com that are available only to logged-in users, but it fails: I get the same page I would get when logged out. The code itself:
import requests
from bs4 import BeautifulSoup
import traceback

login_data = {'form[email]': 'xxxxxxx@gmail.com', 'form[password]': 'xxxxxxxxx', 'remember': 1}
base_url = 'http://www.mdpi.com'
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1; rv:40.0) Gecko/20100101 Firefox/40.0'}

session = requests.Session()
session.headers = headers

# log in
s = session.post('https://susy.mdpi.com/user/login', data=login_data)
print(s.text)
print(session.cookies)

def make_soup(url):
    try:
        r = session.get(url)
        soup = BeautifulSoup(r.content, 'lxml')
        return soup
    except:
        traceback.print_exc()
        return None

example_link = 'http://www.mdpi.com/search?journal=medsci&year_from=1996&year_to=2017&page_count=200&sort=relevance&view=default'

def article_finder(soup):
    one_page_articles_divs = soup.find_all('div', class_='article-content')
    for article_div in one_page_articles_divs:
        a_link = article_div.find('a', class_='title-link')
        link = base_url + a_link.get('href')
        print(link)
        article_soup = make_soup(link)
        grab_author_info(article_soup)

def grab_author_info(article_soup):
    # title of the article
    article_title = article_soup.find('h1', class_="title").text
    print(article_title)

    # affiliation
    affiliations_div = article_soup.find('div', class_='art-affiliations')
    affiliation_dict = {}
    aff_indexes = affiliations_div.find_all('div', class_='affiliation-item')
    aff_values = affiliations_div.find_all('div', class_='affiliation-name')
    for i, index in enumerate(aff_indexes):  # 0, 1
        affiliation_dict[int(index.text)] = aff_values[i].text

    # authors' names
    authors_div = article_soup.find('div', class_='art-authors')
    authors_spans = authors_div.find_all('span', class_='inlineblock')
    for span in authors_spans:
        name_and_email = span.find_all('a')  # name and email
        name = name_and_email[0].text
        # email
        email = name_and_email[1].get('href')[7:]
        # affiliation index
        affiliation_index = span.find('sup').text
        indexes = set()
        if len(affiliation_index) > 2:
            for i in affiliation_index.strip():
                try:
                    ind = int(i)
                    indexes.add(ind)
                except ValueError:
                    pass
        print(name)
        for index in indexes:
            print('affiliation =>', affiliation_dict[index])
        print('email: {}'.format(email))

if __name__ == '__main__':
    article_finder(make_soup(example_link))
What should I do in order to get what I want?
Ah, that is easy: you haven't managed to log in correctly. If you look at the response from your initial call, you will see that you get back the login page HTML instead of the profile page. The reason for this is that you are not submitting the hidden token from the form.
The solution: request the login page, then use either lxml or BeautifulSoup to parse out the hidden input 'form[_token]'. Get that value and add it to your login_data payload.
Then submit your login request and you'll be in.
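A minimal sketch of that flow with BeautifulSoup, assuming the hidden input is really named 'form[_token]' as described above:
import requests
from bs4 import BeautifulSoup

session = requests.Session()

# Fetch the login page and pull out the hidden token first.
login_page = session.get('https://susy.mdpi.com/user/login')
soup = BeautifulSoup(login_page.text, 'lxml')
token = soup.find('input', {'name': 'form[_token]'})['value']  # assumed field name

login_data = {
    'form[email]': 'xxxxxxx@gmail.com',
    'form[password]': 'xxxxxxxxx',
    'form[_token]': token,
}
s = session.post('https://susy.mdpi.com/user/login', data=login_data)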
I've tried two completely different methods, but I still can't get the data that is only present after logging in.
I've tried one using requests, but the XPath returns null:
import requests
from lxml import html

USERNAME = "xxx"
PASSWORD = "xxx"
LOGIN_URL = "http://www.reginaandrew.com/customer/account/loginPost/referer/aHR0cDovL3d3dy5yZWdpbmFhbmRyZXcuY29tLz9fX19TSUQ9VQ,,/"
URL = "http://www.reginaandrew.com/gold-leaf-glass-top-table"

def main():
    FormKeyTxt = ""
    session_requests = requests.session()

    # Get login csrf token
    result = session_requests.get(LOGIN_URL)
    tree = html.fromstring(result.text)

    # Create payload
    formKey = str(tree.xpath("//*[@id='login-form']/input/@value"))
    FormKeyTxt = "".join(formKey)
    # print(FormKeyTxt.replace("['","").replace("']",""))

    payload = {
        "login[username]": USERNAME,
        "login[password]": PASSWORD,
        "form_key": FormKeyTxt,
        "persistent_remember_me": "checked"
    }

    # Perform login
    result = session_requests.post(LOGIN_URL, data=payload)

    # Scrape url
    result = session_requests.get(URL, data=payload)
    tree = html.fromstring(result.content)
    bucket_names = tree.xpath("//span[contains(@class, 'in-stock')]/text()")
    print(bucket_names)
    print(result)
    print(result.status_code)

if __name__ == '__main__':
    main()
I've tried another one using MechanicalSoup, but it still returns null:
import argparse
import mechanicalsoup
import urllib.request
from bs4 import BeautifulSoup

parser = argparse.ArgumentParser(description='Login to GitHub.')
parser.add_argument("username")
parser.add_argument("password")
args = parser.parse_args()

browser = mechanicalsoup.Browser()
login_page = browser.get("http://www.reginaandrew.com/gold-leaf-glass-top-table")
login_form = login_page.soup.select("#login-form")[0]
login_form.input({"login[username]": args.username, "login[password]": args.password})
page2 = browser.submit(login_form, login_page.url)
messages = page2.soup.find(class_='in-stock1')
if messages:
    print(messages.text)
print(page2.soup.title.text)
I understand the top solution better, so I'd like to do it that way, but is there anything I'm missing? (I'm sure I'm missing a lot.)
This should do it:
import requests
import re

url = "http://www.reginaandrew.com/"
r = requests.session()
rs = r.get(url)

# Cut the login form out of the page, then read its action URL and form_key.
cut = re.search(r'<form.+?id="login-form".+?<\/form>', rs.text, re.S | re.I).group()
action = re.search(r'action="(.+?)"', cut).group(1)
form_key = re.search(r'name="form_key".+?value="(.+?)"', cut).group(1)

payload = {
    "login[username]": "fugees",
    "login[password]": "nugees",
    "form_key": form_key,
    "persistent_remember_me": "on"
}
rs = r.post(action, data=payload, headers={'Referer': url})
I have problems with authorization in Python. I want to log in to a website automatically, but I can't. I have used many libraries: Grab, urllib2, requests, but I never managed to log in.
To check myself, I then open the page with my account data.
It's a real site, and a real login and password:
URL = "http://pin-im.com/accounts/login/"
LOGIN = "testuser"
PASSWORD = "test12345user"
urllib2:
def authorization():
    import urllib2
    gh_url = 'http://pin-im.com/accounts/login/'
    gh_user = 'testuser'
    gh_pass = 'test12345user'
    req = urllib2.Request(gh_url)
    password_manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
    password_manager.add_password(None, gh_url, gh_user, gh_pass)
    auth_manager = urllib2.HTTPBasicAuthHandler(password_manager)
    opener = urllib2.build_opener(auth_manager)
    urllib2.install_opener(opener)
    handler = urllib2.urlopen(req)
Grab:
from grab import Grab

def authorization():
    g = Grab()
    g.setup(post={'username': 'testuser', 'Password': 'test12345user', 'act': 'submit'})
    g.go("http://pin-im.com/accounts/login/")
    g.go("http://pin-im.com/user/my-profile/")
    print g.response.code
requests (I used all the methods in the requests library for authorization; here is one of them):
import requests
from requests.auth import HTTPBasicAuth

r = requests.get('http://pin-im.com/accounts/login/', auth=HTTPBasicAuth('testuser', 'test12345user'))
r = requests.get("http://pin-im.com/user/my-profile/")
r.status_code
I'm in despair. Can you help me log in to this site, and tell me what I did wrong?
import urllib2

# Send the credentials as an HTTP Basic Auth header.
userData = "Basic " + ("testuser:test12345user").encode("base64").rstrip()
req = urllib2.Request('http://pin-im.com/accounts/login')
req.add_header('Accept', 'application/json')
req.add_header("Content-type", "application/x-www-form-urlencoded")
req.add_header('Authorization', userData)
res = urllib2.urlopen(req)
This site uses CSRF protection, so you should get the csrftoken cookie and send it back to the server with your request:
import Cookie
from urllib import urlencode
import httplib2

URL = "http://pin-im.com/accounts/login/"
LOGIN = "testuser"
PASSWORD = "test12345user"

http = httplib2.Http()

# First, fetch the login page to obtain the csrftoken cookie.
response, _ = http.request(URL)
cookies = Cookie.BaseCookie()
cookies.load(response["set-cookie"])
csrftoken = cookies["csrftoken"].value

# POST the credentials together with the CSRF token, echoing the cookies back.
headers = {'Content-type': 'application/x-www-form-urlencoded'}
headers['Cookie'] = response['set-cookie']
data = {
    "csrfmiddlewaretoken": csrftoken,
    "username": LOGIN,
    "password": PASSWORD
}
response, _ = http.request(URL, "POST", headers=headers, body=urlencode(data))

# Finally, use the session cookie from the login response.
response, content = http.request(
    "http://pin-im.com/user/my-profile/",
    "GET",
    headers={'Cookie': response['set-cookie']}
)
print response, content
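For comparison, the same CSRF dance is shorter with requests, since a Session carries the cookies between calls automatically (a sketch; the field names are taken from the answer above):
import requests

URL = "http://pin-im.com/accounts/login/"

with requests.Session() as s:
    s.get(URL)  # picks up the csrftoken cookie
    data = {
        "csrfmiddlewaretoken": s.cookies["csrftoken"],
        "username": "testuser",
        "password": "test12345user",
    }
    # Django also checks the Referer header on secure requests; sending it is harmless here.
    s.post(URL, data=data, headers={"Referer": URL})
    profile = s.get("http://pin-im.com/user/my-profile/")
    print(profile.status_code)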