I am trying to download a file from a password protected site. I am using the following code, but when I run it nothing happens....no error, just nothing is downloaded. Any insights would be appreciated!
import requests
from bs4 import BeautifulSoup
from urllib.request import urlopen
headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36'
}
with requests.Session() as s:
url1 = "http://....."
url2 = "http://......tab"
def login(url1, url2):
r = requests.get(url1)
bs = BeautifulSoup(r.text, 'html.parser')
csrf_token = bs.find('input', attrs={'name': '###_CSRF-Token'})['value']
credentials = {
'username': '#####',
'password': '#####',
'###_CSRF-Token': csrf_token,
}
s.post(url1, data=credentials, headers=headers)
resp = s.get(url2)
with open('/Users/...../Website\ Grab/october.vcf', 'wb') as f:
f.write(r.content)
#urllib.request.urlretrieve(url2, '/Users/.../Downloads/october.vcf')
Related
I'm trying to log in to facebook using requests module. Although it seems I've prepared payload in the right way but when I send it with post requests, I don't get desired content in the response. I get 200 status code, though. To let you know, If I get response accordingly, I should find my fullname within it.
I initially tried like the following:
import requests
from bs4 import BeautifulSoup
from datetime import datetime
link = 'https://www.facebook.com/'
base_url = 'https://www.facebook.com{}'
time = int(datetime.now().timestamp())
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36',
'referer': 'https://www.facebook.com/',
}
with requests.Session() as s:
r = s.get(link)
soup = BeautifulSoup(r.text,"lxml")
form_url = soup.select_one("form[data-testid='royal_login_form']")['action']
post_url = base_url.format(form_url)
payload = {i['name']:i.get('value','') for i in soup.select('input[name]')}
payload['email'] = 'YOUR_EMAIL'
payload['encpass'] = f'#PWD_BROWSER:0:{time}:YOUR_PASSWORD'
payload.pop('pass')
res = s.post(post_url,data=payload,headers=headers)
print(res.url)
print(res.text)
This is another way I tried which didn't work out either:
import requests
from bs4 import BeautifulSoup
from datetime import datetime
login_url = 'https://www.facebook.com/login/device-based/regular/login/?login_attempt=1&lwv=101'
time = int(datetime.now().timestamp())
headers = {
'user-agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
'origin': 'https://www.facebook.com',
'referer': 'https://www.facebook.com/login/device-based/regular/login/?login_attempt=1&lwv=101'
}
with requests.Session() as s:
r = s.get(login_url)
soup = BeautifulSoup(r.text,"lxml")
payload = {i['name']:i.get('value','') for i in soup.select('input[name]')}
payload['email'] = 'YOUR_EMAIL'
payload['encpass'] = f'#PWD_BROWSER:0:{time}:YOUR_PASSWORD'
payload['had_password_prefilled'] = 'true'
payload['had_cp_prefilled'] = 'true'
payload['prefill_source'] = 'browser_dropdown'
payload['prefill_type'] = 'contact_point'
payload['first_prefill_source'] = 'last_login'
payload['first_prefill_type'] = 'contact_point'
payload['prefill_contact_point'] = 'YOUR_EMAIL'
payload.pop('pass')
r = s.post(login_url,data=payload,headers=headers)
print(r.status_code)
print(r.url)
How can I log in to facebook using requests?
This might be a case of xy problem
I recommend trying Selenium in accessing Facebook programmatically.
More examples using Selenium in logging in.
https://www.askpython.com/python/examples/python-automate-facebook-login
https://www.guru99.com/facebook-login-using-python.html
If logging in is all that you require, then using selenium, you could do it as:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
URL = 'https://www.facebook.com/'
PATH = r'C:\Program Files (x86)\chromedriver.exe'
driver = webdriver.Chrome(PATH)
driver.get(URL)
email = driver.find_element_by_id('email')
email.send_keys('YourEmail')
password = driver.find_element_by_id('pass')
password.send_keys('YourPassword')
password.send_keys(Keys.RETURN)
I would recommend that you use the browser that you frequently use to login for this process.
I am attempting to read and parse a website that returns a JSON. Every attempt I have made, it gives me a timeout error or not an error at all(I have to stop it)
URL:
https://api.louisvuitton.com/api/eng-us/catalog/availability/M57089
Code I have tried:
import requests
from urllib.request import Request, urlopen
#Trial 1
BASE_URL = 'https://api.louisvuitton.com/api/eng-us/catalog/availability/M57089'
headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_1_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36'
}
response = requests.get(BASE_URL, headers=headers)
#Trial2
url = ('https://api.louisvuitton.com/api/eng-us/catalog/availability/M57089')
req = Request(url, headers= headers)
webpage = urlopen(req).read()
page_soup = soup(webpage, "html.parser")
obj=json.loads(str(page_soup))
#Trial3
import dload
j = dload.json('https://api.louisvuitton.com/api/eng-us/catalog/availability/M57089')
print(j)
So far none of these attempts or any variation similar to these have been successful to open the website and read it. Any help would be appreciated.
I'm using this code to try and do some web scraping. I'm trying to access my school grades using requests and beautiful soup and I'm having a lot of trouble logging in. I just get the error:
TypeError: 'NoneType' object has no attribute '__getitem__'
Here's the code that I'm using:
import requests
from bs4 import BeautifulSoup
headers = {
'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
}
login_data = {
'name': 'my_username',
'pass': 'my_password',
'form_id': 'new_login_form',
'op': 'Login'
}
with requests.Session() as s:
url = 'https://irc.d125.org'
r = s.get(url, headers=headers)
soup = BeautifulSoup(r.content, 'html5lib')
login_data['form_build_id'] = soup.find('input', attrs={'name': 'form_build_id'})['value']
r = s.post(url, data=login_data, headers=headers)
print(r.content)
Any help is appreciated! Thanks so much!
When the login button is pressed, the site sends an xhr request with the login information. The following should work, just replace your username and password in the space provided.
Code
import requests
from bs4 import BeautifulSoup
headers = {
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
}
login_data = {
"UserName": "REPLACE_USER", # Enter Username
"Password": "REPLACE_PASSWORD", # Enter password
"RememberMe": False,
}
with requests.Session() as s:
url = 'https://irc.d125.org/Login'
s.get(url, headers=headers)
r = s.post(url, data=login_data)
print(r.text)
You should use something to render the javascript of the webpage before posting the data. A good approach to do that is to put your login script inside a Scrapy spider in combination with Splash:
see https://github.com/scrapy-plugins/scrapy-splash
You can use selenium. I use it to get my grades from school page, too.
import requests
from bs4 import BeautifulSoup as soup
import webbrowser
from urllib.request import Request, urlopen
import requests
import urllib
import re
import time
import math ## i know some of the imports are unnecessary, but i just copy paste them everywhere
headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'}
j=0
proxies = {'http': "http://213.6.68.78:53281",
'https': "https://213.6.68.78:53281"}
login_data = {
'username': 'my_nickname420',
'password': 'password1234',
'phone': ''
}
with requests.Session() as s:
while(1):
try:
s.proxies = proxies
break
except:
print('error in connecting to proxy... ')
url = "https://www.TOPSECRETSITE.com/login"
r = s.get(url)
page_soup = soup(r.content, 'html5lib')
login_data['_csrf'] = page_soup.find('input', attrs={'name': '_csrf'}) ['value']
r = s.post(url, data=login_data, headers=headers)
print(r.content)
When I run the code, it gives me an output but somehow I'm still not connected to site, it shows me as guest user.
Here is the things you see when you click inspect element after logging(normally from browser)
*Form Data
username: my_nickname420
password: password1234
_csrf: m4beGyaG-CGYiSd2PHSjt3yFyHBsK1YP4GbI
phone:
There is also some "set cookie" things in above this, if that helps.
Referring to this post: Unable to log in to Amazon using Python
I tried using the suggested answer, but still cannot login.
I added code to display what the result is. It's inputting the email into the box, but I'm still seeing "Enter a valid email" in the result. I'm pretty sure I selected the form correctly, and the name's of the input fields are correct, but need a little guidance to debug this.
import bs4, requests
import os
import webbrowser
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.110 Safari/537.36'
}
from bs4 import BeautifulSoup
with requests.Session() as s:
s.headers = headers
r = s.get('https://www.amazon.com/ap/signin?_encoding=UTF8&ignoreAuthState=1&openid.assoc_handle=usflex&openid.claimed_id=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.identity=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.mode=checkid_setup&openid.ns=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0&openid.ns.pape=http%3A%2F%2Fspecs.openid.net%2Fextensions%2Fpape%2F1.0&openid.pape.max_auth_age=0&openid.return_to=https%3A%2F%2Fwww.amazon.com%2F%3Fref_%3Dnav_signin&switch_account=')
soup = BeautifulSoup(r.content, "html.parser")
signin_data = {s["name"]: s["value"]
for s in soup.select("form[name=signIn]")[0].select("input[name]")
if s.has_attr("value")}
signin_data[u'email'] = 'xx'
signin_data[u'password'] = 'xx'
response = s.post('https://www.amazon.com/ap/signin', data=signin_data)
soup = bs4.BeautifulSoup(response.text, "html.parser")
html = response.content
path = os.path.abspath('temp.html')
url = 'file://' + path
with open(path, 'w') as f:
f.write(str(html))
webbrowser.open(url)
I don't know about BeautifulSoup, but here's how I did it using requests.
from getpass import getpass
import webbrowser
import requests
import os
amazon_username = raw_input("Amazon email: ")
amazon_password = getpass()
headers = {
"User-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.110 Safari/537.36",
"action": "sign-in",
"email": amazon_username,
"password": amazon_password
}
r = requests.get("https://www.amazon.com/gp/sign-in.html", headers=headers)
print(r.status_code)
r = requests.get("https://www.amazon.com/gp/flex/sign-in/select.html", headers=headers)
print(r.status_code)
r = requests.get("https://www.amazon.com/", headers=headers)
print(r.status_code)