I am trying to get some data from a site built with React, but I cannot extract what I need. Basically, I want to get the datetime shown on the site, but my script cannot find the div that contains it.
Here is my code:
Site url: https://gisaid.org/phylodynamics/china-cn/
def config_webdriver(browser: str):
    """Start a headless Selenium driver for the requested browser.

    Args:
        browser: ``"chrome"`` selects Chrome; any other value selects Firefox.

    Returns:
        The newly created ``webdriver.Chrome`` or ``webdriver.Firefox``.
    """
    headless_flag = "--headless"

    # Both option objects are prepared up front, whichever browser is chosen.
    chrome_opts = ChromeOptions()
    firefox_opts = FirefoxOptions()
    for opts in (chrome_opts, firefox_opts):
        opts.add_argument(headless_flag)

    if browser == "chrome":
        return webdriver.Chrome(options=chrome_opts)
    return webdriver.Firefox(options=firefox_opts)
def get_date_from_china_phylodynamics(browser: str, url: str):
    """Open *url* and print the element matched by the page's CSS selector.

    The browser session is always shut down before returning, including
    when loading the page fails.

    Args:
        browser: Browser name passed on to ``config_webdriver``.
        url: Address of the page to load.
    """
    driver = config_webdriver(browser)
    try:
        driver.get(url)
        wait_driver = WebDriverWait(driver, 20)
        try:
            # Wait up to 20 seconds for the target element to become visible.
            element = wait_driver.until(
                EC.visibility_of_element_located(
                    (By.CSS_SELECTOR,
                     "#root > div > div.mb-3.mt-2.justify-content-center.row > div"
                     )
                ))
            print(element)
        except Exception as error:
            # Best effort: report a failed lookup instead of raising.
            print(error)
    finally:
        # quit() ends the whole session; close() would only close the
        # current window and was skipped entirely when an error escaped.
        driver.quit()
I think you'd be better off fetching the underlying JSON directly; you can find the requests by pressing F12 and opening the Network tab:
https://phylodynamics2.pandemicprepardness.org/charon/getDataset?prefix=SARS-CoV-2/China5
https://phylodynamics2.pandemicprepardness.org/charon/getAvailable?prefix=SARS-CoV-2/China5
You can retrieve the JSON and build objects from it. Parsing the HTML will break whenever the DOM changes. See for instance: https://reqbin.com/code/python/g4nr6w3u/python-parse-json-example
I'm trying to build a program which opens Chrome pages one by one.
I'm using Selenium library.
My idea is to have different IP every time I open a new page.
I read about proxies but I'm not able to manage them.
Can you help me?
This is my code:
class myClass:
    """Wrap one Chrome session that opens the IP-lookup page."""

    def __init__(self):
        """Start Chrome with relaxed certificate/content checks."""
        self.options = Options()
        self.options.add_argument('--allow-running-insecure-content')
        self.options.add_argument('--ignore-certificate-errors')
        self.driver = webdriver.Chrome('./chromedriver', options=self.options)

    def openBrowser(self):
        """Load the IP-lookup page in this instance's browser."""
        # The session is stored as ``self.driver``; ``self.chrome`` was never
        # assigned and raised AttributeError.
        self.driver.get("http://whatismyipaddress.com")

    def closeBrowser(self):
        """Close the current window, then end the driver session."""
        driver = self.driver
        driver.close()
        driver.quit()
if __name__ == "__main__":
    # Open the pages strictly one after another: each browser is created
    # only when it is needed and closed before the next one starts.
    # (myClass takes no constructor arguments.)
    for _ in range(10):
        browser = myClass()
        try:
            # open Chrome page
            browser.openBrowser()
            # here I want to change IP and then I close the page before opening a new one
        finally:
            # close Chrome page, even if loading it failed
            browser.closeBrowser()
from selenium import webdriver
from time import sleep
filename = "log.txt"
myfile = open(filename, 'w')
class Search(object):
    """Open "Site2", read one post and check it against the ID/DOB patterns.

    The check is repeated (refreshing the page in between) until the post
    text matches. Every text that is read is printed and written to the
    module-level ``myfile`` log, which is closed when the search ends.
    """

    def __init__(self, driver=None):
        """Run the search.

        Args:
            driver: An existing WebDriver to reuse, e.g. one that is already
                logged in. When omitted a new Chrome instance is started,
                which does not share any login session.
        """
        import re

        if driver is None:
            # "This will open a new chrome instance without being logged in to the site"
            driver = webdriver.Chrome('chromedriver.exe')
        self.driver = driver
        self.driver.get("Site2")
        sleep(2)
        self.driver.find_element_by_xpath("/html/body/div[1]/div[4]/div/div/div[3]/div/div/div[2]/div[2]/div[2]/div/div[4]/div[1]/div[2]/div[1]/a").click()
        sleep(2)

        # NOTE(review): ``match`` anchors at the start of the text, so all
        # four patterns are tested at position 0 — confirm that is intended.
        Id = re.compile('[0-9]{9}')
        Id2 = re.compile('[0-9]{4}')
        DobMonth = re.compile('[0-9]{2}')
        DobYear = re.compile('[0-9]{2}')
        patterns = (Id, Id2, DobMonth, DobYear)

        try:
            # Retry in place instead of constructing another Search: a new
            # instance would launch another browser and write to a log file
            # that has already been closed.
            while True:
                Texto = self.driver.find_element_by_xpath("/html/body/div[1]/div[4]/div/div/div[4]/div/div/div[1]/div[3]/div/article[1]/div/div[2]/div/div/div/article/div[1]").text
                print(Texto)
                myfile.write(Texto)
                myfile.flush()
                sleep(10)
                if all(pattern.match(Texto) for pattern in patterns):
                    print("Matches")
                    break
                print("Not match")
                sleep(20)
                self.driver.refresh()
        finally:
            myfile.close()
class BasicBot(object):
    """Log in to "site1" with the given credentials, then start Search."""

    def __init__(self, username, pw):
        """Open the site, submit the login form and run the search step.

        Args:
            username: Value typed into the ``login`` input.
            pw: Value typed into the ``password`` input.
        """
        self.driver = webdriver.Chrome('chromedriver.exe')
        self.driver.get("site1")
        sleep(2)
        # Open the login form.
        self.driver.find_element_by_xpath("/html/body/div[1]/div[1]/div/div/div/div[1]/a[1]/span").click()
        sleep(2)
        # XPath selects attributes with ``@``; ``#name`` is not valid XPath
        # and made both lookups fail.
        self.driver.find_element_by_xpath("//input[@name=\"login\"]")\
            .send_keys(username)
        self.driver.find_element_by_xpath("//input[@name=\"password\"]")\
            .send_keys(pw)
        self.driver.find_element_by_xpath('/html/body/div[4]/div/div[2]/div/form/div[1]/dl/dd/div/div[2]/button').click()
        sleep(2)
        Search()


BasicBot('username',"password")
So the script runs BasicBot(usr, psswd), which logs in to the site and is then supposed to go to a different site while still logged in. There it checks whether post X matches the given criteria: if it does, the script is done; if it does not, it should refresh the site and check again.
In the Search class, you start with creating a new chromedriver instance: self.driver = webdriver.Chrome('chromedriver.exe'), this is why it opens another window with a fresh state.
Instead, modify your Search class to
take an existing instance of webdriver.
open a new window by using script window.open instead of get:
class Search:
    """Continue in a new window of an already running browser."""

    def __init__(self, driver):
        """Open site2 in a new window of *driver* and switch to it."""
        self.driver = driver
        browser = self.driver

        # Open a new window
        browser.execute_script("window.open('https://site2')")
        sleep(2)  # visually check to make sure it opened

        # Switch to new window (the most recently opened handle is last)
        newest_handle = browser.window_handles[-1]
        browser.switch_to.window(newest_handle)
        # After this you can start doing self.driver.find_element_by_xpath and the rest
In the class BasicBot, modify the last line to pass the driver:
Search(self.driver)
Lastly, you can use refresh to refresh your site2:
else:
print("Not match")
sleep(20)
self.driver.refresh()
Hope, it helps. Good luck!
What I want to do is to open a page (for example youtube) and be automatically logged in, like when I manually open it in the browser.
From what I've understood, I have to use cookies, the problem is that I can't understand how.
I tried to download youtube cookies with this:
# Start Firefox through the geckodriver binary at the given relative path.
driver = webdriver.Firefox(executable_path="driver/geckodriver.exe")
# Load the YouTube home page.
driver.get("https://www.youtube.com/")
# Print the cookies the driver reports after loading the page.
print(driver.get_cookies())
And what I get is:
{'name': 'VISITOR_INFO1_LIVE', 'value': 'EDkAwwhbDKQ', 'path': '/', 'domain': '.youtube.com', 'expiry': None, 'secure': False, 'httpOnly': True}
So what cookie do I have to load to automatically log in?
You can use pickle to save the cookies to a (binary) file and load them again later:
def save_cookie(driver, path):
    """Pickle the driver's current cookies into the file at *path*."""
    with open(path, 'wb') as sink:
        sink.write(pickle.dumps(driver.get_cookies()))
def load_cookie(driver, path):
    """Read pickled cookies from *path* and add each one to *driver*.

    NOTE: unpickling can execute arbitrary code, so only load files that
    were written by ``save_cookie``.
    """
    with open(path, 'rb') as source:
        stored = pickle.loads(source.read())
        for entry in stored:
            driver.add_cookie(entry)
I would advise using the JSON format instead, because the cookies are just dictionaries and lists. Apart from that, this is the same as the accepted answer.
import json
def save_cookie(driver, path):
    """Serialize the driver's current cookies as JSON into *path*."""
    with open(path, 'w') as sink:
        sink.write(json.dumps(driver.get_cookies()))
def load_cookie(driver, path):
    """Read the JSON cookie list at *path* and add each cookie to *driver*."""
    with open(path, 'r') as source:
        stored = json.loads(source.read())
        for entry in stored:
            driver.add_cookie(entry)
I had a scenario where I would like to reuse once authenticated/logged-in sessions. I'm using multiple browser simultaneously.
I've tried plenty of solutions from blogs and StackOverflow answers.
1. Using user-data-dir and profile-directory
These Chrome options serve the purpose if you open one browser at a time, but if you open several browsers simultaneously Chrome throws an error saying the user data directory is already in use.
2. Using cookies
Cookies can be shared across multiple browsers. The code in other SO answers already has most of the important building blocks for using cookies in Selenium. Here I'm extending those solutions to complete the flow.
Code
# selenium-driver.py
import pickle
from selenium import webdriver
class SeleniumDriver(object):
    """Chrome driver wrapper that stores cookies on quit and reloads them.

    On construction the cookies in ``cookies_file_path`` (if the file can be
    read) are added for every site in ``cookies_websites``.
    """

    def __init__(
        self,
        # chromedriver path
        driver_path='/Users/username/work/chrome/chromedriver',
        # pickle file path to store cookies
        cookies_file_path='/Users/username/work/chrome/cookies.pkl',
        # list of websites to reuse cookies with
        cookies_websites=None
    ):
        """Start Chrome and try to restore previously saved cookies.

        Args:
            driver_path: Path of the chromedriver executable.
            cookies_file_path: Pickle file used to store the cookies.
            cookies_websites: Sites to load the cookies for; defaults to
                ``["https://facebook.com"]``.
        """
        self.driver_path = driver_path
        self.cookies_file_path = cookies_file_path
        # Build the default per instance so instances never share one
        # mutable default list.
        self.cookies_websites = (
            ["https://facebook.com"] if cookies_websites is None
            else cookies_websites
        )
        chrome_options = webdriver.ChromeOptions()
        self.driver = webdriver.Chrome(
            executable_path=self.driver_path,
            options=chrome_options
        )
        try:
            # load cookies for given websites
            # NOTE: unpickling can execute arbitrary code; only point
            # cookies_file_path at a file written by save_cookies().
            with open(self.cookies_file_path, "rb") as cookies_file:
                cookies = pickle.load(cookies_file)
            for website in self.cookies_websites:
                self.driver.get(website)
                for cookie in cookies:
                    self.driver.add_cookie(cookie)
                # Reload so the page is requested with the added cookies.
                self.driver.refresh()
        except Exception as e:
            # it'll fail for the first time, when cookie file is not present
            print(str(e))
            print("Error loading cookies")

    def save_cookies(self):
        """Pickle the driver's current cookies into ``cookies_file_path``."""
        cookies = self.driver.get_cookies()
        # The context manager guarantees the data is flushed and the file
        # handle released.
        with open(self.cookies_file_path, "wb") as cookies_file:
            pickle.dump(cookies, cookies_file)

    def close_all(self):
        """Close every open window/tab of the driver."""
        # Iterate over a copy of the handle list taken before closing.
        for window_handle in list(self.driver.window_handles):
            self.driver.switch_to.window(window_handle)
            self.driver.close()

    def quit(self):
        """Save cookies, close all windows and end the driver session."""
        self.save_cookies()
        self.close_all()
        # End the session itself; closing windows alone does not call quit().
        self.driver.quit()
def is_fb_logged_in(browser=None):
    """Load facebook.com and report whether the session is logged in.

    Args:
        browser: WebDriver to use. When omitted, the module-level ``driver``
            is used, so calls with and without an argument both work.

    Returns:
        ``False`` when the page title contains the login-page title,
        ``True`` otherwise.
    """
    if browser is None:
        browser = driver
    browser.get("https://facebook.com")
    return 'Facebook – log in or sign up' not in browser.title
def fb_login(username, password, browser=None):
    """Fill in the Facebook login form and submit it.

    Args:
        username: Text typed into the element with id ``email``.
        password: Text typed into the element with id ``pass``.
        browser: WebDriver to use. When omitted, the module-level ``driver``
            is used.
    """
    if browser is None:
        browser = driver
    username_box = browser.find_element_by_id('email')
    username_box.send_keys(username)
    password_box = browser.find_element_by_id('pass')
    password_box.send_keys(password)
    login_box = browser.find_element_by_id('loginbutton')
    login_box.click()
if __name__ == '__main__':
    # Run - 1
    #   First time authentication and save cookies
    # Run - 2
    #   Reuse cookies and use logged-in session
    selenium_object = SeleniumDriver()
    driver = selenium_object.driver
    username = "fb-username"
    password = "fb-password"
    try:
        # The helpers read the module-level ``driver`` assigned above, so
        # no driver argument is passed.
        if is_fb_logged_in():
            print("Already logged in")
        else:
            print("Not logged in. Login")
            fb_login(username, password)
    finally:
        # Always save cookies and close the browser, even if login failed.
        selenium_object.quit()
Run 1: Login & Save Cookies
$ python selenium-driver.py
[Errno 2] No such file or directory: '/Users/username/work/chrome/cookies.pkl'
Error loading cookies
Not logged in. Login
This will open the Facebook login page and enter the username and password to log in. Once that is done, it saves the cookies and then closes the browser.
Run 2: Reuse cookies to continue loggedin session
$ python selenium-driver.py
Already logged in
This will open logged in session of facebook using stored cookies.
Requirements
Python 3.7
Selenium Webdriver
Pickle
I ran into the same issue once. In the end I used Chrome options to fix it instead of a cookie file.
import getpass
# Point Chrome at an existing user profile so its stored cookies are reused.
# NOTE(review): Chrome normally expects ``user-data-dir`` to be the
# "User Data" folder, with the profile chosen separately — confirm the
# trailing "\\Default" is intended.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument("user-data-dir=C:\\Users\\"+getpass.getuser()+"\\AppData\\Local\\Google\\Chrome\\User Data\\Default") # this is the directory for the cookies
# ``options=`` is the supported keyword (and the one used elsewhere in this
# file); ``chrome_options=`` is deprecated.
driver = webdriver.Chrome(options=chrome_options)
# NOTE(review): ``url`` must be defined by the surrounding code.
driver.get(url)
Try this: there is a method for adding a cookie to your driver session.
http://selenium-python.readthedocs.io/api.html#selenium.webdriver.remote.webdriver.WebDriver.add_cookie
Here is one possible solution
import pickle
from selenium import webdriver
def save_cookie(driver):
    """Pickle the driver's current cookies into the file named ``cookie``."""
    with open("cookie", 'wb') as sink:
        sink.write(pickle.dumps(driver.get_cookies()))
def load_cookie(driver):
    """Print and add every cookie pickled in the file named ``cookie``.

    NOTE: unpickling can execute arbitrary code, so only load a file that
    was written by ``save_cookie``.
    """
    with open("cookie", 'rb') as source:
        stored = pickle.loads(source.read())
        for entry in stored:
            print(entry)
            driver.add_cookie(entry)
# NOTE(review): ChromeDriverManager is not imported in this snippet —
# presumably it comes from the webdriver_manager package; confirm.
driver = webdriver.Chrome(ChromeDriverManager().install())
url = 'https://www.Youtube.com'
try:
    driver.get(url)
    # first try to login and generate cookies; after that you can use the cookies to login every time
    try:
        load_cookie(driver)
    except FileNotFoundError:
        # First run: no cookie file has been saved yet.
        print("No saved cookies found")
    else:
        # Reload so the page is requested with the cookies just added.
        driver.refresh()
    # Do your task here
    save_cookie(driver)
finally:
    # End the browser session even if something above failed.
    driver.quit()
I used Selenium to navigate to a URL (i.e. URL_1) with a login/password and provided the login credentials. I'm logged in and the URL (i.e. URL_2) has changed as expected. I don't know how to navigate URL_2 because the driver still refers to URL_1.
Thanks in advance.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
user_name = 'xyz'
password = 'xyz'
def login_process():
    """Log in at URL_1 and hand back the logged-in driver.

    Returns:
        The Firefox WebDriver used for the login, so the caller can keep
        working in the same session instead of losing it when this
        function returns.
    """
    driver = webdriver.Firefox()
    driver.get("URL_1")
    #successfully navigated to URL_1
    elem = driver.find_element_by_name("username")
    elem.clear()
    elem.send_keys(user_name)
    elem = driver.find_element_by_name("password")
    elem.clear()
    elem.send_keys(password)
    driver.find_element_by_id("submit").click()
    #successfully entered URL_2
    return driver
def query(driver=None):
    """Fill in the ticker box on the page the given driver is showing.

    Args:
        driver: A WebDriver that has already completed the login (and is
            therefore on URL_2). When omitted nothing is done, because
            this function has no driver of its own to fall back on.
    """
    if driver is None:
        return
    # NOTE(review): the draft referenced an undefined name ``ticker_box``;
    # presumably it is the element's class name — confirm.
    elem = driver.find_element_by_class_name("ticker_box")
    elem.clear()
    elem.send_keys('xyz')
Instead of having independent functions, create a class with driver instance as an instance variable. Then, use self.driver.get() to navigate to a different URL:
class MyTest(object):
    """Keep one Firefox driver on the instance so every step shares it."""

    def __init__(self):
        self.driver = webdriver.Firefox()

    def login_process(self):
        """Open URL_1, fill in the credentials and submit the form."""
        browser = self.driver
        browser.get("URL_1")
        # successfully navigated to URL_1

        username_field = browser.find_element_by_name("username")
        username_field.clear()
        username_field.send_keys(user_name)

        password_field = browser.find_element_by_name("password")
        password_field.clear()
        password_field.send_keys(password)

        submit_button = browser.find_element_by_id("submit")
        submit_button.click()
        # successfully entered URL_2

    def query(self):
        """Navigate the shared driver to the second URL."""
        browser = self.driver
        browser.get("URL2")
        # do smth
# Create the object that owns the single driver instance.
test = MyTest()
# Log in first, using that driver.
test.login_process()
# Then continue with the same driver on the next URL.
test.query()
After navigating to the new page if you want to do something on that new page
# A newly opened window/tab is the last handle; index 0 is the original
# window. With a single window both indexes refer to the same handle.
newURl = driver.window_handles[-1]
driver.switch_to.window(newURl)
After doing this, you can interact with the new page without getting "no such element" exceptions.
First you can assign the url variable as a global:
# A module-level assignment already creates a global; the ``global``
# statement is only needed inside a function that rebinds the name.
url = "firstURL"
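At the end of your first function you can change the value of the variable to the new URL. The function must declare `global url` before assigning, otherwise the assignment only creates a local variable: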
url = driver.current_url
And then you can get the new url at the beginning of your second function:
driver.get(url)