As a test, I am trying to create a script that goes to my website and clicks on the learn more button, but am having trouble actually automatically clicking the button.
I've tried everything that I've found on stack overflow but nothing has worked.
from selenium import webdriver
import webbrowser
import time
url = 'https://www.mwstan.com'
driver = webbrowser.open_new_tab(url)
element = driver.find_element_by_id('learnmore')
element.click()
You are going to need to install a binary for whatever driver you are going to use
import os
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
chrome_options = Options()
chrome_options.add_argument("--headless")
chrome_options.add_argument("--window-size=1920x1080")
chrome_driver = os.getcwd() + "/chromedriver"
def get_url_example(url):
driver = webdriver.Chrome(chrome_options=chrome_options, executable_path=chrome_driver)
driver.get(url)
button = driver.find_element_by_id("learnmore")
button.click()
# you can access the page source here using driver.page_source
if __name__ == '__main__':
get_url_page_source("https://www.mwstan.com")
This code works for me and hits your button.
This is using chrome webdriver but you can use another webdriver. JUst makesure you move the driver and access the path correctly like in line
chrome_driver = os.getcwd() + "/chromedriver"
Related
I am currently working on a scraper for aniworld.to.
My goal is it to enter the anime name and get all of the Episodes downloaded.
I have everything working except one thing...
The websites has a Watch button. That Button redirects you to https://aniworld.to/redirect/SOMETHING and that Site has a captcha which means the link is not in the html...
Is there a way to bypass this/get the link in python? Or a way to display the captcha so I can solve it?
Because the captcha only appears every lightyear.
The only thing I need from that page is the redirect link. It looks like this:
https://vidoza.net/embed-something.html
My very very wip code is here if it helps: https://github.com/wolfswolke/aniworld_scraper
Mitchdu showed me how to do it.
If anyone else needs help here is my code: https://github.com/wolfswolke/aniworld_scraper/blob/main/src/logic/captcha.py
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from threading import Thread
import os
def open_captcha_window(full_url):
working_dir = os.getcwd()
path_to_ublock = r'{}\extensions\ublock'.format(working_dir)
options = webdriver.ChromeOptions()
options.add_argument("app=" + full_url)
options.add_argument("window-size=423,705")
options.add_experimental_option('excludeSwitches', ['enable-logging'])
if os.path.exists(path_to_ublock):
options.add_argument('load-extension=' + path_to_ublock)
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
driver.get(full_url)
wait = WebDriverWait(driver, 100, 0.3)
wait.until(lambda redirect: redirect.current_url != full_url)
new_page = driver.current_url
Thread(target=threaded_driver_close, args=(driver,)).start()
return new_page
def threaded_driver_close(driver):
driver.close()
I'm trying to make a script that logs into my online grade book to look for any changes (new grades, etc). This is my code so far.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
import time
def main():
options = Options()
# options.add_argument("--headless")
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
driver.maximize_window()
# goes to the desired website
driver.get('https://portal.librus.pl/rodzina')
# searches for and clicks a button that drops down a menu in which link for login form is visible
button = driver.find_element(By.CLASS_NAME, 'btn.btn-third.btn-synergia-top.btn-navbar.dropdown-toggle')
button.click()
# searches and clicks login link
agree = driver.find_element(By.CLASS_NAME, 'zmdi.zmdi-account.dropdown-item__icon')
agree.click()
time.sleep(10)
driver.quit()
if __name__ == '__main__':
main()
And there is a problem, I cannot seem to find a way to make webdriver see what I see. What I mean is that I see the webpage like this and webdriver sees the same webpage like this also the source code is different. I've tried using undetected ChromeDriver with no success. This is my code using UC.
import undetected_chromedriver as uc
import time
from selenium.webdriver.common.by import By
def main():
driver = uc.Chrome()
driver.maximize_window()
# goes to the desired website
driver.get('https://portal.librus.pl/rodzina/home')
# searches for and clicks a button that drops down a menu in which link for login form is visible
button = driver.find_element(By.CLASS_NAME, 'btn.btn-third.btn-synergia-top.btn-navbar.dropdown-toggle')
button.click()
# searches and clicks login link
agree = driver.find_element(By.CLASS_NAME, 'zmdi.zmdi-account.dropdown-item__icon')
agree.click()
time.sleep(5)
driver.execute_script("window.print();")
if __name__ == '__main__':
main()
Has anyone had a similar problem and managed to solve it?
I am sucesfully able to load my chrome profile using the flags:
user-data-dir as well as profile-directory, yet once the profile is loaded and the chrome window is actually open, no webpage appears. It simply gets stuck on a blank screen.
When I remove the code for the profile it is actually able to open the webpage stored in the login-url variable.
Tried updating to latest version of chrome (94.0.4606.81) and I also used the exact steps listed here to ensure I have the right chrome driver version.
I also did the obvious like making sure there are not any instances of chrome running in the background.
Code is as follows:
import os
from os.path import exists
import selenium
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
headless = False
login_url = "https://google.com)"
def startChrome():
global headless
try:
chrome_options = Options()
if headless:
chrome_options.add_argument("--headless")
chrome_options.add_argument("----user-data-dir=C:/Users/ERIK/AppData/Local/Google/Chrome/User Data")
chrome_options.add_argument("--profile-directory=Profile 1")
global driver
driver = webdriver.Chrome(path+"/chromedriver.exe", options=chrome_options)
except:
print("Failed to start Chrome!")
input()
exit()
startChrome()
driver.get(login_url)
input()
The following successfully opens google.com for me.
Selenium Version 3.141.0
ChromeDriver Version 94.0.4606.61
Chrome Version 94.0.4606.71
from os import path
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
def getDriver(profile_directory, headless = False):
chrome_options = Options()
if headless:
chrome_options.add_argument("--headless")
userDataDir = path.expandvars(r"%LOCALAPPDATA%\Google\Chrome\User Data")
chrome_options.add_argument(f"--user-data-dir={userDataDir}")
chrome_options.add_argument(f"--profile-directory={profile_directory}")
return webdriver.Chrome("./chromedriver.exe", options=chrome_options)
driver = getDriver("Profile 2")
driver.get("https://google.com")
I have a code for making temp mail automatically but I have a problem.
This is code:
import time
from selenium import webdriver
browser = webdriver.Chrome()
browser.get("https://temp-mail.org/")
time.sleep(10)
browser.close()
The link opens correctly but I can't pass cloudflare.
Also, I see some errors on my console:
Thanks...
Try adding user agent argument in chrome options and set user agent to any value
ops = Options()
ua='me'
ops.add_argument('--user-agent=%s' % ua)
driver=uc.Chrome(executable_path=r"C:\chromedriver.exe",chrome_options=ops)
Alternatively try using undetected-chromedriver
import undetected_chromedriver as uc
from selenium import webdriver
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
driver = uc.Chrome(options=options)
driver.get("https://temp-mail.org/")
I am having a weird issue with Python and Selenium. I am accessing the URL https://www.biggerpockets.com/users/JarridJ1. When you click more it shows further content. I can understand that it is a React-based website. When I view it on browser and doa View Source I can see the required stuff in a react element <div data-react-class="Profile/Header/Header" data-react-props="{". I tried to automate Firefox via Selenium but I could not even get with that as well.
Check the screenshot:
Below is the code I tried:
from time import sleep
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
def parse(u):
print('Processing... {}'.format(u))
driver.get(u)
sleep(2)
html = driver.page_source
driver.save_screenshot('bp.png')
print(html)
if __name__ == '__main__':
options = Options()
options.add_argument("--headless") # Runs Chrome in headless mode.
options.add_argument('--no-sandbox') # Bypass OS security model
options.add_argument('--disable-gpu') # applicable to windows os only
options.add_argument('start-maximized') #
options.add_argument('disable-infobars')
options.add_argument("--disable-extensions")
driver = webdriver.Firefox()
parse('https://www.biggerpockets.com/users/JarridJ1')
This is a tricky one but I found a way to get to the element you have highlighted. Still not sure why driver.page_source is not return what you are looking for.
def parse(u):
print('Processing... {}'.format(u))
driver.get(u)
sleep(2)
get_everything = driver.find_elements_by_xpath("//*")
for element in get_everything:
print(element .get_attribute('innerHTML'))
#html = driver.page_source
#driver.save_screenshot('bp.png')
#print(html)
Below is my standalone example:
from selenium import webdriver
import time
driver = webdriver.Chrome("C:\Path\To\chromedriver.exe")
driver.get("https://www.biggerpockets.com/users/JarridJ1")
time.sleep(5)
a = driver.find_element_by_xpath("//div[#data-react-class='Profile/Header/Header']")
b = a.get_attribute("data-react-props")
print(b)
c = driver.find_elements_by_xpath("//*")
for i in c:
print(i.get_attribute('innerHTML'))