Selenium keeps printing data:, to window name - python

I'm new to Selenium, just started looking at it a few days ago.
Here's my program.
It just basically opens a website, nothing too much.
But it does not load the page; it just shows data:, in the window header and a blank white background.
from selenium import webdriver
PATH = "/home/MyName/_DEV_/Selenium/chromedriver"
driver = webdriver.Chrome(PATH)
driver.get('google.com')
#driver.close()
What could be the problem? Thanks in advance for the help.

I understand being new. Here is a little "template" based on what I use, to help you get started. (Note: I normally split this across my own custom classes, but for demonstration purposes I have put it all into one file.)
MAIN PROGRAM - For Reference
from selenium import webdriver
from selenium.webdriver.chrome.webdriver import WebDriver as ChromeDriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait as DriverWait
from selenium.webdriver.support import expected_conditions as DriverConditions

def get_chrome_driver():
    """This sets up our Chrome Driver and returns it as an object"""
    path_to_chrome = r"F:\Selenium_Drivers\Windows_Chrome85_Driver\chromedriver.exe"
    chrome_options = webdriver.ChromeOptions()
    # Browser is displayed in a custom window size
    chrome_options.add_argument("window-size=1500,1000")
    return webdriver.Chrome(executable_path = path_to_chrome,
                            options = chrome_options)

def is_displayed(driver : ChromeDriver, xpath : str, timeout = 5):
    """Checks if our element displays on our page. If it does, return True. Otherwise, return False."""
    try:
        webElement = DriverWait(driver, timeout).until(
            DriverConditions.presence_of_element_located(locator = (By.XPATH, xpath))
        )
        return True if webElement != None else False
    except:
        return False

# Gets our chrome driver and opens our site
chrome_driver = get_chrome_driver()
chrome_driver.get("https://www.google.com/")

result = is_displayed(chrome_driver, "//input[@title='Search']")
print(f'Does Search Textbox Display: {result}')

chrome_driver.quit()
chrome_driver.service.stop()
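One note on the template above: executable_path was removed in recent Selenium 4 releases, so if webdriver.Chrome(executable_path=...) raises a TypeError for you, build the driver through a Service object instead. A minimal sketch of that variant, assuming Selenium 4 and the same paths as above:

from selenium import webdriver
from selenium.webdriver.chrome.service import Service

def get_chrome_driver():
    """Selenium 4 style: the driver path goes through a Service object."""
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("window-size=1500,1000")
    service = Service(r"F:\Selenium_Drivers\Windows_Chrome85_Driver\chromedriver.exe")
    return webdriver.Chrome(service=service, options=chrome_options)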

Solved:
First I ran sudo apt install chromium-chromedriver, then:
from selenium import webdriver
driver = webdriver.Chrome()
driver.get('https://google.com')

Related

Selenium driver hanging on OS alert

I'm using Selenium in Python (3.11) with a Firefox (107) driver.
With the driver I navigate to a page which, after several actions, triggers an OS alert (prompting me to launch a program). When this alert pops up, the driver hangs, and only once it is closed manually does my script continue to run.
I have tried driver.quit(), as well as using
os.system("taskkill /F /pid " + str(process.ProcessId))
with the driver's PID, with no luck.
I have managed to prevent the pop-up from popping up with
options.set_preference("security.external_protocol_requires_permission", False)
but the code still hangs the same way at the point where the popup would have popped up.
I don't care whether the program launches or not, I just need my code to not require human intervention at this key point.
Here is a minimal example of what I currently have:
from selenium.webdriver import ActionChains, Keys
from selenium.webdriver.firefox.options import Options
from seleniumwire import webdriver
options = Options()
options.binary_location = r'C:\Program Files\Mozilla Firefox\firefox.exe'
options.set_preference("security.external_protocol_requires_permission", False)
driver = webdriver.Firefox(options=options)
# Go to the page
driver.get(url)
user_field = driver.find_element("id", "UserName")
user_field.send_keys(username)
pass_field = driver.find_element("id", "Password")
pass_field.send_keys(password)
pass_field.send_keys(Keys.ENTER)
#this is the point where the pop up appears
reqs = driver.requests
print("Success!")
driver.quit()
There are some prefs you can try
profile = webdriver.FirefoxProfile()
profile.set_preference('dom.push.enabled', False)
# or
profile = webdriver.FirefoxProfile()
profile.set_preference('dom.webnotifications.enabled', False)
profile.set_preference('dom.webnotifications.serviceworker.enabled', False)
Have you tried setting this preference to prevent the particular popup:
profile.set_preference('browser.helperApps.neverAsk.openFile', 'typeOfFile')
# e.g. profile.set_preference('browser.helperApps.neverAsk.openFile', 'application/xml,application/octet-stream')
Or have you tried just dismissing the popup:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
....
pass_field.send_keys(Keys.ENTER)
#this is the point where the pop up appears
WebDriverWait(driver, 5).until(EC.alert_is_present()).dismiss()
reqs = driver.requests
...
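Note that on Selenium 4 the FirefoxProfile approach is deprecated; the same preferences can be set directly on the Options object you already use. A minimal sketch, assuming Selenium 4 with geckodriver on PATH:

from selenium import webdriver
from selenium.webdriver.firefox.options import Options

options = Options()
# Same preferences as suggested above, set on Options instead of a FirefoxProfile
options.set_preference('dom.push.enabled', False)
options.set_preference('dom.webnotifications.enabled', False)
options.set_preference('browser.helperApps.neverAsk.openFile',
                       'application/xml,application/octet-stream')
driver = webdriver.Firefox(options=options)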
Check that checkbox manually and open the app once for every app associated with the links you use; after that it will work normally.

How to extract the comments count correctly

I am trying to extract the number of YouTube comments and have tried several methods.
My Code:
from selenium import webdriver
import pandas as pd
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import time
DRIVER_PATH = <your chromedriver path>
wd = webdriver.Chrome(executable_path=DRIVER_PATH)
url = 'https://www.youtube.com/watch?v=5qzKTbnhyhc'
wd.get(url)
wait = WebDriverWait(wd, 100)
time.sleep(40)
v_title = wd.find_element_by_xpath('//*[@id="container"]/h1/yt-formatted-string').text
print("title Is ")
print(v_title)
comments_xpath = '//h2[@id="count"]/yt-formatted-string/span[1]'
v_comm_cnt = wait.until(EC.visibility_of_element_located((By.XPATH, comments_xpath)))
#wd.find_element_by_xpath(comments_xpath)
print(len(v_comm_cnt))
I get the following error:
selenium.common.exceptions.TimeoutException: Message:
I get the correct value for the title but not for the comment count. Can anyone please guide me on what is wrong with my code?
Please note that the comments count path //h2[@id="count"]/yt-formatted-string/span[1] points to the correct place if I search for the value in inspect element.
Updated answer
Well, it was tricky!
There are several issues here:
This page has some bad JavaScript on it that makes the Selenium driver.get() method wait until the timeout even though the page looks loaded. To overcome that I used the eager page load strategy (a Selenium 4 variant of this is sketched after the output below).
This page has several blocks of code for the same areas, so sometimes one of them is used (visible) and sometimes the other. This makes working with element locators difficult. So here I wait for the visibility of the title element from one of those blocks; if it is visible I extract the text from there, otherwise I wait for the visibility of the second element (it appears immediately) and extract the text from there.
There are several ways to scroll the page. Not all of them worked here. I found one that works and does not scroll too far.
The code below is working; I have run it several times.
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.chrome.service import Service

options = Options()
options.add_argument("--start-maximized")
caps = DesiredCapabilities().CHROME
caps["pageLoadStrategy"] = "eager"
s = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, desired_capabilities=caps, service=s)

url = 'https://www.youtube.com/watch?v=5qzKTbnhyhc'
driver.get(url)
driver.maximize_window()
wait = WebDriverWait(driver, 10)

title_xpath = "//div[@class='style-scope ytd-video-primary-info-renderer']/h1"
alternative_title = "//*[@id='title']/h1"
v_title = ""
try:
    v_title = wait.until(EC.visibility_of_element_located((By.XPATH, title_xpath))).text
except:
    v_title = wait.until(EC.visibility_of_element_located((By.XPATH, alternative_title))).text
print("Title is " + v_title)

comments_xpath = "//div[@id='title']//*[@id='count']//span[1]"
driver.execute_script("window.scrollBy(0, arguments[0]);", 600)
try:
    v_comm_cnt = wait.until(EC.visibility_of_element_located((By.XPATH, comments_xpath)))
except:
    pass
v_comm_cnt = driver.find_element(By.XPATH, comments_xpath).text
print("Video has " + v_comm_cnt + " comments")
The output is:
Title is Music for when you are stressed 🍀 Chil lofi | Music to Relax, Drive, Study, Chill
Video has 834 comments
Process finished with exit code 0
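For reference, newer Selenium 4 releases removed the desired_capabilities argument; the same eager page load strategy can be set directly on the Options object. A minimal sketch, assuming Selenium 4 (where Selenium Manager can resolve chromedriver for you):

from selenium import webdriver
from selenium.webdriver.chrome.options import Options

options = Options()
options.add_argument("--start-maximized")
# Equivalent of caps["pageLoadStrategy"] = "eager" in the answer above
options.page_load_strategy = "eager"

driver = webdriver.Chrome(options=options)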

How to get a full-page screenshot in Python using Selenium and Screenshot

I'm trying to get a full-length screenshot and haven't been able to make it work. Here's the code I'm using:
from Screenshot import Screenshot
from selenium import webdriver
import time
ob = Screenshot.Screenshot()
driver = webdriver.Chrome()
driver.maximize_window()
driver.implicitly_wait(10)
url = "https://stackoverflow.com/questions/73298355/how-to-remove-duplicate-values-in-one-column-but-keep-the-rows-pandas"
driver.get(url)
img_url = ob.full_Screenshot(driver, save_path=r'.', image_name='example.png')
print(img_url)
driver.quit()
But this gives us a clipped screenshot:
So as you can see that's just what the driver window is showing, not a full-length screenshot. How can I tweak this code to get what I'm looking for?
Here is an example of how you can take a full <body> screenshot of a page:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time as t
chrome_options = Options()
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument('disable-notifications')
chrome_options.add_argument("window-size=1280,720")
webdriver_service = Service("chromedriver/chromedriver") ## path to where you saved chromedriver binary
browser = webdriver.Chrome(service=webdriver_service, options=chrome_options)
url = 'https://stackoverflow.com/questions/7263824/get-html-source-of-webelement-in-selenium-webdriver-using-python?rq=1'
browser.get(url)
required_width = browser.execute_script('return document.body.parentNode.scrollWidth')
required_height = browser.execute_script('return document.body.parentNode.scrollHeight')
browser.set_window_size(required_width, required_height)
t.sleep(5)
browser.execute_script("window.scrollTo(0,document.body.scrollHeight);")
required_width = browser.execute_script('return document.body.parentNode.scrollWidth')
required_height = browser.execute_script('return document.body.parentNode.scrollHeight')
browser.set_window_size(required_width, required_height)
t.sleep(1)
body_el = WebDriverWait(browser,10).until(EC.element_to_be_clickable((By.TAG_NAME, "body")))
body_el.screenshot('full_page_screenshot.png')
print('took full screenshot!')
t.sleep(1)
browser.quit()
The Selenium setup here is for Linux, but just note the imports and the part after the browser is defined. The code above starts from a small window, then resizes it to fit the full page body, waits a bit and computes the body size again to account for scripts that kick in on user input, and then takes the screenshot. Tested and working on a really long page.
To get a full-page screenshot using the Selenium Python client, you can use the GeckoDriver/Firefox-based save_full_page_screenshot() method as follows:
Code:
from selenium import webdriver
from selenium.webdriver.firefox.service import Service

s = Service('/path/to/geckodriver')  # placeholder: path to your geckodriver binary
options = webdriver.FirefoxOptions()
driver = webdriver.Firefox(service=s, options=options)
driver.get('https://stackoverflow.com/questions/73298355/how-to-remove-duplicate-values-in-one-column-but-keep-the-rows-pandas')
driver.save_full_page_screenshot('fullpage_gecko_firefox.png')
driver.quit()
tl;dr
[py] Adding full page screenshot feature for Firefox

GoogleCaptcha roadblock in website scraper

I am currently working on a scraper for aniworld.to.
My goal is to enter the anime name and get all of the episodes downloaded.
I have everything working except one thing...
The website has a Watch button. That button redirects you to https://aniworld.to/redirect/SOMETHING, and that site has a captcha, which means the link is not in the HTML...
Is there a way to bypass this or get the link in Python? Or a way to display the captcha so I can solve it?
The captcha only appears very rarely anyway.
The only thing I need from that page is the redirect link. It looks like this:
https://vidoza.net/embed-something.html
My very very wip code is here if it helps: https://github.com/wolfswolke/aniworld_scraper
Mitchdu showed me how to do it.
If anyone else needs help here is my code: https://github.com/wolfswolke/aniworld_scraper/blob/main/src/logic/captcha.py
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from threading import Thread
import os

def open_captcha_window(full_url):
    working_dir = os.getcwd()
    path_to_ublock = r'{}\extensions\ublock'.format(working_dir)
    options = webdriver.ChromeOptions()
    options.add_argument("app=" + full_url)
    options.add_argument("window-size=423,705")
    options.add_experimental_option('excludeSwitches', ['enable-logging'])
    if os.path.exists(path_to_ublock):
        options.add_argument('load-extension=' + path_to_ublock)

    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    driver.get(full_url)

    wait = WebDriverWait(driver, 100, 0.3)
    wait.until(lambda redirect: redirect.current_url != full_url)
    new_page = driver.current_url
    Thread(target=threaded_driver_close, args=(driver,)).start()
    return new_page

def threaded_driver_close(driver):
    driver.close()
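For completeness, a small usage sketch of the helper above; the redirect URL is just the placeholder from the question:

if __name__ == "__main__":
    # Open the redirect page, let the user solve the captcha in the app window,
    # then grab the URL the page lands on afterwards.
    target_url = open_captcha_window("https://aniworld.to/redirect/SOMETHING")
    print("Redirected to:", target_url)  # e.g. https://vidoza.net/embed-something.html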

Python Selenium 'clicks' element but nothing happens

So I am trying to scrape a website: I want to click an element, go to the page that opens from the click, find another element and click that one. The first click seems to work with no errors, but the next page doesn't open, so I get an error. Here is a screenshot of what I want to click on the first page: https://prnt.sc/10l8xa4. Clicking that should redirect to the second page. The problem seems to be that the driver clicks the element but nothing happens:
import sys, csv, os
from selenium import webdriver  # Selenium 3.141.0
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
from datetime import datetime
from time import sleep

class Scraper(object):
    ''' A lot of the messy code is just playing with the tags from the page'''

    def __init__(self, link):
        self.link = link
        self.driver = self.configure_driver()  # The simulated browser

    # Configuring the browser simulator, named driver, that will get all the information
    def configure_driver(self):
        # Add additional Options to the webdriver
        chrome_options = Options()
        # add the argument and make the browser Headless. It will work smoother & faster but it will miss the first category
        # chrome_options.add_argument("--headless")
        driver = webdriver.Chrome(options=chrome_options)
        return driver

    def click_element(self, selector):  # Clicks the provided element from the page, even if not visible
        element = WebDriverWait(self.driver, 20).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, selector)))
        ActionChains(self.driver).move_to_element(element).click(element).perform()

if __name__ == '__main__':
    product_link = 'https://www.action.com/nl-nl/click-and-collect-producten/'  # An example of a product
    app = Scraper(product_link)
    with app.driver:
        app.driver.get(product_link)
        app.click_element('a.content-card.has-text.card-theme--light.card-size--s.card-align--bottom-left')  # This gets clicked and should open a new page, but it doesn't
        sleep(10)
        app.click_element('a.product-card__link')  # This throws a Timeout, because the element can't be found, which is obvious because the second page (which has this element) didn't open
        sleep(20)
Try using it like this:
with app.driver:
    app.driver.get(product_link)
    sleep(2)
    app.click_element('li.has-submenu')
    sleep(2)
    app.click_element(
        'div.grid-item.grid-item--content')  # This gets clicked and should open new page, but it doesn't
    sleep(2)
You should add a step that opens the pop-up and then click on the button you are aiming for.
Code with the required element, but located by XPath:
def click_element(self, selector, by=By.CSS_SELECTOR):  # Clicks the provided element from the page, even if not visible
    element = WebDriverWait(self.driver, 20).until(
        EC.presence_of_element_located((by, selector)))
    ActionChains(self.driver).move_to_element(element).click(element).perform()

if __name__ == '__main__':
    product_link = 'https://www.action.com/nl-nl/click-and-collect-producten/'  # An example of a product
    app = Scraper(product_link)
    with app.driver:
        app.driver.get(product_link)
        sleep(2)
        app.click_element("//section[@class='grid']/div[@class='grid-item grid-item--content'][1]", By.XPATH)
        sleep(2)
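As a side note, since the goal is to click the element, waiting for it to be clickable rather than merely present tends to be more reliable. A minimal variation of the helper under that assumption (same signature, rest of the class unchanged):

def click_element(self, selector, by=By.CSS_SELECTOR):
    """Waits until the element is clickable, then clicks it via ActionChains."""
    element = WebDriverWait(self.driver, 20).until(
        EC.element_to_be_clickable((by, selector)))
    ActionChains(self.driver).move_to_element(element).click(element).perform()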
