The `--headless` argument only stops the Chrome browser window from opening; the chromedriver.exe window still opens. Is there any way to prevent both windows from opening?
Webdriver code
# Configure Chrome with the headless flag and related switches.
options = webdriver.ChromeOptions()
for flag in ('--headless', '--disable-gpu', '--no-sandbox', 'disable-infobars'):
    options.add_argument(flag)

driver = webdriver.Chrome(options=options)
try:
    driver.get(link)
    # Scroll the page before reading its source.
    NovelBox.scroll(driver)
    soup = BeautifulSoup(driver.page_source, "lxml")
finally:
    # Always shut the browser session down, even if loading or parsing fails.
    driver.quit()
Scroll function
def scroll(driver, pause=1):
    """Scroll to the bottom of the page until its height stops growing.

    Args:
        driver: Object exposing ``execute_script`` (e.g. a Selenium WebDriver).
        pause: Seconds to wait after each scroll before re-measuring the page.
    """
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        # Scroll down to bottom
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        # Wait to load page
        time.sleep(pause)
        # Calculate new scroll height and compare with last scroll height
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            # If heights are the same it will exit the function
            break
        last_height = new_height
from selenium.webdriver.chrome.options import Options
# Enable headless mode through the Options object.
options = Options()
options.headless = True
# Replace the placeholder with the real location of chromedriver.exe; the raw
# string keeps the Windows backslashes literal.
driver = webdriver.Chrome(options=options, executable_path=r"PATH\TO\SELENIUM\DRIVER\EXE")
The problem was with Python IDLE/Shell. When I run the script through Python IDLE or the Python shell, the chromedriver.exe window opens, but it does not open when I run the script in Visual Studio Code or from the terminal.
Related
I want to click on each product on aliexpress and do something with it.
However, I kept running into an ElementClickInterceptedException
Please verify that the code is correct before answering the question if you are using chat-GPT or any other AI to help with this problem.
These are the things that I tried
# Attempt 1: scroll each product into view, then move the mouse to it and click.
for supplier in suppliers:
    driver.execute_script("arguments[0].scrollIntoView();", supplier)
    actions = ActionChains(driver)
    actions.move_to_element(supplier).click().perform()
# Attempt 2: scroll into view, hover, wait for the product link to be visible,
# then click the element directly.
for supplier in suppliers:
    driver.execute_script("arguments[0].scrollIntoView();", supplier)
    # perform() is required: without it the queued move is never sent to the browser.
    ActionChains(driver).move_to_element(supplier).perform()
    wait.until(EC.visibility_of_element_located((By.XPATH, ".//*[@class='list--gallery--34TropR']//span/a")))
    try:
        supplier.click()
    except ElementClickInterceptedException:
        print('object not on screen')
However, this still gives me the highest click-through-rate
# Attempt 3: click every element directly and report the ones that are intercepted.
for supplier in suppliers:
    try:
        supplier.click()
    except ElementClickInterceptedException:
        print('object not on screen')
    else:
        print('Supplier clicked')
        time.sleep(1)
This is how I initialized the driver and loaded the elements.
search_key = "Motor+toy+boat"
suppliers = []
# Build the search URL once so the printed and the visited address cannot differ.
search_url = "https://www.aliexpress.com/premium/"+search_key+".html?spm=a2g0o.best.1000002.0&initiative_id=SB_20221218233848&dida=y"
print(search_url)
# create a webdriver object and set the path to the Chrome driver
service = Service('../venv/chromedriver.exe')
driver = webdriver.Chrome(service=service)
# navigate to the Aliexpress website
driver.get("https://www.aliexpress.com/")
# Wait until the search box is present in the DOM
wait = WebDriverWait(driver, 10)
wait.until(EC.presence_of_element_located((By.ID, "search-key")))
# set an implicit wait of 10 seconds on the driver
driver.implicitly_wait(10)
driver.get(search_url)
# Scroll down in 800px steps until the document height stops changing
last_height = driver.execute_script("return document.body.scrollHeight")
while True:
    driver.execute_script("window.scrollBy(0, 800);")
    sleep(1)
    new_height = driver.execute_script("return document.body.scrollHeight")
    if new_height == last_height:
        print(new_height, last_height)
        break
    last_height = new_height
# Collect every element whose class attribute contains the product-card classes
suppliers.extend(driver.find_elements(By.XPATH, "//*[contains(@class, 'manhattan--container--1lP57Ag cards--gallery--2o6yJVt')]"))
Couple of issues I have identified.
The site is detecting the bot, so after a couple of runs it stops identifying the elements. Use --disable-blink-features in the Chrome options.
Once you iterate over the list, it clicks somewhere else; just wait for a second and then click, and it will work.
The added code clicks only the elements visible on the page. If you need to click more, you need to scroll the page and then click.
You can check the total count of visible elements on the page.
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
import time
# Chrome options: drop the automation switch and extension, and disable the
# AutomationControlled blink feature.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_experimental_option("excludeSwitches", ['enable-automation'])
chrome_options.add_experimental_option('useAutomationExtension', False)
chrome_options.add_argument('--disable-blink-features=AutomationControlled')
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()),options=chrome_options)
driver.get("https://www.aliexpress.com/w/wholesale-uk.html")

card_xpath = "//*[contains(@class, 'manhattan--container--1lP57Ag cards--gallery--2o6yJVt')]"
link_xpath = ".//*[@class='list--gallery--34TropR']//span/a"

# Click the first product card once it is clickable.
WebDriverWait(driver,10).until(EC.element_to_be_clickable((By.XPATH, card_xpath))).click()
# Collect the product links that are currently visible.
suppliers=WebDriverWait(driver,10).until(EC.visibility_of_all_elements_located((By.XPATH, link_xpath)))
print("Total visible element on the page: " + str(len(suppliers)))
for supplier in suppliers:
    # Pause for a second before each click.
    time.sleep(1)
    supplier.click()
Selenium cannot find the web element in headless mode, although it finds it easily when headless is false. I tried adding time.sleep() and an explicit wait, but the result was unsuccessful. Then I tried changing the window size: I commented out 'start-maximized' and set window-size=1400,800, but the result is the same.
url is
link description here
web element is last page number
# Last <span> inside the pagination strip (the last page number).
count_of_pages=wait.until(EC.presence_of_element_located((By.XPATH,'//span[@class="s-pagination-strip"]/span[last()]')))
chrome_options = Options()
caps = DesiredCapabilities().CHROME
caps["pageLoadStrategy"] = "eager"
chrome_options.headless = True
chrome_options.add_argument("start-maximized")
chrome_options.add_argument("window-size=1400,600")
#chrome_options.add_experimental_option("detach", True)
chrome_options.add_argument("--no-sandbox")
# Set excludeSwitches once with both values: a second add_experimental_option
# call for the same key replaces the earlier list instead of extending it.
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation", "enable-logging"])
chrome_options.add_experimental_option('useAutomationExtension', False)
chrome_options.add_argument('--disable-blink-features=AutomationControlled')
webdriver_service = Service('./driver/chromedriver.exe')
driver = webdriver.Chrome(service=webdriver_service, options=chrome_options, desired_capabilities=caps)
driver.set_window_size(1920, 1080)
I am trying to use Selenium with Python to open a TikTok user page and scroll down to load all of the user's videos.
I can open the URL and get the source code, including the data for all loaded videos, but when I scroll down, sleep for a while, and get the source code again, the page code is the same, with the same videos, and nothing new is loaded!
from selenium import webdriver
from selenium.webdriver.common.by import By
import re
import json
from bs4 import BeautifulSoup
import time
options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
# open it, go to a website, and get results
wd = webdriver.Chrome('chromedriver',options=options)
wd.get("https://www.tiktok.com/@tiktok")
time.sleep(20)
#wd.implicitly_wait(10)
#print(wd.page_source)
SCROLL_PAUSE_TIME = 20
# Get scroll height (use `wd`; no variable named `driver` exists in this script)
last_height = wd.execute_script("return document.body.scrollHeight")
while True:
    # Scroll down to bottom
    wd.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    # Wait to load page
    time.sleep(SCROLL_PAUSE_TIME)
    # Calculate new scroll height and compare with last scroll height
    new_height = wd.execute_script("return document.body.scrollHeight")
    if new_height == last_height:
        break
    last_height = new_height
print(wd.page_source)
i also tried to use this code for scroll down
# Scroll to the bottom once, wait ten seconds, then print the page source.
wd.execute_script("window.scrollTo(0, document.body.scrollHeight);")
time.sleep(10)
print(wd.page_source)
But again, nothing new is loaded in the source code!
I am using Google Colab; any help?
update: changed variable "driver" to "wd"
update: that's the install code for chromium driver
install
# install chromium, its driver, and selenium
# NOTE(review): the leading "!" presumably runs each line as a shell command from the Colab cell - confirm
!apt-get update
!apt install chromium-chromedriver
# copy the chromedriver binary into /usr/bin
!cp /usr/lib/chromium-browser/chromedriver /usr/bin
!pip install selenium
In the while loop you start using a nonexistent variable called driver. I changed it to wd and it scrolled down, but the site showed that there was a problem trying to load from there.
The code also throws an error:
[9612:864:0614/164525.919:ERROR:util.cc(127)] Can't create base directory: C:\Program Files\Google\GoogleUpdater
I searched for this error and it seems to be related to the versions of Chrome and chromedriver, as stated here: https://www.reddit.com/r/selenium/comments/uqt9z9/cant_create_base_directory/
That's as far as I got; hope it helps. :)
Here's my current code
from selenium import webdriver
from selenium.webdriver.common.by import By
import re
import json
from bs4 import BeautifulSoup
import time
options = webdriver.ChromeOptions()
#options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
# open it, go to a website, and get results
wd = webdriver.Chrome('chromedriver',options=options)
wd.get("https://www.tiktok.com/@tiktok")
time.sleep(20)
#wd.implicitly_wait(10)
#print(wd.page_source)
SCROLL_PAUSE_TIME = 20
# Get scroll height
last_height = wd.execute_script("return document.body.scrollHeight")
while True:
    # Scroll down to bottom
    wd.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    # Wait to load page
    time.sleep(SCROLL_PAUSE_TIME)
    # Calculate new scroll height and compare with last scroll height
    new_height = wd.execute_script("return document.body.scrollHeight")
    if new_height == last_height:
        break
    last_height = new_height
print(wd.page_source)
I took out the headless argument to see the results more clearly
I am developing a social media web parser with selenium. I hope to get the updated html source after the Chrome driver scroll down to the end of the page.
The scrolling-down works just fine, however the page source I got is not updated. Before scrolling down, I have 15 social media posts on a page. After scrolling down, I should have more than that, however there are still 15 in the output.
Any help will be appreciated!
My code is below:
#set Chrome driver with login cookies
chrome_options.add_argument(f'user-agent={userAgent}')
prefs = {"profile.default_content_setting_values.notifications" : 2}
chrome_options.add_experimental_option("prefs",prefs)
browser = webdriver.Chrome(chrome_options=chrome_options,executable_path='MYPATH/chromedriver.exe')
URL_by_day = "URL"
browser.get("LOGIN PAGE")
browser.delete_all_cookies()
for k, v in cookies.items():
    browser.add_cookie({'name':k,'value':v})
browser.get(URL_by_day)
# Take the baseline height only after the target page has been requested, so
# the first comparison is against that page rather than the empty start page.
last_height = browser.execute_script("return document.body.scrollHeight")
#scroll down the page
while True:
    # Scroll down to bottom
    browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    # Wait to load page
    # NOTE(review): 0.3 s may be too short for new posts to load before the
    # height is re-measured - confirm against the real page.
    time.sleep(0.3)
    # Calculate new scroll height and compare with last scroll height
    new_height = browser.execute_script("return document.body.scrollHeight")
    if new_height == last_height:
        break
    last_height = new_height
#get page source
r = browser.page_source
browser.close()
time.sleep(2)
I am trying to use selenium to scroll down infinitely this webpage https://gfycat.com/discover/trending-gifs
I try this code:
from selenium import webdriver
# Start Chrome maximized with extensions disabled.
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_argument("--disable-extensions")
driver = webdriver.Chrome(options=options, executable_path=r"C:\chromedriver.exe")
try:
    driver.get(url)
    # Single jump to the current bottom of the document.
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
finally:
    # Close the browser even if navigation or the script call fails.
    driver.quit()
But no scroll down happened.
I also tried:
from selenium.webdriver.common.keys import Keys
# Send the END key to the root <html> element ten times.
for _ in range(10):
    driver.find_element_by_css_selector('html').send_keys(Keys.END)
But no scroll down happened too.
For an infinite-scrolling website you can use the following approach in Selenium. As you can see, I use a while loop to keep scrolling until no more content loads; you also need to import the time module to pause while the website loads.
def scroll(driver):
timeout = 5
# Get scroll height
last_height = driver.execute_script("return document.body.scrollHeight")
while True:
# Scroll down to bottom
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
# load the website
time.sleep(5)
# Calculate new scroll height and compare with last scroll height
new_height = driver.execute_script("return document.body.scrollHeight")