Can't click on element: ElementNotInteractable (Selenium, Python)

I'm trying to click on a "next page" button, but an "ElementNotInteractable" error appears:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import pandas as pd
import time

options = Options()
options.headless = False
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36'
options.add_argument('user-agent={0}'.format(user_agent))
options.add_argument('window-size=1920x1080')

website = 'https://www.promoopcion.com/bebidas/termos.html'
path = '/Users/diegotrigal/OneDrive/00000001_HDD_TOSHIBA/diegotrigal/PYHTON/chromedriver'
driver = webdriver.Chrome(path, options=options)
driver.get(website)
driver.maximize_window()
# driver.implicitly_wait(30)
# driver.get_screenshot_as_file("screenshot-3.png")

# pagination
pagination = driver.find_element('xpath', '//div[contains(@class, "products-list")]')
pages = pagination.find_elements_by_tag_name('li')
last_page = 8
current_page = 1

product_name = []
product_sku = []

while current_page <= last_page:
    container = driver.find_element_by_xpath('//div[contains(@class, "products-list")]')
    productos = container.find_elements_by_xpath('.//li')
    for product in productos:
        product_name.append(product.find_element_by_class_name("product-item-name").text)
        product_sku.append(product.find_element_by_class_name("product-sku").text)
    current_page = current_page + 1
    driver.execute_script('window.scrollTo(0,2000)')
    try:
        next_page = driver.find_element('xpath', '//li[contains(@class, "pages-item-next")]')
        next_page.click()
    except:
        pass

df_productos = pd.DataFrame({'Nombre': product_name, 'SKU': product_sku})
df_productos.to_csv('termos.csv', index=False)
driver.quit()

# driver.find_element_by_class_name('product-items')
# driver.find_element_by_class_name('product-item-link')
# driver.find_element_by_class_name('product-sku')
I've tried changing next_page's XPath to different ones, but it doesn't work.
I also tried the is_displayed() condition (once the scroll action has been performed): it returns False, while is_enabled() returns True.

With this script you can get what you want; I hope the comments in the code help:
# Needed libs
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium import webdriver

# Initiate the driver and navigate
driver = webdriver.Chrome()
driver.maximize_window()
driver.get('https://www.promoopcion.com/bebidas/termos.html?product_list_limit=24')

# Count how many pages there are by counting the pagination elements
number_of_pages = len(WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.XPATH, "(//div[@class='pages'])[2]//a[@class='page']"))))

# For every page we take the titles and then click the next button to go to the next page
for i in range(0, number_of_pages):
    product_titles = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.XPATH, "//a[@class='product-item-link']")))
    for product_title in product_titles:
        print(product_title.text)
    next_button = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "(//a[@title='Siguiente'])[2]")))
    driver.execute_script("arguments[0].scrollIntoView();", next_button)
    next_button.click()
    time.sleep(2)

# At this point we are on the last page, so we take the products of the last page too
product_titles = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.XPATH, "//a[@class='product-item-link']")))
for product_title in product_titles:
    print(product_title.text)


"TypeError: 'function' object is not iterable", why am I getting this error?

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait as wait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options

web = 'https://www.amazon.com'
driver_path = 'V:\Python Project\chromedriver_win32\chromedriver.exe'
options = webdriver.ChromeOptions()
options.add_argument('--headless')
s = Service('V:\Python Project\chromedriver_win32\chromedriver.exe')
driver = webdriver.Chrome(service=s)
driver.maximize_window()  # For maximizing window
driver.implicitly_wait(30)
# driver = webdriver.Chrome(options=options, executable_path=driver_path)
driver.get(web)
driver.implicitly_wait(5)

keyword = "Table"
search = driver.find_element(By.ID, "twotabsearchtextbox")
search.send_keys(keyword)

# click search button
search_button = driver.find_element(By.ID, 'nav-search-submit-button')
search_button.click()
driver.implicitly_wait(5)

product_asin = []
product_name = []
product_price = []
product_ratings = []
product_ratings_num = []
product_link = []

items = (EC.presence_of_all_elements_located((By.CLASS_NAME, "s-result-item sasin")))  # => Error on this line

for item in items:
    # find name
    name = item.find_element(By.CLASS_NAME, "a-size-medium a-color-base a-text-normal")
    product_name.append(name.text)

    # find ASIN number
    data_asin = item.get_attribute("data-asin")
    product_asin.append(data_asin)

    # find price
    whole_price = item.find_element(By.CLASS_NAME, "a-price-whole")
    fraction_price = item.find_element(By.CLASS_NAME, "a-price-fraction")
    if whole_price != [] and fraction_price != []:
        price = '.'.join([whole_price[0].text, fraction_price[0].text])
    else:
        price = 0
    product_price.append(price)

    # find ratings box
    ratings_box = item.find_element(By.CLASS_NAME, "a-row a-size-small")
    # find ratings and ratings_num
    if ratings_box != []:
        ratings = ratings_box[0].get_attribute('aria-label')
        ratings_num = ratings_box[1].get_attribute('aria-label')
    else:
        ratings, ratings_num = 0, 0
    product_ratings.append(ratings)
    product_ratings_num.append(str(ratings_num))

    # find link
    link = item.find_element(By.CLASS_NAME, "a-link-normal a-text-normal").get_attribute("href")
    product_link.append(link)

driver.quit()

# to check data scraped
print(product_name)
print(product_asin)
print(product_price)
print(product_ratings)
print(product_ratings_num)
print(product_link)
For the code above I am getting the following error:
for item in items:
TypeError: 'function' object is not iterable
I am working on the above code, but it is giving me the error "TypeError: 'function' object is not iterable". It should mostly be correct, but I don't know what is missing that causes the error. Can anyone please provide me with the solution?
EC.presence_of_all_elements_located
By itself, this does not return the elements; it only builds a condition that checks whether the items you want are present. Use something like below:
WebDriverWait(self.browser, 5).until(
    EC.visibility_of_all_elements_located((By.CLASS_NAME, "s-result-item sasin")))
rows = self.browser.find_elements(
    By.CLASS_NAME, "s-result-item sasin")
self.assertIn(
    rowItem,
    [row.text for row in rows]
)
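Adapted to the question's code: WebDriverWait(...).until(...) returns whatever the condition locates, so you can assign the result directly. Note also that By.CLASS_NAME does not accept compound (space-separated) class names, so a CSS selector is the safer locator here; a minimal sketch (the .s-result-item.s-asin selector is an assumption about Amazon's current markup):

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

# until() returns the list of located elements, so items is iterable here.
items = WebDriverWait(driver, 10).until(
    EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".s-result-item.s-asin"))
)
for item in items:
    print(item.get_attribute("data-asin"))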
I think there is some mistake in the class name; can you check the code below:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait as wait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
web = 'https://www.amazon.com'
driver_path = 'chromedriver_win32\chromedriver.exe'
options = webdriver.ChromeOptions()
options.add_argument('--headless')
# ser=Service(executable_path='chromedriver_win32\chromedriver.exe')
driver = webdriver.Chrome(executable_path='chromedriver_win32\chromedriver.exe')
driver.maximize_window() # For maximizing window
driver.implicitly_wait(30)
# driver = webdriver.Chrome(options=options, executable_path=driver_path)
driver.get(web)
driver.implicitly_wait(5)
keyword = "Table"
search = driver.find_element(By.ID,"twotabsearchtextbox")
search.send_keys(keyword)
# click search button
search_button = driver.find_element(By.ID,'nav-search-submit-button')
search_button.click()
driver.implicitly_wait(5)
product_asin = []
product_name = []
product_price = []
product_ratings = []
product_ratings_num = []
product_link = []
EC.presence_of_all_elements_located((By.CLASS_NAME, "s-result-item"))
rows = driver.find_elements(
    By.CLASS_NAME, "s-result-item")
print(rows)

Webscraping Multiple Pages in Python with Selenium - loop not working

I'm quite new to Python and have written a script using Selenium to scrape a website, but I can't get the loop to cycle through pages: it currently just repeats the data from the first page 5 times. I want to scrape all the pages for 'BR1'; any help would be great.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

with open('rightmove.csv', 'w') as file:
    file.write('PropertyCardcontent \n')

PATH = ("/usr/local/bin/chromedriver")
driver = webdriver.Chrome(PATH)
driver.get("https://www.rightmove.co.uk/house-prices.html")
print(driver.title)

elem = driver.find_element(By.NAME, 'searchLocation')  # Find the search box
elem.send_keys('BR1' + Keys.RETURN)

try:
    content = WebDriverWait(driver, 15).until(
        EC.presence_of_element_located((By.ID, 'content'))
    )
finally:
    time.sleep(3)

for p in range(5):
    sold = content.find_elements(By.CLASS_NAME, 'sold-prices-content-wrapper ')
    for solds in sold:
        address = solds.find_elements(By.CLASS_NAME, 'sold-prices-content ')
        for addresses in address:
            result = addresses.find_elements(By.CLASS_NAME, 'results ')
            for results in result:
                card = results.find_elements(By.CLASS_NAME, 'propertyCard')
                for propertyCard in card:
                    header = propertyCard.find_elements(By.CLASS_NAME, 'propertyCard-content')
                    for propertyCardcontent in header:
                        road = propertyCardcontent.find_elements(By.CLASS_NAME, 'title')
                    for propertyCardcontent in header:
                        road = propertyCardcontent.find_elements(By.CLASS_NAME, 'subTitle')
                        for subtitle in road:
                            bed = subtitle.find_elements(By.CLASS_NAME, 'propertyType')
    with open('rightmove.csv', 'a') as file:
        for i in range(len(result)):
            file.write(header[i].text + '\n')
    button = driver.find_element(By.XPATH, '//*[@id="content"]/div[2]/div[2]/div[4]/div[27]/div[3]/div')
    button.click()
    file.close()
    time.sleep(3)
driver.quit()
Since the website's URL has the page number in it, I recommend you use "https://www.rightmove.co.uk/house-prices/br1.html?page=1" as the base URL and loop through the pages, changing the last index of the URL with something like a format string, as the sketch below shows.
One other thing: you don't need all those nested for loops. You can simply assign each variable its specific value, since everything you need is inside one HTML block that is easy to navigate.
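For instance, a minimal sketch of that URL-based pagination (the page count of 5 is only a placeholder; read the real count from the site as the update below does):

from selenium import webdriver

driver = webdriver.Chrome()
base_url = "https://www.rightmove.co.uk/house-prices/br1.html?page={}"

# Loop over page numbers by rewriting the URL instead of clicking "Next".
for page in range(1, 5):
    driver.get(base_url.format(page))
    # ... scrape the propertyCard elements on this page ...
driver.quit()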
Update:
I'm sorry for being late, had unexpected stuff (...).
I've made some changes as I use Brave, so make sure you select your browser (Chrome, I believe); the chromedriver (ver. 102) stays the same (or use the one matching your Chrome version).
I've also got the Price and Date and stored them in a tuple.
Every record is stored as a list: [Title, propertyType, tuple of (Price, Date)].
At the end, it creates a CSV and stores everything inside with ";" as the delimiter.
You can, if you prefer, split the price and date for later use; up to you.
Note: this looping method only applies to websites where the page number is included in the URL. In this case, both the search key and the page number are part of the URL.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
import time
import random
import itertools

options = Options()
options.binary_location = r'C:\Program Files\BraveSoftware\Brave-Browser\Application\brave.exe'
driver = webdriver.Chrome(options=options, service=Service("chromedriver.exe"))

key_word = "BR1".lower()
base_url = f"https://www.rightmove.co.uk/house-prices/{key_word}.html?page=1"
driver.get(base_url)

# Number of pages
pages = driver.find_element(By.XPATH, '//span[@class="pagination-label"][2]').text
pages = int(pages.strip('of'))

WebDriverWait(driver, 15).until(
    EC.presence_of_element_located((By.CLASS_NAME, 'results '))
)

data = []
pc = 0
for p in range(1, pages + 1):
    driver.get(f"https://www.rightmove.co.uk/house-prices/{key_word}.html?page={p}")
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, '//div//div[@class="propertyCard"]'))
    )
    propertyCards = driver.find_elements(By.XPATH, '//div//div[@class="propertyCard"]')
    for propertyCard in propertyCards:
        title = propertyCard.find_element(By.CLASS_NAME, 'title').text
        propertyType = propertyCard.find_element(By.CLASS_NAME, 'propertyType').text
        price_list = propertyCard.find_elements(By.CLASS_NAME, 'price')
        date_list = propertyCard.find_elements(By.CLASS_NAME, 'date-sold')
        data.append([title, propertyType])
        for p, d in itertools.zip_longest(price_list, date_list, fillvalue=None):
            try:
                price = p.text
                date = d.text
                data[pc].append((price, date))
            except Exception as e:
                print(e)
        pc += 1
    time.sleep(random.randint(1, 4))

print(data)

with open('rightmove.csv', 'w') as file:
    header = "Title;propertyType;Price_Date\n"
    file.write(header)
    for record in data:
        file.write("{};{};{}\n".format(record[0], record[1], record[2:]))

driver.quit()
You don't have to walk down the DOM element by element; you can just use an XPath or a class name (if it's unique, otherwise an XPath or CSS selector is better) and get the item you are looking for directly.
Anyway, follow this:
import time
import selenium.webdriver as webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome("/usr/local/bin/chromedriver")
driver.get("https://www.rightmove.co.uk/house-prices.html")

# send query
query = "BR1"
search_bar = driver.find_element(By.XPATH, '//input[@class="searchBox ac_input"]')
search_bar.send_keys(query)
search_bar.send_keys(Keys.ENTER)

# wait for the results to be loaded
WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.CLASS_NAME, 'propertyCard'))
)

# get amount of pages
pages = driver.find_element(By.XPATH, '//span[@class="pagination-label"][2]').text
pages = int(pages.replace('of ', ''))

data = []
i = 1
while i <= pages:
    WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, '//div[contains(text(), "Next")]'))
    ).click()
    # wait for the page to load results
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, '//div//div[@class="propertyCard"]'))
    )
    propertyCards = driver.find_elements(By.XPATH, '//div//div[@class="propertyCard"]')
    # loop over results and store data
    for propertyCard in propertyCards:
        title = propertyCard.find_element(By.CLASS_NAME, 'title').text
        propertyType = propertyCard.find_element(By.CLASS_NAME, 'propertyType').text
        data.append((title, propertyType))
    time.sleep(1)
    i += 1

print("you reached the last page")

# print the collected results
print(data)
driver.close()
I use a list of tuples because in your example you want to store 2 items; if you want to store more data you can use a dict and then convert it into a CSV with csv.DictWriter directly, as the sketch below shows. Enjoy.
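A minimal sketch of that dict-plus-DictWriter variant (the field names and sample values are only illustrative):

import csv

# Each record is a dict instead of a tuple; add keys as needed.
records = [
    {"title": "2 Example Road, Bromley", "propertyType": "Terraced"},
    {"title": "5 Sample Close, Bromley", "propertyType": "Flat"},
]

with open("rightmove.csv", "w", newline="") as f:
    writer = csv.DictWriter(f, fieldnames=["title", "propertyType"])
    writer.writeheader()        # write the column names once
    writer.writerows(records)   # one CSV row per dict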

Get text from WebElement

categories = driver.find_elements(By.XPATH, '//div[starts-with(@class, "item-1EEezFCx")]')
for category in categories:
    try:
        text = driver.find_element(By.XPATH, '//div[@text()="{category.text}"').click()
        print(text)
        time.sleep(2)
    except ElementNotInteractableException:
        pass
Here categories holds the WebElements whose class names all start with item-1EEezFCx. For each iteration I would like to access the text of the WebElement, print it, and click it. How can I access the text element?
Full code (edited):
import os
import time
import selenium.webdriver as webdriver
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver import Firefox
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import TimeoutException, NoSuchElementException, ElementNotInteractableException
from bs4 import BeautifulSoup
import pandas as pd

user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:100.0) Gecko/20100101 Firefox/100.0'
path = "C:\\"
FireFoxDriverPath = os.path.join(path, 'Python39', 'geckodriver.exe')
FireFoxProfile = r'C:\Users\username\AppData\Roaming\Mozilla\Firefox\Profiles\ltk7fdt2.default'
options = Options()
options.set_preference('profile', FireFoxProfile)
service = Service(r'C:\Python39\geckodriver.exe')
driver = Firefox(service=service, options=options)

url = "https://www.tradingview.com/markets/cryptocurrencies/prices-all/"
driver.get(url)

# Step 1. Toggle the active currency
currency = 'USD'
active_currency = driver.find_element(By.XPATH, '//span[contains(@class, "modeTitleActive-bJ0BPoV3")]')
if active_currency.text == currency:
    pass
else:
    driver.find_element(By.XPATH, '//input[@type="checkbox"]').click()

# Step 2. Import tables
xlwriter = pd.ExcelWriter('TradingView Crypto Prices.xlsx')
categories = driver.find_elements(By.XPATH, '//div[starts-with(@class, "item-1EEezFCx")]')

# Load columns one by one
for category in categories:
    try:
        driver.find_element(By.XPATH, category).text()
        time.sleep(2)
    except ElementNotInteractableException:
        pass
    load_more = True
    while load_more:
        try:
            driver.find_element(By.CLASS_NAME, 'tv-load-more__btn').click()
            time.sleep(1)
        except ElementNotInteractableException:
            load_more = False
    df = pd.read_html(driver.page_source)[0]
    df.to_excel(xlwriter, sheet_name=category.text, index=False)

xlwriter.save()
driver.quit()

for category in categories:
    print(category.text)
    category.click()
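For what it's worth, a minimal sketch of the pattern this snippet seems to be after: .text is a property, not a method, so category.text already gives the string, and if the div really has to be re-located by its text, the XPath must be built with an f-string (both the f prefix and the closing bracket are missing in the attempt above):

# Assumes driver, By, and time from the code above.
for category in categories:
    label = category.text   # .text is a property; no parentheses
    print(label)
    element = driver.find_element(By.XPATH, f'//div[text()="{label}"]')
    element.click()
    time.sleep(2)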

Selenium button not being clicked but is being highlighted

What I'm trying to do is make a Nike product auto-buyer. The problem is that after selecting the size it doesn't let me click through Selenium; I even tried to click manually, but nothing pops up. This is the part of my code where I try to click (not the full code):
from selenium import webdriver
from selenium.common.exceptions import JavascriptException
from selenium.webdriver import ChromeOptions
import re
from bs4 import BeautifulSoup
import requests
import json
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import os

user = os.environ['USERNAME']
snkrsurl = "https://www.nike.com/t/air-zoom-pegasus-38-womens-running-shoe-wide-gg8GBK/CW7358-500"  # input("Please input your SNKRS url \n")
size = float(input("Please input size \n"))

options = ChromeOptions()
options.add_experimental_option('excludeSwitches', ['enable-logging'])
options.add_experimental_option("useAutomationExtension", False)
options.add_experimental_option("detach", True)
options.add_argument("--disable-notifications")
chrome = webdriver.Chrome(options=options)

if "https://" in snkrsurl:
    pass
elif "http://" in snkrsurl:
    pass
else:
    snkrsurl = "http://" + snkrsurl

chrome.get(snkrsurl)
with requests.Session() as session:
    soup = BeautifulSoup(session.get(snkrsurl).text, features="lxml")
script = soup.find("script", string=re.compile('INITIAL_REDUX_STATE')).string
redux = json.loads(script[script.find('{'):-1])
products = redux["Threads"]["products"]
wait = WebDriverWait(chrome, 15)

def step1(i, v):
    for key, product in products.items():
        if float(product["skus"][i]["nikeSize"]) == v:
            print("Found")
            if v.is_integer():
                wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="gen-nav-footer"]/nav/button'))).click()
                wait.until(EC.element_to_be_clickable((By.XPATH, "//*[text()='{}']".format(int(v))))).click()
                chrome.execute_script("window.scroll(0,609)")
                wait.until(EC.element_to_be_clickable((By.XPATH, '//*[text()="Add to Bag"]'))).click()
                break
            else:
                wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="gen-nav-footer"]/nav/button'))).click()
                wait.until(EC.element_to_be_clickable((By.XPATH, "//*[text()='{}']".format(v)))).click()
                e = chrome.find_element_by_css_selector("#floating-atc-wrapper > div > button.ncss-btn-primary-dark.btn-lg.add-to-cart-btn")
                chrome.execute_script("arguments[0].scrollIntoView(true);")
                e.click()
                break
        else:
            pass

for i, v in products.items():
    global length
    length = len(v['skus'])
    break

for i in range(length):
    length -= 1
    step1(length, size)
I use window.scroll to get to that element because if I don't, it throws an error saying the element is not interactable; and yes, checkout is only clickable from a real Chrome session.
Thanks
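One observation on the snippet above: the scrollIntoView call never receives the element, because e is not passed to execute_script, so arguments[0] is undefined inside the script. A minimal sketch of the usual pattern, passing the element in and falling back to a JavaScript click if the normal click is intercepted (the CSS selector is taken from the question and may have changed on the site):

# Scroll the element itself into view, then click.
e = chrome.find_element_by_css_selector(
    "#floating-atc-wrapper > div > button.ncss-btn-primary-dark.btn-lg.add-to-cart-btn")
chrome.execute_script("arguments[0].scrollIntoView(true);", e)  # the element must be passed in
try:
    e.click()
except Exception:
    chrome.execute_script("arguments[0].click();", e)  # JS click bypasses overlay issues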

Python web scraping with Selenium: repeating a certain process

I'm trying to scrape the contact data of companies from this website:
https://de.statista.com/companydb/suche?idCountry=276&idBranch=0&revenueFrom=-1000000000000000000&revenueTo=1000000000000000000&employeesFrom=0&employeesTo=100000000&sortMethod=revenueDesc&p=4
I can do this with the following code:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
import pandas as pd
import time
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

company_list = []  # create empty list
driver = webdriver.Chrome('/Users/rieder/Anaconda3/chromedriver_win32/chromedriver.exe')  # define driver
driver.get('https://de.statista.com/companydb/suche?idCountry=276&idBranch=0&revenueFrom=-1000000000000000000&revenueTo=1000000000000000000&employeesFrom=0&employeesTo=100000000&sortMethod=revenueDesc&p=1')  # open website

driver.find_element_by_id("cookiesNotificationConfirm").click()  # accept cookies
driver.find_element_by_xpath("//*[@id='content']/section[3]/div/div/form/div/div[2]/div[2]/table/tr[2]/td[1]/a").click()  # click on the first company name link

contact_data = WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.XPATH, "/html/body/div[3]/div[4]/section[6]/div/div[2]/div[2]/div/div")))  # get the contact data of the company you chose before
for cn in contact_data:
    company_list.append(cn.text)  # this stores the text in the list

driver.back()  # navigate to the previous site
time.sleep(5)  # wait for the pop-up window to appear
driver.find_element_by_xpath("/html/body/div[15]/div[3]/div[3]/div[1]/button[1]").click()  # deny the website's popup
time.sleep(5)  # wait for the popup to vanish
driver.find_element_by_xpath("//*[@id='content']/section[3]/div/div/form/div/div[2]/div[2]/table/tr[3]/td[1]/a").click()  # click on the next company name link

contact_data2 = WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.XPATH, "/html/body/div[3]/div[4]/section[6]/div/div[2]/div[2]/div/div")))  # get the contact data of the company you chose before
for cn in contact_data2:
    company_list.append(cn.text)  # this stores the text in the list

print(company_list)  # show the list
My output is this:
['GUTex GmbH\nGerhard-Unland-Str. 1\n26683\nSaterland\nDeutschland', 'Robert Bosch GmbH\nRobert-Bosch-Platz 1\n70839\nGerlingen\nDeutschland']
Problem:
I want my code to do this for the whole list on page 1, then move on to the next page and do it again, until I have, for example, 100 addresses in the list. I would do this with a while loop, but my XPaths for finding the address are too specific, so it would always loop over the same companies.
Thanks a lot in advance.
Try the code below for a one-page data extract. Update the code for iterating over the next page's records.
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

company_list = []  # create empty list
driver = webdriver.Chrome()  # define driver
driver.get('https://de.statista.com/companydb/suche?idCountry=276&idBranch=0&revenueFrom=-1000000000000000000&revenueTo=1000000000000000000&employeesFrom=0&employeesTo=100000000&sortMethod=revenueDesc&p=1')  # open Website

if len(driver.find_elements_by_id("cookiesNotificationConfirm")) > 0:
    driver.find_element_by_id("cookiesNotificationConfirm").click()  # accept cookies

WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, '//table[@class="zebraTable zebraTable--companies"]//td[1]')))
elementsSize = len(driver.find_elements_by_xpath('//table[@class="zebraTable zebraTable--companies"]//td[1]'))

# To iterate over the company list and click on the company name then capture the address on navigated page
# come back to previous page and repeat the same.
for i in range(elementsSize):
    WebDriverWait(driver, 20).until(
        EC.element_to_be_clickable((By.XPATH, '//table[@class="zebraTable zebraTable--companies"]//td[1]')))
    elements = driver.find_elements_by_xpath('//table[@class="zebraTable zebraTable--companies"]//td[1]/a')
    company_name = elements[i].text
    elements[i].click()  # click on the first company namelink
    WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH,
        '//*[@id="contactInformation"]//div[@class="companyContactBox"]')))  # get the contactdata from the company you chose before
    contact_data = driver.execute_script("return document.getElementsByClassName('companyContactBox')[0].innerText")
    # print(contact_data)
    company_list.append(company_name + " : " + contact_data)
    driver.back()  # navigate to previous site

print(company_list)
Thanks to Dilip Meghwal's answer above, I could finish my code:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time

company_list = []  # create empty list
count = 25

chrome_options = webdriver.ChromeOptions()
prefs = {"profile.default_content_setting_values.notifications": 2}
chrome_options.add_experimental_option("prefs", prefs)

driver = webdriver.Chrome('/Users/rieder/Anaconda3/chromedriver_win32/chromedriver.exe', chrome_options=chrome_options)  # define driver
driver.get('https://de.statista.com/companydb/suche?idCountry=276&idBranch=0&revenueFrom=-1000000000000000000&revenueTo=1000000000000000000&employeesFrom=0&employeesTo=100000000&sortMethod=revenueDesc&p=1')  # open Website

if len(driver.find_elements_by_id("cookiesNotificationConfirm")) > 0:
    driver.find_element_by_id("cookiesNotificationConfirm").click()  # accept cookies

while len(company_list) < 1000:
    WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, '//table[@class="zebraTable zebraTable--companies"]//td[1]')))
    elementsSize = len(driver.find_elements_by_xpath('//table[@class="zebraTable zebraTable--companies"]//td[1]'))
    # To iterate over the company list and click on the company name then capture the address on navigated page
    # come back to previous page and repeat the same.
    for i in range(elementsSize):
        WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, '//table[@class="zebraTable zebraTable--companies"]//td[1]')))
        elements = driver.find_elements_by_xpath('//table[@class="zebraTable zebraTable--companies"]//td[1]/a')
        company_name = elements[i].text
        elements[i].click()  # click on the first company namelink
        WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="contactInformation"]//div[@class="companyContactBox"]')))  # get the contactdata from the company you chose before
        contact_data = driver.execute_script("return document.getElementsByClassName('companyContactBox')[0].innerText")
        # print(contact_data)
        company_list.append(contact_data)
        driver.back()  # navigate to previous site
    time.sleep(5)
    driver.find_element_by_xpath("//*[@id='content']/section[3]/div/div/form/div/div[2]/div[2]/div[2]/div/button[2]").click()

company_list = [w.replace('\n', ', ') for w in company_list]
print(company_list)

df_company_name = pd.DataFrame(company_list, columns=['Name'])
df_company_name.to_excel("company_name.xlsx")
