Why is my Python code extracting the same data for all the elements in my list? - python

My project consists of making a competitive watch table of hotel rates for an agency. It is a painful task that I wanted to automate. The code correctly extracts the names of the hotels and the prices I want, but it works correctly only for the first hotel and I don't know where the problem is. I provide you with the code and the output; if any of you can help me, thank you in advance.
NB: code 2 works correctly, but when I added more operations the problem appeared.
code 1
#!/usr/bin/env python
# coding: utf-8
import time
from time import sleep
import ast
import pandas as pd
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.common.exceptions import StaleElementReferenceException, NoSuchElementException
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
driver = webdriver.Chrome("C:\\Users\\marketing2\\Documents\\chromedriver.exe")
driver.get('https://tn.tunisiebooking.com/')
# params to select
params = {
    'destination': 'Tozeur',
    'date_from': '11/09/2021',
    'date_to': '12/09/2021',
    'bedroom': '1'
}
# select destination
destination_select = Select(WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.ID, 'ville_des'))))
destination_select.select_by_value(params['destination'])
# select bedroom
bedroom_select = Select(WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, 'select_ch'))))
bedroom_select.select_by_value(params['bedroom'])
# select dates
script = f"document.getElementById('checkin').value ='{params['date_from']}';"
script += f"document.getElementById('checkout').value ='{params['date_to']}';"
script += f"document.getElementById('depart').value ='{params['date_from']}';"
script += f"document.getElementById('arrivee').value ='{params['date_to']}';"
driver.execute_script(script)
# submit form
btn_rechercher = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="boutonr"]')))
btn_rechercher.click()
urls = []
hotels = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.XPATH, "//div[starts-with(@id,'produit_affair')]")))
for hotel in hotels:
    link = hotel.find_element_by_xpath(".//span[@class='tittre_hotel']/a").get_attribute("href")
    urls.append(link)
for url in urls:
    driver.get(url)

    def existsElement(xpath):
        try:
            driver.find_element_by_id(xpath)
        except NoSuchElementException:
            return "false"
        else:
            return "true"

    if existsElement('result_par_arrangement') == "false":
        btn_t = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="moteur_rech"]/form/div/div[3]/div')))
        btn_t.click()
        sleep(10)
    else:
        pass
    try:
        name = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "//div[@class='bloc_titre_hotels']/h2"))).text
        arropt = driver.find_element_by_xpath("//div[contains(@class,'line_result')][1]")
        opt = arropt.find_element_by_tag_name("b").text
        num = len(arropt.find_elements_by_tag_name("option"))
        optiondata = {}
        achats = {}
        marges = {}
        selection = Select(driver.find_element_by_id("arrangement"))
        for i in range(num):
            try:
                selection = Select(driver.find_element_by_id("arrangement"))
                selection.select_by_index(i)
                time.sleep(2)
                arr = driver.find_element_by_xpath("//select[@id='arrangement']/option[@selected='selected']").text
                prize = driver.find_element_by_id("prix_total").text
                optiondata[arr] = int(prize)
                btn_passe = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="resultat"]/div/form/div/div[2]/div[1]/div[2]/div[2]/div')))
                btn_passe.click()
                # params to select
                params = {
                    'civilite_acheteur': 'Mlle',
                    'prenom_acheteur': 'test',
                    'nom_acheteur': 'test',
                    'e_mail_acheteur': 'test@gmail.com',
                    'portable_acheteur': '22222222',
                    'ville_acheteur': 'Test',
                }
                # select civilite
                civilite_acheteur = Select(WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.NAME, 'civilite_acheteur'))))
                civilite_acheteur.select_by_value(params['civilite_acheteur'])
                # enter first name and other buyer details
                script = f"document.getElementsByName('prenom_acheteur')[0].value ='{params['prenom_acheteur']}';"
                script += f"document.getElementsByName('nom_acheteur')[0].value ='{params['nom_acheteur']}';"
                script += f"document.getElementsByName('e_mail_acheteur')[0].value ='{params['e_mail_acheteur']}';"
                script += f"document.getElementsByName('portable_acheteur')[0].value ='{params['portable_acheteur']}';"
                script += f"document.getElementsByName('ville_acheteur')[0].value ='{params['ville_acheteur']}';"
                driver.execute_script(script)
                # submit form
                btn_agence = driver.find_element_by_id('titre_Nabeul')
                btn_agence.click()
                btn_continuez = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, 'boutonr')))
                btn_continuez.click()
                achat = int(driver.find_element_by_xpath('/html/body/header/div[2]/div[1]/div[1]/div[4]/div[2]/div[2]').text.replace(' TND', ''))
                achats[arr] = achat
                marge = int(((float(prize) - float(achat)) / float(achat)) * 100)
                marges[arr] = marge
                optiondata[arr] = prize, achat, marge
                driver.get(url)
                btn_display = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="moteur_rech"]/form/div/div[3]/div')))
                btn_display.click()
                sleep(10)
            except StaleElementReferenceException:
                pass
    except NoSuchElementException:
        pass
    s = "- {} | {} : {}".format(name, opt, optiondata)
    print(s)
    ds = []
    for l in s.splitlines():
        d = l.split('-')
        if len(d) > 1:
            df = pd.DataFrame(ast.literal_eval(d[1].strip()))
            ds.append(df)
    for df in ds:
        df.reset_index(drop=True, inplace=True)
    df = pd.concat(ds, axis=1)
    cols = df.columns
    cols = [(col.split('.')[0], col) for col in df.columns]
    df.columns = pd.MultiIndex.from_tuples(cols)
    print(df.T)
    #print("{} : {} - {}".format(name, opt, optiondata))
code 2
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import StaleElementReferenceException,NoSuchElementException
urls = []
hotels = driver.find_elements_by_xpath("//div[starts-with(@id,'produit_affair')]")
for hotel in hotels:
    link = hotel.find_element_by_xpath(".//span[@class='tittre_hotel']/a").get_attribute("href")
    urls.append(link)
for url in urls:
    driver.get(url)
    try:
        name = driver.find_element_by_xpath("//div[@class='bloc_titre_hotels']/h2").text
        arropt = driver.find_element_by_xpath("//div[contains(@class,'line_result')][1]")
        opt = arropt.find_element_by_tag_name("b").text
        num = len(arropt.find_elements_by_tag_name("option"))
        optiondata = {}
        selection = Select(driver.find_element_by_id("arrangement"))
        for i in range(num):
            try:
                selection = Select(driver.find_element_by_id("arrangement"))
                selection.select_by_index(i)
                time.sleep(2)
                arr = driver.find_element_by_xpath("//select[@id='arrangement']/option[@selected='selected']").text
                prize = driver.find_element_by_id("prix_total").text
                optiondata[arr] = prize
            except StaleElementReferenceException:
                pass
    except NoSuchElementException:
        pass
    print("{} : {} - {} - {}".format(name, opt, num, optiondata))

Your code is outdated. The HTML has been changed/updated, and elements such as the one with the id boutonr don't exist on the page anymore.
Your loop and order of execution are wrong, so the code keeps evaluating the same fields.
You should not use time.sleep(), or at least minimise its usage to an absolute minimum, as it is a waste of your code's execution time. Use WebDriverWait(...) instead.
I don't speak French, so I could not understand what you are after in your code, but the minimised example below should help you understand the principle.
#!/usr/bin/env python
# coding: utf-8
import time
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.common.exceptions import StaleElementReferenceException, NoSuchElementException
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
driver = webdriver.Chrome("C:\chromedriver.exe")
driver.get('https://tn.tunisiebooking.com/')
# params to select
params = {
    'destination': 'Nabeul',
    'date_from': '25/08/2021',
    'date_to': '26/08/2021',
    'bedroom': '1'
}
# select destination
destination_select = Select(WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.ID, 'ville_des'))))
destination_select.select_by_value(params['destination'])
# select bedroom
bedroom_select = Select(WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, 'select_ch'))))
bedroom_select.select_by_value(params['bedroom'])
# select dates
script = f"document.getElementById('checkin').value ='{params['date_from']}';"
script += f"document.getElementById('checkout').value ='{params['date_to']}';"
script += f"document.getElementById('depart').value ='{params['date_from']}';"
script += f"document.getElementById('arrivee').value ='{params['date_to']}';"
driver.execute_script(script)
# submit form
btn_rechercher = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//div[@onclick="return submit_hotel_recherche()"]')))
btn_rechercher.click()
urls = []
hotels = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.XPATH, "//div[starts-with(@id,'produit_affair')]")))
for hotel in hotels:
    link = hotel.find_element_by_xpath(".//span[@class='tittre_hotel']/a").get_attribute("href")
    urls.append(link)
for url in urls:
    driver.get(url)
    try:
        name = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "//div[@class='bloc_titre_hotels']/h2"))).text
        arropt = driver.find_element_by_xpath("//div[contains(@class,'line_result')][1]")
        opt = arropt.find_element_by_tag_name("b").text
        num = len(arropt.find_elements_by_tag_name("option"))
        optiondata = {}
        achats = {}
        marges = {}
        for i in range(num):
            try:
                selection = Select(WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, 'arrangement')))).select_by_index(i)
                time.sleep(0.5)
                arr = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//select[@id='arrangement']/option[@selected='selected']"))).text
                prize = driver.find_element_by_id("prix_total").text
                optiondata[arr] = int(prize)
            except StaleElementReferenceException:
                pass
        print("{} : {} - {}".format(name, opt, optiondata))
    except NoSuchElementException:
        pass
driver.quit()
Result:
Byzance Nabeul : Chambre Double - {'All Inclusive soft': 93, 'Demi Pension': 38, 'Petit Dejeuner': 28, 'Pension Complete': 78}
Palmyra Club Nabeul Nabeul : Double Standard - {'All Inclusive soft': 92}
The following code goes to the payment page and extracts all the info there:
#!/usr/bin/env python
# coding: utf-8
import time
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.common.exceptions import StaleElementReferenceException, NoSuchElementException
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
driver = webdriver.Chrome("/usr/local/bin/chromedriver")
driver.get('https://tn.tunisiebooking.com/')
# params to select
params = {
    'destination': 'Nabeul',
    'date_from': '29/08/2021',
    'date_to': '30/08/2021',
    'bedroom': '1'
}
# select destination
destination_select = Select(WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.ID, 'ville_des'))))
destination_select.select_by_value(params['destination'])
# select bedroom
bedroom_select = Select(WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, 'select_ch'))))
bedroom_select.select_by_value(params['bedroom'])
# select dates
script = f"document.getElementById('checkin').value ='{params['date_from']}';"
script += f"document.getElementById('checkout').value ='{params['date_to']}';"
script += f"document.getElementById('depart').value ='{params['date_from']}';"
script += f"document.getElementById('arrivee').value ='{params['date_to']}';"
driver.execute_script(script)
# submit form
btn_rechercher = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//div[@onclick="return submit_hotel_recherche()"]')))
btn_rechercher.click()
urls = []
hotels = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.XPATH, "//div[starts-with(@id,'produit_affair')]")))
for hotel in hotels:
    link = hotel.find_element_by_xpath(".//span[@class='tittre_hotel']/a").get_attribute("href")
    urls.append(link)
for url in urls:
    driver.get(url)
    try:
        name = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "//div[@class='bloc_titre_hotels']/h2"))).text
        arropt = driver.find_element_by_xpath("//div[contains(@class,'line_result')][1]")
        opt = arropt.find_element_by_tag_name("b").text
        num = len(arropt.find_elements_by_tag_name("option"))
        optiondata = {}
        achats = {}
        marges = {}
        try:
            selection = Select(WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, 'arrangement'))))
            time.sleep(0.5)
            arr = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//select[@id='arrangement']/option[@selected='selected']"))).text
            prize = driver.find_element_by_id("prix_total").text
            optiondata[arr] = int(prize)
            btn_passe = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, 'resa')))
            btn_passe.click()
            tot = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, 'montant_total_apres_code')))
            total = int(tot.text.replace(' €', ''))
            # params to select
            params = {
                'civilite_acheteur': 'Mlle',
                'prenom_acheteur': 'test',
                'nom_acheteur': 'test',
                'e_mail_acheteur': 'test@gmail.com',
                'portable_acheteur': '22222222',
                'ville_acheteur': 'Test',
            }
            # select civilite
            civilite_acheteur = Select(WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.NAME, 'civilite_acheteur'))))
            civilite_acheteur.select_by_value(params['civilite_acheteur'])
            # enter first name and other buyer details
            script = f"document.getElementsByName('prenom_acheteur')[0].value ='{params['prenom_acheteur']}';"
            script += f"document.getElementsByName('nom_acheteur')[0].value ='{params['nom_acheteur']}';"
            script += f"document.getElementsByName('e_mail_acheteur')[0].value ='{params['e_mail_acheteur']}';"
            script += f"document.getElementsByName('portable_acheteur')[0].value ='{params['portable_acheteur']}';"
            script += f"document.getElementsByName('ville_acheteur')[0].value ='{params['ville_acheteur']}';"
            driver.execute_script(script)
            # submit form
            btn_agence = driver.find_element_by_class_name('continuez_resa')
            btn_agence.click()
            achat1 = int(driver.find_element_by_id('montant_a_payer').text.replace(' €', ''))
            achat = int(driver.find_element_by_id('montant_restant').text.replace(' €', ''))
            achat3 = float(driver.find_element_by_xpath('//div[@class="ligne_interne_total"]/div[3]/div[@class="prix_total1 text_shadow"]').text.replace(' TND', ''))
            achats[arr] = achat
            marge = int(((float(prize) - float(achat)) / float(achat)) * 100)
            marges[arr] = marge
            optiondata[arr] = prize, total, achat1, achat, achat3, marge
        except StaleElementReferenceException:
            pass
        print("{} : {} - {}".format(name, opt, optiondata))
    except NoSuchElementException:
        pass
driver.quit()
Output:
Byzance Nabeul : Chambre Double - {'Petit Dejeuner': (36, 41, 12, 29, 4.0, 24)}
Where:
36 = Prix Total (total price)
41 = Montant Total (total amount)
12 = Montant de l'acompte (deposit amount)
29 = Vous payerez le reste à votre arrivée à l'hôtel (you will pay the rest on arrival at the hotel)
4.0 = Total taxe de séjour à payer sur place à l'hôtel (total tourist tax to pay on site at the hotel)
24 = Marges (margin)
Hotel page:

You are using sleeps to load the pages in your first example but not in your second one (the one that you state works just fine).
This is typically not the way you want to use Selenium, and it leads me to believe that your timing is off.
This SO answer shows you how to use "explicit waits" on "expected_conditions" instead of "specific timings", which can and will fail.
You even create a wait object but never use it.
Use it in conjunction with expected_conditions, remove the specific timed sleeps, and things will get better.
The expected_conditions docs are here.
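As an illustration, here is a minimal sketch of that pattern: one reusable wait object plus expected_conditions instead of timed sleeps (the boutonr locator is taken from the question and may have changed on the live site).

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome()
wait = WebDriverWait(driver, 10)  # one reusable wait object for the whole script

driver.get('https://tn.tunisiebooking.com/')
# Blocks until the element is actually clickable (up to 10 s), then returns it.
btn = wait.until(EC.element_to_be_clickable((By.ID, 'boutonr')))
btn.click()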

The problem was that the element listing the arrangements could not be accessed for the rest of the hotels in the list. I added a function that tests for the presence of the data, and it worked:
for url in urls:
    driver.get(url)

    def existsElement(xpath):
        try:
            driver.find_element_by_id(xpath)
        except NoSuchElementException:
            return "false"
        else:
            return "true"

    if existsElement('result_par_arrangement') == "false":
        btn_t = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="moteur_rech"]/form/div/div[3]/div')))
        btn_t.click()
    else:
        pass

Related

Webscraping Multiple Pages in Python with Selenium - loop not working

I'm quite new to Python and have written a script using Selenium to scrape a website. I've tried everything but can't get the loop to cycle through pages: it currently just repeats the data on the first page 5 times. I want to scrape all the pages for 'BR1'. Any help would be great; currently the script below only scrapes the first page 5 times.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

with open('rightmove.csv', 'w') as file:
    file.write('PropertyCardcontent \n')

PATH = ("/usr/local/bin/chromedriver")
driver = webdriver.Chrome(PATH)
driver.get("https://www.rightmove.co.uk/house-prices.html")
print(driver.title)
elem = driver.find_element(By.NAME, 'searchLocation')  # Find the search box
elem.send_keys('BR1' + Keys.RETURN)

try:
    content = WebDriverWait(driver, 15).until(
        EC.presence_of_element_located((By.ID, 'content'))
    )
finally:
    time.sleep(3)

for p in range(5):
    sold = content.find_elements(By.CLASS_NAME, 'sold-prices-content-wrapper ')
    for solds in sold:
        address = solds.find_elements(By.CLASS_NAME, 'sold-prices-content ')
        for addresses in address:
            result = addresses.find_elements(By.CLASS_NAME, 'results ')
            for results in result:
                card = results.find_elements(By.CLASS_NAME, 'propertyCard')
                for propertyCard in card:
                    header = propertyCard.find_elements(By.CLASS_NAME, 'propertyCard-content')
                    for propertyCardcontent in header:
                        road = propertyCardcontent.find_elements(By.CLASS_NAME, 'title')
                    for propertyCardcontent in header:
                        road = propertyCardcontent.find_elements(By.CLASS_NAME, 'subTitle')
                        for subtitle in road:
                            bed = subtitle.find_elements(By.CLASS_NAME, 'propertyType')
    with open('rightmove.csv', 'a') as file:
        for i in range(len(result)):
            file.write(header[i].text + '\n')
    button = driver.find_element(By.XPATH, '//*[@id="content"]/div[2]/div[2]/div[4]/div[27]/div[3]/div')
    button.click()
    file.close()
    time.sleep(3)
driver.quit()
Since the website link has the page number in it, I recommend you use "https://www.rightmove.co.uk/house-prices/br1.html?page=1" as the base URL and loop through the pages while changing the last index of the URL with something like an f-string.
One other thing: you don't need to implement all those for loops; you can simply assign each variable its specific value, since everything you need is inside one HTML block which is easy to navigate.
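A minimal sketch of that URL-based pagination, reusing the driver from the question (the page count is hard-coded here for brevity; the update below reads the real count from the pagination label):

# Loop over the result pages by rewriting the page number inside the URL itself.
key_word = "br1"
for page in range(1, 6):  # assuming 5 pages; read the real count from the site
    driver.get(f"https://www.rightmove.co.uk/house-prices/{key_word}.html?page={page}")
    # ... scrape this page's property cards here ...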
Update:
I'm sorry for being late, had unexpected stuff(...).
I've made some changes as I use Brave, so make sure you select your browser (Chrome, I believe); the chromedriver (ver: 102) stays the same (or depends on your Chrome version).
I've also got the Price and Date and stored them in a tuple.
Every record is stored in a list [Title, propertyType, tuple_of(Price_Date)].
At the end, it creates a csv and stores everything inside with ";" as the delimiter.
You can, if you prefer, split the price and date for later use; up to you.
Note: this looping method only applies to websites where the page number is included in the URL. In this case, both the key and the page number are included in the URL.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
import time
import random
import itertools

options = Options()
options.binary_location = r'C:\Program Files\BraveSoftware\Brave-Browser\Application\brave.exe'
driver = webdriver.Chrome(options=options, service=Service("chromedriver.exe"))

key_word = "BR1".lower()
base_url = f"https://www.rightmove.co.uk/house-prices/{key_word}.html?page=1"
driver.get(base_url)

# Number of pages
pages = driver.find_element(By.XPATH, '//span[@class="pagination-label"][2]').text
pages = int(pages.strip('of'))

WebDriverWait(driver, 15).until(
    EC.presence_of_element_located((By.CLASS_NAME, 'results '))
)

data = []
pc = 0
for p in range(1, pages + 1):
    driver.get(f"https://www.rightmove.co.uk/house-prices/{key_word}.html?page={p}")
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, '//div//div[@class="propertyCard"]'))
    )
    propertyCards = driver.find_elements(By.XPATH, '//div//div[@class="propertyCard"]')
    for propertyCard in propertyCards:
        title = propertyCard.find_element(By.CLASS_NAME, 'title').text
        propertyType = propertyCard.find_element(By.CLASS_NAME, 'propertyType').text
        price_list = propertyCard.find_elements(By.CLASS_NAME, 'price')
        date_list = propertyCard.find_elements(By.CLASS_NAME, 'date-sold')
        data.append([title, propertyType])
        for p, d in itertools.zip_longest(price_list, date_list, fillvalue=None):
            try:
                price = p.text
                date = d.text
                data[pc].append((price, date))
            except Exception as e:
                print(e)
        pc += 1
    time.sleep(random.randint(1, 4))

print(data)
with open('rightmove.csv', 'w') as file:
    header = "Title;propertyType;Price_Date\n"
    file.write(header)
    for record in data:
        file.write("{};{};{}\n".format(record[0], record[1], record[2:]))
driver.quit()
You don't have to go down the DOM element by element: you can just use an XPath or a class name (if it's unique, otherwise an XPath or CSS selector is better) and get the item you are looking for.
Anyway, follow this:
import time
import selenium.webdriver as webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome("/usr/local/bin/chromedriver")
driver.get("https://www.rightmove.co.uk/house-prices.html")

# send query
query = "BR1"
search_bar = driver.find_element(By.XPATH, '//input[@class="searchBox ac_input"]')
search_bar.send_keys(query)
search_bar.send_keys(Keys.ENTER)

# wait for the results to be loaded
WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.CLASS_NAME, 'propertyCard'))
)

# get amount of pages
pages = driver.find_element(By.XPATH, '//span[@class="pagination-label"][2]').text
pages = int(pages.replace('of ', ''))

data = []
i = 1
while i <= pages:
    WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, '//div[contains(text(), "Next")]'))
    ).click()
    # wait for the page to load its results
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, '//div//div[@class="propertyCard"]'))
    )
    propertyCards = driver.find_elements(By.XPATH, '//div//div[@class="propertyCard"]')
    # loop over the results and store the data
    for propertyCard in propertyCards:
        title = propertyCard.find_element(By.CLASS_NAME, 'title').text
        propertyType = propertyCard.find_element(By.CLASS_NAME, 'propertyType').text
        data.append((title, propertyType))
    time.sleep(1)
    i += 1

print("you reached the last page")
# print the collected results
print(data)
driver.close()
I used a list of tuples because in your example you want to store 2 items; if you want to store more data you can use a dict and then convert it into a csv with DictWriter directly. Enjoy.
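For reference, a small sketch of that dict-plus-DictWriter variant (the two records are illustrative placeholders, not scraped data):

import csv

# Each record is a dict instead of a tuple, so DictWriter can map keys to columns.
data = [
    {"title": "Example Road, Bromley", "propertyType": "Terraced"},  # placeholder
    {"title": "Sample Street, Bromley", "propertyType": "Flat"},     # placeholder
]
with open("rightmove.csv", "w", newline="") as f:
    writer = csv.DictWriter(f, fieldnames=["title", "propertyType"], delimiter=";")
    writer.writeheader()
    writer.writerows(data)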

How do I press the "load more" button while scraping comments on Instagram with Selenium (Python)?

I'm working on a project that scrapes comments off Instagram posts and writes them into an Excel file.
Here's my code:
from selenium.webdriver.common.by import By
from selenium import webdriver
import time
import sys
import pandas as pd
from pandas import ExcelWriter
import os.path
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains

url = [
    "https://www.instagram.com/p/CcVTqRtJ2gj/",
    "https://www.instagram.com/p/CcXpLHepve-/",
]
user_names = []
user_comments = []
driver = webdriver.Chrome("C:\chromedriver.exe")
driver.get(url[0])
time.sleep(3)
username = WebDriverWait(driver, 30).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input[name='username']")))
password = WebDriverWait(driver, 30).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input[name='password']")))
username.clear()
username.send_keys("username")
password.clear()
password.send_keys("pwd")
Login_button = (
    WebDriverWait(driver, 2)
    .until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button[type='submit']")))
    .click()
)
time.sleep(4)
not_now = (
    WebDriverWait(driver, 30)
    .until(
        EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "Not Now")]'))
    )
    .click()
)
for n in url:
    try:
        driver.get(n)
        time.sleep(3)
        load_more_comment = driver.find_element_by_xpath("//button[class='wpO6b ']")
        print("Found {}".format(str(load_more_comment)))
        i = 0
        while load_more_comment.is_displayed() and i < 10:
            load_more_comment.click()
            time.sleep(1.5)
            load_more_comment = driver.find_element_by_xpath(
                "//button[class='wpO6b ']"
            )
            print("Found {}".format(str(load_more_comment)))
            i += 1
        user_names.pop(0)
        user_comments.pop(0)
    except Exception as e:
        print(e)
        pass
    comment = driver.find_elements_by_class_name("gElp9 ")
    for c in comment:
        container = c.find_element_by_class_name("C4VMK")
        name = container.find_element_by_class_name("_6lAjh ").text
        content = container.find_element_by_class_name("MOdxS ").text
        content = content.replace("\n", " ").strip().rstrip()
        user_names.append(name)
        user_comments.append(content)
        print(content)
    user_names.pop(0)
    user_comments.pop(0)
    # export(user_names, user_comments)
driver.close()
df = pd.DataFrame(list(zip(user_names, user_comments)), columns=["Name", "Comments"])
# df.to_excel("Anime Content Engagement.xlsx")
print(df)
And the load-more-comments part doesn't seem to work.
Since there is more than one button with the same class name, I'm not able to choose the right button to click on. I'm a beginner, so if anyone has a solution for this, it would be great.
You can select the button by its aria-label text:
driver.find_element_by_css_selector("svg._8-yf5[aria-label='TEXT']")
I believe the text inside changes according to the Instagram language, so set it according to what appears on your screen.
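For example, the load-more loop from the question could locate the button through that aria-label instead of the class name. A sketch, assuming the English label "Load more comments" and the svg._8-yf5 selector from above, and reusing the driver and time import from the question:

# Click "load more comments" until the button disappears (10 times at most),
# locating it by aria-label rather than by the obfuscated class name.
for _ in range(10):
    try:
        driver.find_element_by_css_selector("svg._8-yf5[aria-label='Load more comments']").click()
        time.sleep(1.5)
    except Exception:
        break  # button no longer present: all visible comments are loaded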

My already complete scraper scrapes everything on the page. I would like to limit the scraping to only a certain section

I have placed below the code of a complete and properly functioning scraper that I own. It successfully scrapes all the elements on the page.
However, I would like to scrape only a small, limited section of the page, with the same elements as the full scrape. This limited section is already scraped correctly along with all the elements of the page, but I would like to scrape only it and not "everything + it". The link is here
There are 4 tables on the page, but I would like to scrape just one: the table called "Programma", i.e. the html section "event-summary event" or "leagues-static event-summary-leagues". And of this section, only the elements of the last round (Matchday 14). Matchday 14 only, no round 15, so that with each update of the rounds on the page, the last round is always scraped.
So I would need to insert something that makes the scraper understand that it should download only the elements (which it already scrapes) of that section and of the last round.
The code is already complete and works fine, so I'm not looking for code services, but for a little hint to tell me how to limit the scraping to just the section mentioned above. The scraping is in Selenium, and I would like to stick with Selenium and my code as it is already functional and complete. Thanks
import selenium
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Firefox()
driver.get("url")
driver.implicitly_wait(12)
#driver.minimize_window()
wait = WebDriverWait(driver, 10)

all_rows = driver.find_elements(By.CSS_SELECTOR, "div[class^='event__round'],div[class^='event__match']")
current_round = '?'
for bundesliga in all_rows:
    classes = bundesliga.get_attribute('class')
    #print(classes)
    if 'event__round' in classes:
        #round = row.find_elements(By.CSS_SELECTOR, "[class^='event__round event__round--static']")
        #current_round = row.text  # full text `Round 20`
        current_round = bundesliga.text.split(" ")[-1]  # only `20` without `Round`
    else:
        datetime = bundesliga.find_element(By.CSS_SELECTOR, "[class^='event__time']")
        # split the date and the time
        date, time = datetime.text.split(" ")
        date = date.rstrip('.')  # right-strip to remove `.` at the end of date
        team_home = bundesliga.find_element(By.CSS_SELECTOR, "[class^='event__participant event__participant--home']")
        team_away = bundesliga.find_element(By.CSS_SELECTOR, "[class^='event__participant event__participant--away']")
        score_home = bundesliga.find_element(By.CSS_SELECTOR, "[class^='event__score event__score--home']")
        score_away = bundesliga.find_element(By.CSS_SELECTOR, "[class^='event__score event__score--away']")
        bundesliga = [current_round, date, time, team_home.text, team_away.text, score_home.text, score_away.text]
        bundesliga.append(bundesliga)
        print(bundesliga)
I think all you need to do is limit the all_rows variable. One way to do this is to find the tab you are looking for by its text and then get the parent elements.
import selenium
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException

driver = webdriver.Firefox()
driver.get("https://www.someurl/some/other/page")
driver.implicitly_wait(12)
#driver.minimize_window()
wait = WebDriverWait(driver, 10)
# all_rows = driver.find_elements(By.CSS_SELECTOR, "div[class^='event__round'],div[class^='event__match']")

############### UPDATE ####################
def parent_element(element):
    return element.find_element(By.XPATH, './..')

programma_element = WebDriverWait(driver, 10).until(
    EC.visibility_of_element_located((By.XPATH, "//div[text()='Programma']")))
programma_element_p1 = parent_element(programma_element)
programma_element_p2 = parent_element(programma_element_p1)
programma_element_p3 = parent_element(programma_element_p2)
all_rows = programma_element_p3.find_elements(By.CSS_SELECTOR, "div[class^='event__round'],div[class^='event__match']")

filter_rows = []
for row in all_rows:
    if "event__match--last" in row.get_attribute('class'):
        filter_rows.append(row)
        break
    else:
        filter_rows.append(row)
############### UPDATE ####################

current_round = '?'
for bundesliga in filter_rows:
    classes = bundesliga.get_attribute('class')
    #print(classes)
    if 'event__round' in classes:
        #round = row.find_elements(By.CSS_SELECTOR, "[class^='event__round event__round--static']")
        #current_round = row.text  # full text `Round 20`
        current_round = bundesliga.text.split(" ")[-1]  # only `20` without `Round`
    else:
        datetime = bundesliga.find_element(By.CSS_SELECTOR, "[class^='event__time']")
        # split the date and the time
        date, time = datetime.text.split(" ")
        date = date.rstrip('.')  # right-strip to remove `.` at the end of date
        team_home = bundesliga.find_element(By.CSS_SELECTOR, "[class^='event__participant event__participant--home']")
        team_away = bundesliga.find_element(By.CSS_SELECTOR, "[class^='event__participant event__participant--away']")
        # score_home = bundesliga.find_element(By.CSS_SELECTOR, "[class^='event__score event__score--home']")
        # score_away = bundesliga.find_element(By.CSS_SELECTOR, "[class^='event__score event__score--away']")
        try:
            score_home = bundesliga.find_element(By.CSS_SELECTOR, "[class^='event__score event__score--home']")
        except (TimeoutException, NoSuchElementException):
            MyObject = type('MyObject', (object,), {})
            score_home = MyObject()
            score_home.text = "-"
        try:
            score_away = bundesliga.find_element(By.CSS_SELECTOR, "[class^='event__score event__score--away']")
        except (TimeoutException, NoSuchElementException):
            MyObject = type('MyObject', (object,), {})
            score_away = MyObject()
            score_away.text = "-"
        bundesliga = [current_round, date, time, team_home.text, team_away.text, score_home.text, score_away.text]
        bundesliga.append(bundesliga)
        print(bundesliga)

I am very new to scraping, please bear with me; this is my 1st project. I am trying to scrape a site using Selenium

"problem lines"
for_tariff_loop = driver.find_elements_by_xpath("//span[@class='phx-radio__element']")
radio_label_list = for_tariff_loop[i].find_element_by_css_selector('span[class="phx-radio__label"]')
print(radio_label_list)
time.sleep(1)
The website I'm scraping: https://www.telekom.de/unterwegs/apple/apple-iphone-13-pro/graphit-512gb
I was not able to print the radio button's label for the checked button. I don't know what the mistake is or where I made it. Could anyone help with this? It will be helpful for me to learn. The "Change tariff" links are used in the code given below.
import xlwt
from selenium import webdriver
import re
import time
from datetime import date
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys

class telekommobiles:
    def __init__(self):
        self.url = "https://www.telekom.de/mobilfunk/geraete/smartphone?page=1&pageFilter=promotion"
        self.country = 'DE'
        self.currency = 'GBP'
        self.VAT = 'Included'
        self.shipping = 'free shipping within 3-4 weeks'
        self.Pre_PromotionPrice = 'N/A'
        self.color = 'N/A'

    def telekom(self):
        #try:
        driver = webdriver.Chrome()
        driver.maximize_window()
        driver.get(self.url)
        today = date.today()
        time.sleep(5)
        cookies = driver.find_element_by_css_selector('button.cl-btn.cl-btn--accept-all').click()
        print("cookies accepted")
        links_prod_check = []
        prod_models = []
        prod_manufacturer = []
        prod_memorys = []
        product_colors = []
        product_price_monthly_payments = []
        product_price_one_time_payments = []
        product_links = []
        containers = driver.find_elements_by_css_selector('div[class="styles_item__12Aw4"]')
        i = 1
        for container in containers:
            p_links = container.find_element_by_tag_name('a').get_attribute('href')
            i = i + 1
            product_links.append(p_links)
            #print(p_links)
        for links in product_links:
            driver.get(links)
            #time.sleep(5)
            #print(driver.current_url)
            #links_prod_check.append(driver.current_url)
            coloroptions = WebDriverWait(driver, 30).until(EC.presence_of_all_elements_located((By.XPATH, "//li[@data-qa='list_ColorVariant']")))
            #print(coloroptions)
            for i in range(len(coloroptions)):
                coloroption = driver.find_elements_by_xpath("//li[@data-qa='list_ColorVariant']")
                coloroption[i].click()
                #print(coloroption[i])
                time.sleep(3)
                memoryoptions = WebDriverWait(driver, 30).until(EC.presence_of_all_elements_located((By.XPATH, "//span[@class='phx-radio__element']")))
                for i in range(len(memoryoptions)):
                    memoryoption = driver.find_elements_by_xpath("//span[@class='phx-radio__element']")
                    try:
                        memoryoption[i].click()
                    except:
                        pass
                    time.sleep(5)
                    change_traiff = driver.find_element_by_css_selector('button[class="phx-link phx-list-of-links__link js-mod tracking-added"]').click()
                    time.sleep(3)
                    # looping for each section
                    section_loops = driver.find_elements_by_css_selector('section[class="tariff-catalog--layer"]')
                    #print(len(section_loops))
                    for section_loop in section_loops:
                        #print(section_loop)
                        time.sleep(5)
                        # Headings
                        heading_1 = section_loop.find_element_by_css_selector('h2[class="page-title page-title--lowercase"]').text
                        print(heading_1)
                        # looping for each separate box
                        each_box_subcontainers = section_loop.find_elements_by_css_selector('.phx-tariff-box__section')
                        #print(len(each_box_subcontainers))
                        for subcontainer in each_box_subcontainers:
                            #print(subcontainer)
                            looping_for_tariff = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.XPATH, "//span[@class='phx-radio__element']")))
                            #print(looping_for_tariff)
                            for i in range(len(looping_for_tariff)):
                                #print(i)
                                try:
                                    for_tariff_loop = driver.find_elements_by_xpath("//span[@class='phx-radio__element']")
                                    for_tariff_loop[i].click()
                                    time.sleep(3)
                                except:
                                    pass
                                for_tariff_loop = driver.find_elements_by_xpath("//span[@class='phx-radio__element']")
                                radio_label_list = for_tariff_loop[i].find_element_by_css_selector('span[class="phx-radio__label"]')
                                print(radio_label_list)
                                time.sleep(1)
                    change_traiff_close_button = driver.find_element_by_css_selector('span[class="icon-after-yellow-close right close popup-close-tr js-popup-close"]').click()

telekom_de = telekommobiles()
telekom_de.telekom()
You are trying to find an element within an element. When finding radio_label_list using for_tariff_loop[i], the xpath for radio_label_list effectively becomes:
//span[@class='phx-radio__element']//span[@class='phx-radio__label']
which does not exist in the DOM.
I tried the last part of the code and was able to print the memory size as shown below. Do try and confirm.
I replaced the css-selector for radio_label_list with the xpath ./following-sibling::span:
looping_for_tariff = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.XPATH, "//span[@class='phx-radio__element']")))
# print(looping_for_tariff)
for i in range(len(looping_for_tariff)):
    # print(i)
    try:
        for_tariff_loop = driver.find_elements_by_xpath("//span[@class='phx-radio__element']")
        for_tariff_loop[i].click()
        time.sleep(3)
    except:
        pass
    for_tariff_loop = driver.find_elements_by_xpath("//span[@class='phx-radio__element']")
    radio_label_list = for_tariff_loop[i].find_element_by_xpath("./following-sibling::span").text
    print(radio_label_list)
    time.sleep(1)
As per the comments, check this code:
driver.get("https://www.telekom.de/unterwegs/apple/apple-iphone-13-pro/graphit-512gb")
wait = WebDriverWait(driver, 30)
wait.until(EC.element_to_be_clickable((By.XPATH, "//button[text()='Accept All']"))).click()
wait.until(EC.element_to_be_clickable((By.XPATH, "//ul[contains(@class,'phx-tariff-notification-box-new__element--desktop-tablet')]/li[2]/button"))).click()
length = len(driver.find_elements_by_class_name("phx-tariff-box__section"))
for i in range(length):
    print("----------------------------------------------------------------------------------------------------------")
    options = driver.find_elements_by_class_name("phx-tariff-box__section")
    datas = options[i].find_element_by_xpath(".//div[contains(@class,'phx-tariff-box__volume')]").get_attribute("innerText")
    print("data: {}".format(datas))
    len_types = len(options[i].find_elements_by_xpath(".//div[@class='phx-tariff-box__radios-inner']//label"))
    types = options[i].find_elements_by_xpath(".//div[@class='phx-tariff-box__radios-inner']//label")
    if len(types) == 0:
        price = options[i].find_element_by_xpath(".//p[@data-qa='block_TariffPrice']").get_attribute("innerText")
        print(price)
    else:
        for j in range(len_types):
            types[j].click()
            time.sleep(2)
            options = driver.find_elements_by_class_name("phx-tariff-box__section")
            types = options[i].find_elements_by_xpath(".//div[@class='phx-tariff-box__radios-inner']//label")
            try:
                types[j].find_element_by_xpath("./input[@checked]")
                type = types[j].find_element_by_xpath("./span[2]").get_attribute("innerText")
                price = options[i].find_element_by_xpath(".//p[@data-qa='block_TariffPrice']").get_attribute("innerText")
                print(f"{type}: {price}")
            except:
                pass

Waiting for data table to load after click / Selenium

I am trying to read the data table from the Indian Central Pollution Control Board using Selenium/Python. Here is an example of the output.
I am essentially following the approach presented here: https://github.com/RachitKamdar/Python-Scraper.
Thanks to @Prophet, I was able to read data from the first page (Select element using XPATH with Python?), but I cannot get Selenium to wait for the data table to reload when switching to page 2.
I tried to add a WebDriverWait instruction but this does not seem to work. Any help would be greatly appreciated. Thanks.
Here is what I tried to do:
browser.find_element_by_tag_name("select").send_keys("100")
WebDriverWait(browser, timeout).until(EC.visibility_of_element_located((By.XPATH, "//*[@id='DataTables_Table_0_paginate']/span/a")))
maxpage = int(browser.find_elements(By.XPATH, "//*[@id='DataTables_Table_0_paginate']/span/a")[-1].text)
i = 1
while i < maxpage + 1:
    browser.find_element(By.XPATH, "//*[@id='DataTables_Table_0_paginate']/span/a[contains(text(),'{}')]".format(i)).click()
    WebDriverWait(browser, timeout).until(EC.visibility_of_element_located((By.ID, "DataTables_Table_0_wrapper")))
    # this works ok for page 1
    # this does not wait after the click for the data table to update. As a result res is wrong for page 2 [empty].
    res = browser.page_source
    soup = BeautifulSoup(res, 'html.parser')
    soup = soup.find(id='DataTables_Table_0')
    ...
    i = i + 1
Update 1:
Following Prophet's suggestion, I made the following modification:
browser.find_element_by_tag_name("select").send_keys("100")
WebDriverWait(browser, timeout).until(EC.visibility_of_element_located((By.ID, "DataTables_Table_0_wrapper")))
WebDriverWait(browser, timeout).until(EC.visibility_of_element_located((By.XPATH, "//*[@id='DataTables_Table_0_paginate']/span/a")))
maxpage = int(browser.find_elements(By.XPATH, "//*[@id='DataTables_Table_0_paginate']/span/a")[-1].text)
print(maxpage)
i = 1
while i < maxpage + 1:
    WebDriverWait(browser, timeout).until(EC.visibility_of_element_located((By.ID, "DataTables_Table_0_wrapper")))
    res = browser.page_source
    soup = BeautifulSoup(res, 'html.parser')
    soup = soup.find(id='DataTables_Table_0')
    if i == 1:
        data = getValsHtml(soup)
    else:
        data = data.append(getValsHtml(soup))
    print(i)
    print(data)
    i = i + 1
    browser.find_element(By.XPATH, '//a[@class="paginate_button next"]').click()
This still crashes on page 2 (data is empty). In addition, data should contain 100 items from page 1 but only contains 10. The maxpage number is correct (15).
Update 2:
Here is the whole script after incorporating Prophet's recommendations [the original script follows https://github.com/RachitKamdar/Python-Scraper].
This only retrieves 10 points from the first page and fails to switch to the next page.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from bs4 import BeautifulSoup
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import Select
import pandas as pd

def getValsHtml(table):
    data = []
    heads = table.find_all('th')
    data.append([ele.text.strip() for ele in heads])
    rows = table.find_all('tr')
    for row in rows:
        cols = row.find_all('td')
        cols = [ele.text.strip() for ele in cols]
        data.append([ele for ele in cols])  # get rid of empty values
    data.pop(1)
    data = pd.DataFrame(data[1:], columns=data[0])
    return data

def parameters(br, param):
    br.find_element_by_class_name("list-filter").find_element_by_tag_name("input").send_keys(param)
    br.find_elements_by_class_name("pure-checkbox")[1].click()
    br.find_element_by_class_name("list-filter").find_element_by_tag_name("input").clear()

timeout = 60
url = 'https://app.cpcbccr.com/ccr/#/caaqm-dashboard-all/caaqm-landing/data'
chdriverpath = "/net/f1p/my_soft/chromedriver"
option = webdriver.ChromeOptions()
browser = webdriver.Chrome(executable_path="{}".format(chdriverpath), chrome_options=option)
browser.get(url)

station = "Secretariat, Amaravati - APPCB"
state = "Andhra Pradesh"
city = "Amaravati"
sd = ['01', 'Jan', '2018']
ed = ['31', 'Dec', '2021']
duration = "24 Hours"

WebDriverWait(browser, timeout).until(EC.visibility_of_element_located((By.CLASS_NAME, "toggle")))
browser.find_elements_by_class_name("toggle")[0].click()
browser.find_element_by_tag_name("input").send_keys(state)
browser.find_element_by_class_name("options").click()
browser.find_elements_by_class_name("toggle")[1].click()
browser.find_element_by_tag_name("input").send_keys(city)
browser.find_element_by_class_name("options").click()
browser.find_elements_by_class_name("toggle")[2].click()
browser.find_element_by_tag_name("input").send_keys(station)
browser.find_element_by_class_name("options").click()
browser.find_elements_by_class_name("toggle")[4].click()
browser.find_element_by_class_name("filter").find_element_by_tag_name("input").send_keys(duration)
browser.find_element_by_class_name("options").click()
browser.find_element_by_class_name("c-btn").click()

for p in ['NH3']:
    print(p)
    try:
        parameters(browser, p)
    except:
        print("miss")
        browser.find_element_by_class_name("list-filter").find_element_by_tag_name("input").clear()
        pass

browser.find_element_by_class_name("wc-date-container").click()
browser.find_element_by_class_name("month-year").click()
browser.find_element_by_id("{}".format(sd[1].upper())).click()
browser.find_element_by_class_name("year-dropdown").click()
browser.find_element_by_id("{}".format(int(sd[2]))).click()
browser.find_element_by_xpath('//span[text()="{}"]'.format(int(sd[0]))).click()
browser.find_elements_by_class_name("wc-date-container")[1].click()
browser.find_elements_by_class_name("month-year")[1].click()
browser.find_elements_by_id("{}".format(ed[1].upper()))[1].click()
browser.find_elements_by_class_name("year-dropdown")[1].click()
browser.find_element_by_id("{}".format(int(ed[2]))).click()
browser.find_elements_by_xpath('//span[text()="{}"]'.format(int(ed[0])))[1].click()
browser.find_elements_by_tag_name("button")[-1].click()

next_page_btn_xpath = '//a[@class="paginate_button next"]'
actions = ActionChains(browser)
# This is how you should treat the Select drop down
select = Select(browser.find_element_by_tag_name("select"))
select.select_by_value('100')
WebDriverWait(browser, timeout).until(EC.visibility_of_element_located((By.XPATH, '//div[@class="dataTables_wrapper no-footer"]')))
WebDriverWait(browser, timeout).until(EC.visibility_of_element_located((By.XPATH, "//*[@id='DataTables_Table_0_paginate']/span/a")))
maxpage = int(browser.find_elements(By.XPATH, "//*[@id='DataTables_Table_0_paginate']/span/a")[-1].text)
i = 1
while i < maxpage + 1:
    res = browser.page_source
    soup = BeautifulSoup(res, 'html.parser')
    soup = soup.find(id='DataTables_Table_0')
    if i == 1:
        data = getValsHtml(soup)
    else:
        data = data.append(getValsHtml(soup))
    print(i)
    print(data)
    i = i + 1
    # scroll to the next page btn and then click it
    next_page_btn = browser.find_element_by_xpath(next_page_btn_xpath)
    actions.move_to_element(next_page_btn).perform()
    browser.find_element(By.XPATH, next_page_btn).click()
browser.quit()
Instead of:
browser.find_element(By.XPATH, "//*[@id='DataTables_Table_0_paginate']/span/a[contains(text(),'{}')]".format(i)).click()
try clicking on this element:
browser.find_element(By.XPATH, '//a[@class="paginate_button next"]').click()
It's simply the next page button, and it will not change depending on which page you are on.
Also, instead of:
WebDriverWait(browser, timeout).until(EC.visibility_of_element_located((By.ID, "DataTables_Table_0_wrapper")))
try this:
WebDriverWait(browser, timeout).until(EC.visibility_of_element_located((By.XPATH, '//div[@class="dataTables_wrapper no-footer"]')))
This element will be the same for all the pages, while the one you were trying to use is defined for the first page only.
UPD
The correct code should be like this:
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import Select

next_page_btn_xpath = '//a[@class="paginate_button next"]'
actions = ActionChains(driver)
# This is how you should treat the Select drop down
select = Select(driver.find_element_by_tag_name("select"))
select.select_by_value('100')
WebDriverWait(browser, timeout).until(EC.visibility_of_element_located((By.XPATH, '//div[@class="dataTables_wrapper no-footer"]')))
maxpage = int(browser.find_elements(By.XPATH, "//*[@id='DataTables_Table_0_paginate']/span/a")[-1].text)
i = 1
while i < maxpage + 1:
    res = browser.page_source
    soup = BeautifulSoup(res, 'html.parser')
    soup = soup.find(id='DataTables_Table_0')
    if i == 1:
        data = getValsHtml(soup)
    else:
        data = data.append(getValsHtml(soup))
    print(i)
    print(data)
    i = i + 1
    # scroll to the next page btn and then click it
    next_page_btn = driver.find_element_by_xpath(next_page_btn_xpath)
    actions.move_to_element(next_page_btn).perform()
    next_page_btn.click()
