Selenium getting wrong data

Selenium getting wrong data - python

I have a scraper to get movies from IMDB, it seems to work fine, but in some cases it gets the wrong data. And that doesn't always happen, but for example, it opens the page of the movie and takes the image of that movie, then the window is closed and a new one opens with another movie, but it ends up saving the image of the previous movie and in some cases does not get all the data. But that doesn't always happen, so I have no idea what might be happening. Can anyone give me an idea of what could be happening?
My code is this:
import re
from time import sleep
import csv
import sqlite3
import pickle
from turtle import title
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from os.path import exists
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import NoSuchElementException
from requests_html import HTML
import pandas
from slugify import slugify
da=[]
def parse_details(driver,asin,data_id):
sleep(2)
nulo='NULL'
try:
titulo_filme=driver.find_element(By.XPATH,'//h1[#data-testid="hero-title-block__title"]').text
slug_titulo = slugify(titulo_filme)
except:
titulo_filme=' '
try:
nota_imdb=driver.find_element(By.XPATH,'//div[#data-testid="hero-rating-bar__aggregate-rating__score"]/span[1]').text
except:
nota_imdb=' '
try:
#genum=driver.find_element(By.XPATH,'//*[#data-testid="storyline-genres"]/div/ul/li[1]').text
#gendois=driver.find_element(By.XPATH,'//*[#data-testid="storyline-genres"]/div/ul/li[2]').text
#gentres=driver.find_element(By.XPATH,'//*[#data-testid="storyline-genres"]/div/ul/li[3]').text
#genquatro=driver.find_element(By.XPATH,'//*[#data-testid="storyline-genres"]/div/ul/li[4]').text
#genero = genum + '- ' + gendois + '- ' + gentres + '- ' + genquatro
get_gen = driver.find_elements(By.XPATH,'//*[#data-testid="genres"]/div/a')
lista_gen = []
for gene in get_gen:
lista_gen.append(gene.text)
genero = lista_gen
except:
get_gen = driver.find_elements(By.XPATH,'//*[#data-testid="storyline-genres"]/div/ul/li/a')
lista_gen = []
for gene in get_gen:
lista_gen.append(gene.text)
genero = lista_gen
try:
data_lancamento=driver.find_element(By.XPATH,'//*[#data-testid="title-details-releasedate"]/div').text
except:
data_lancamento=''
try:
tempo_duracao=driver.find_element(By.XPATH,'//*[#data-testid="title-techspec_runtime"]/div').text
except:
tempo_duracao=''
try:
idioma=driver.find_elements(By.XPATH,'//*[#data-testid="title-details-languages"]/div/*/li/a')
lista_idioma = []
for idiom in idioma:
lista_idioma.append(idiom.text)
idioma=lista_idioma
except:
idioma=''
try:
empresa=driver.find_elements(By.XPATH,'//*[#data-testid="title-details-companies"]/div/*/li/a')
lista_empresa = []
for empres in empresa:
lista_empresa.append(empres.text)
empresa_produtora=lista_empresa
except:
empresa_produtora=''
try:
tbm_conhecido_como=driver.find_element(By.XPATH,'//*[#data-testid="title-details-akas"]/div').text
except:
tbm_conhecido_como=''
try:
pais=driver.find_elements(By.XPATH,'//*[#data-testid="title-details-origin"]/div/*/li/a')
lista_pais = []
for pai in pais:
lista_pais.append(pai.text)
pais_origem=lista_pais
except:
pais_origem=''
try:
som=driver.find_elements(By.XPATH,'//*[#data-testid="title-techspec_soundmix"]/div/*/li/a')
lista_som = []
for so in som:
lista_som.append(so.text)
codec_som=lista_som
except:
codec_som=''
try:
geet=driver.find_elements(By.XPATH,'//*[#data-testid="title-cast-item__actor"]')
lista = []
for gen in geet:
lista.append(gen.text)
elenco_principal = lista
except:
elenco_principal=''
try:
diretor_get=driver.find_elements(By.XPATH,'//*[#data-testid="title-pc-wide-screen"]//*[#data-testid="title-pc-principal-credit"]//*[contains(text(),"Direção")]/parent::li/div/ul/li')
diretor_get_criacao=driver.find_elements(By.XPATH,'//*[#data-testid="title-pc-wide-screen"]//*[#data-testid="title-pc-principal-credit"]//*[contains(text(),"Criação")]/parent::li/div/ul/li')
lista_diretor = []
for diret in diretor_get:
lista_diretor.append(diret.text)
for diret_criacao in diretor_get_criacao:
lista_diretor.append(diret_criacao.text)
diretor=lista_diretor
except:
diretor=''
try:
roteiristas_get=driver.find_elements(By.XPATH,'//*[#data-testid="title-pc-wide-screen"]//*[#data-testid="title-pc-principal-credit"]//*[contains(text(),"Roteiristas")]/parent::li/div/ul/li')
roteiristas_get_single=driver.find_elements(By.XPATH,'//*[#data-testid="title-pc-wide-screen"]//*[#data-testid="title-pc-principal-credit"]//*[contains(text(),"Roteirista")]/parent::li/div/ul/li')
lista_roteiristas = []
for roteiro in roteiristas_get:
lista_roteiristas.append(roteiro.text)
for roteiro_single in roteiristas_get_single:
lista_roteiristas.append(roteiro_single.text)
roteiristas=lista_roteiristas
except:
roteiristas=''
try:
artistas_get=driver.find_elements(By.XPATH,'//*[#data-testid="title-pc-wide-screen"]//*[#data-testid="title-pc-principal-credit"]//*[contains(text(),"Artistas")]/parent::li/div/ul/li')
lista_artistas_principal = []
for artist in artistas_get:
lista_artistas_principal.append(artist.text)
artistas_principal=lista_artistas_principal
except:
artistas_principal=''
try:
classif_indicativa=WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "//*[#data-testid='storyline-certificate']/div/ul/li"))).text
except:
classif_indicativa=''
try:
slogan=driver.find_element(By.XPATH,'//*[#data-testid="storyline-taglines"]/div').text
except:
slogan=''
try:
img=WebDriverWait(driver, 2).until(EC.presence_of_element_located((By.XPATH, "//img[#class='ipc-image']"))).get_attribute('srcset')
except:
img='testeeee.jpg'
try:
des=driver.find_element(By.XPATH,'//meta[#name="description"]').get_attribute('content')
except:
des=''
try:
tipo=driver.find_element(By.XPATH,'//*[#data-testid="hero-title-block__metadata"]//*[contains(text(),"Minissérie")] | //*[#data-testid="hero-title-block__metadata"]//*[contains(text(),"Série de TV")] | //*[#data-testid="hero-title-block__metadata"]//*[contains(text(),"Especial de TV")]').text
except:
tipo='Filme'
try:
prime_video_link=driver.find_element(By.XPATH,'//*[#data-testid="tm-box-pwo-btn"]//div[contains(text(),"Prime Video")]/ancestor::a').get_attribute('href')
except:
try:
driver.find_element(By.XPATH,'//*[#data-testid="tm-box-mwo-btn"]//*[contains(text(),"Mais opções")]').click()
sleep(1)
prime_video_link=driver.find_element(By.XPATH,'//*[#data-testid="promptable"]//*[#data-focus-lock-disabled="false"]//*[contains(text(),"RENT/BUY")]/parent::div/ul/a').get_attribute('href')
except:
prime_video_link=''
da.append([nulo, data_id, titulo_filme, slug_titulo, tipo, nota_imdb, prime_video_link, data_lancamento, img, des, genero, tempo_duracao, idioma, empresa_produtora, tbm_conhecido_como, pais_origem, codec_som, elenco_principal, diretor, roteiristas, artistas_principal, classif_indicativa, slogan])
df=pandas.DataFrame(da,columns=['nulo','id','titulo_filme', 'slug_titulo', 'tipo', 'nota_imdb', 'prime_video_link', 'data_lancamento', 'imagem','descricao', 'genero', 'tempo_duracao', 'idioma', 'empresa_produtora', 'tbm_conhecido_como', 'pais_origem', 'codec_som', 'elenco_principal', 'diretor', 'roteiristas', 'artistas_principal', 'classif_indicativa', 'slogan'])
df.to_csv(f'{file_name}.csv',index=False)
print(da)
driver.close()
def collecting_links(linkss):
for link in linkss:
id_link = link.get_attribute('href')
#asi = id_link.split("/")[5]
dat_id=id_link.split('/')[-2]
driver.execute_script(f"window.open('{id_link}')")
driver.switch_to.window(driver.window_handles[-1])
parse_details(driver,asin='asi',data_id=dat_id)
driver.switch_to.window(driver.window_handles[0])
def main(driver):
sub_category_link=[]
driver.get(input_url)
#change_location()
links = driver.find_elements(By.XPATH, '//h3[#class="lister-item-header"]/a')
collecting_links(linkss=links)
while True:
try:
driver.find_element(By.LINK_TEXT,'Next »').click()
links = driver.find_elements(By.XPATH, '//h3[#class="lister-item-header"]/a')
collecting_links(linkss=links)
except:
print("Finalizado ---- Links:")
print(driver.find_element(By.LINK_TEXT,'Previous').get_attribute('href'))
break
if __name__ == "__main__":
print("Starting Scraper")
daa=[]
input_url=input('Enter your url: ')
file_name=input('Enter your file name: ')
daa.append([input_url,str(file_name)+'.csv'])
df=pandas.DataFrame(daa,columns=['','']).to_csv('scraped_url.txt',index=False,header=False,mode='a')
s = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(ChromeDriverManager().install())
driver.maximize_window()
main(driver)
sleep(2)

Related

Python Selenium .send_keys() only sending first character of my string

I was trying to automate a post to Facebook using Python Selenium, and it was 90% complete. The only issue is that the string I give is "test," but when Facebook posts, it just sends the first character of "test," which is "t."
This is the code:
#libraries
from selenium import webdriver
from selenium.webdriver.common.by import By
import selenium.webdriver.common.keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import bs4
from bs4 import BeautifulSoup as soup
from urllib.request import Request, urlopen
from time import sleep
import pyautogui
#fetching hashtags
def hashtags(hash_idea):
url = 'http://best-hashtags.com/hashtag/' + hash_idea
try:
req = Request(url, headers={'User-Agent' : 'Mozilla/5.0'})
page = urlopen(req, timeout=10)
page_html = page.read()
page.close()
page_soup = soup(page_html, 'html.parser')
result = page_soup.find('div',{'class':'tag-box tag-box-v3 margin-bottom-40'})
tags = result.decode()
start_index = tags.find('#')
end_index = tags.find('</p1>')
tags = tags[start_index:end_index]
return tags
except:
print('Something went wrong While Fetching hashtags')
def login(username, password):
try:
url = 'https://facebook.com'
driver.get(url)
user = WebDriverWait(driver,20).until(EC.presence_of_element_located((By.NAME, 'email')))
user.send_keys(username)
pas = WebDriverWait(driver,20).until(EC.presence_of_element_located((By.NAME, 'pass')))
pas.send_keys(password)
login_btn = WebDriverWait(driver,20).until(EC.presence_of_element_located((By.NAME,'login')))
login_btn.click()
except:
print('Something went wrong while login process')
def upload(img_path,caption):
try:
btn1 = WebDriverWait(driver,20).until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[1]/div[1]/div/div[3]/div/div/div/div[1]/div[1]/div/div[2]/div/div/div/div[3]/div/div[2]/div/div/div/div[1]/div/div[1]')))
btn1.click()
btn2= WebDriverWait(driver,20).until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[1]/div[1]/div/div[4]/div/div/div[1]/div/div[2]/div/div/div/form/div/div[1]/div/div/div/div[3]/div[1]/div[2]/div/div[1]/div/span/div/div/div[1]/div/div/div[1]/i')))
btn2.click()
btn3 = WebDriverWait(driver,20).until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[1]/div[1]/div/div[4]/div/div/div[1]/div/div[2]/div/div/div/form/div/div[1]/div/div/div/div[2]/div[1]/div[2]/div/div[1]/div/div/div/div[1]/div/div/div/div[1]/div/i')))
btn3.click()
pyautogui.write(img_path)
pyautogui.press('enter')
cap = WebDriverWait(driver,20).until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[1]/div[1]/div/div[4]/div/div/div[1]/div/div[2]/div/div/div/form/div/div[1]/div/div/div/div[2]/div[1]/div[1]/div[1]/div/div/div[1]')))
cap.send_keys(caption)
sleep(5) # this is mandatory while doing some thing with bot
btn_post = WebDriverWait(driver,20).until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[1]/div[1]/div/div[4]/div/div/div[1]/div/div[2]/div/div/div/form/div/div[1]/div/div/div/div[3]/div[2]/div/div/div[1]/div')))
btn_post.click()
except:
print('Something Went Wrong While posting the image or video')
if __name__== "__main__":
#turn for credentials, driver, and caption
username = input('username : ')
password = input('pass : ')
img_path = 'pic1.jpg'
hash_idea = 'covid'
caption = 'test' # if you want to
caption = caption + '\n' + hashtags(hash_idea)
driver = webdriver.Firefox(executable_path="C:/Users/Asus/Downloads/Compressed/geckodriver-v0.32.0-win64/geckodriver.exe")
login(username,password)
upload(img_path,caption)
I wanted to automate the post with the text I provided in the code.

You can try several alternatives
In the definition of cap replace presence_of_element_located with element_to_be_clickable.
Do what in 1. and moreover add
cap = ...
cap.clear()
cap.click()
cap.send_keys(caption)
Do what in 1. and moreover use ActionChains
from selenium.webdriver.common.action_chains import ActionChains
actions = ActionChains(driver)
cap = ...
actions.move_to_element(cap) # move the mouse to the middle of element
actions.click()
actions.send_keys(caption).perform()
If none works, then you can always send one character at a time
[cap.send_keys(c) for c in caption]

Data are overwrite how to solve that

With every iteration through the loop, the previously extracted data is overwritten. How can I solve this problem?
from selenium import webdriver
import time
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support.select import Select
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
# url='https://www.amazon.com/dp/B00M0DWQYI?th=1'
# url='https://www.amazon.com/dp/B010RWD4GM?th=1'
PATH="C:\Program Files (x86)\chromedriver.exe"
driver =webdriver.Chrome(PATH)
df_urls = pd.read_csv('D:/selenium/inputs/amazone-asin.csv',encoding='utf-8')
list_dicts_urls =df_urls.to_dict('records')
item=dict()
product=[]
for url in list_dicts_urls:
product_url = 'https://' + url['MARKETPLACE'] + '/dp/' + url['ASIN']
driver.get(product_url)
try:
item['title'] = driver.find_element(By.CSS_SELECTOR,'span#productTitle').text
except:
item['title'] = ''
try:
item['brand'] = driver.find_element(By.CSS_SELECTOR,'a#bylineInfo').text.replace('Visit the','').replace('Store','').strip()
except:
item['brand'] = ''
try:
rating = driver.find_element(By.CSS_SELECTOR,'span#acrCustomerReviewText').text.replace('ratings','').strip()
rating = int(rating.replace(',', ''))
item['rating'] = rating
except:
item['rating'] = ''
time.sleep(2)
try:
p1=driver.find_element(By.XPATH, '//span[#class="a-price-whole"]').text
p2= driver.find_element(By.XPATH, '//span[#class="a-price-fraction"]').text
item['price']=p1+p2
except:
item['price']=''
product.append(item)
df=pd.DataFrame(product)
df.to_csv("ama.csv")

I think you need to define item=dict() inside the for loop. Otherwise this is the same, single item object used in all the loop iterations.
Try this:
from selenium import webdriver
import time
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support.select import Select
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
# url='https://www.amazon.com/dp/B00M0DWQYI?th=1'
# url='https://www.amazon.com/dp/B010RWD4GM?th=1'
PATH="C:\Program Files (x86)\chromedriver.exe"
driver =webdriver.Chrome(PATH)
df_urls = pd.read_csv('D:/selenium/inputs/amazone-asin.csv',encoding='utf-8')
list_dicts_urls =df_urls.to_dict('records')
product=[]
for url in list_dicts_urls:
item=dict()
product_url = 'https://' + url['MARKETPLACE'] + '/dp/' + url['ASIN']
driver.get(product_url)
try:
item['title'] = driver.find_element(By.CSS_SELECTOR,'span#productTitle').text
except:
item['title'] = ''
try:
item['brand'] = driver.find_element(By.CSS_SELECTOR,'a#bylineInfo').text.replace('Visit the','').replace('Store','').strip()
except:
item['brand'] = ''
try:
rating = driver.find_element(By.CSS_SELECTOR,'span#acrCustomerReviewText').text.replace('ratings','').strip()
rating = int(rating.replace(',', ''))
item['rating'] = rating
except:
item['rating'] = ''
time.sleep(2)
try:
p1=driver.find_element(By.XPATH, '//span[#class="a-price-whole"]').text
p2= driver.find_element(By.XPATH, '//span[#class="a-price-fraction"]').text
item['price']=p1+p2
except:
item['price']=''
product.append(item)
df=pd.DataFrame(product)
df.to_csv("ama.csv")

web scraping with python selenium loop and save problem

'Hi,I want to save the data I took as csv and txt, but I couldn't.
Moreover;
How can I repeat this process multiple times?'
nextInput = driver.find_element("xpath",'//*[#id="pnnext"]/span[2]').click()
result = driver.find_elements(By.CSS_SELECTOR, ".GyAeWb cite.iUh30")
'
Code;
'
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
import pandas as pd
import time
import csv
import re
driver = webdriver.Chrome()
url ="http://google.com"
driver.get(url)
searchInput = driver.find_element("xpath",'/html/body/div[1]/div[3]/form/div[1]/div[1]/div[1]/div/div[2]/input')
time.sleep(1)
searchInput.send_keys("dişçi")
time.sleep(2)
searchInput.send_keys(Keys.ENTER)
time.sleep(2)
result = driver.page_source
result = driver.find_elements(By.CSS_SELECTOR, ".GyAeWb cite.iUh30")
for index,element in enumerate (result):
print(index+1,element.text)
result = []
result = list(set(result))
time.sleep(2)
nextInput = driver.find_element("xpath",'//*[#id="pnnext"]/span[2]').click()
result = driver.find_elements(By.CSS_SELECTOR, ".GyAeWb cite.iUh30")
for index,element in enumerate (result):
print(index+1,element.text)
count = 1
with open("siteler.txt","w",encoding="UTF-8") as file:
for item in result:
file.write(f"{count}-{item}\n")
count+=1
driver.close()

try:
while 1:
nextInput = driver.find_element("xpath",'//*[#id="pnnext"]/span[2]').click()
result = driver.find_elements(By.CSS_SELECTOR, ".GyAeWb cite.iUh30")
for index,element in enumerate (result):
print(index+1,element.text)
count = 1
with open("siteler.txt","w",encoding="UTF-8") as file:
for item in result:
file.write(f"{count}-{item}\n")
count+=1
except Exception as e:
print(e)
finally:
print("there is no element with '//*[#id='pnnext']/span[2]' XPATH")

I am very new to scraping please bear with me and this is my 1st project. I am trying to scrape a site using selenium

"problem lines"
for_tariff_loop = driver.find_elements_by_xpath("//span[#class='phx-radio__element']")
radio_label_list = for_tariff_loop[i].find_element_by_css_selector('span[class="phx-radio__label"]')
print(radio_label_list)
time.sleep(1)
website I'm scraping https://www.telekom.de/unterwegs/apple/apple-iphone-13-pro/graphit-512gb
label image
I was not able to print the radio buttons label according to checked button. I don't know what is the mistake and where I did it. could anyone help on this. It will be helpful for me to learn. Change tariff links given below links,
import xlwt
from selenium import webdriver
import re
import time
from datetime import date
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
class telekommobiles:
def __init__(self):
self.url="https://www.telekom.de/mobilfunk/geraete/smartphone?page=1&pageFilter=promotion"
self.country='DE'
self.currency='GBP'
self.VAT='Included'
self.shipping = 'free shipping within 3-4 weeks'
self.Pre_PromotionPrice ='N/A'
self.color ='N/A'
def telekom(self):
#try:
driver=webdriver.Chrome()
driver.maximize_window()
driver.get(self.url)
today = date.today()
time.sleep(5)
cookies = driver.find_element_by_css_selector('button.cl-btn.cl-btn--accept-all').click()
print("cookies accepted")
links_prod_check = []
prod_models = []
prod_manufacturer =[]
prod_memorys = []
product_colors =[]
product_price_monthly_payments = []
product_price_one_time_payments =[]
product_links = []
containers = driver.find_elements_by_css_selector('div[class="styles_item__12Aw4"]')
i = 1
for container in containers:
p_links =container.find_element_by_tag_name('a').get_attribute('href')
i = i + 1
product_links.append(p_links)
#print(p_links)
for links in product_links:
driver.get(links)
#time.sleep(5)
#print(driver.current_url)
#links_prod_check.append(driver.current_url)
coloroptions = WebDriverWait(driver, 30).until(EC.presence_of_all_elements_located((By.XPATH,"//li[#data-qa='list_ColorVariant']")))
#print(coloroptions)
for i in range(len(coloroptions)):
coloroption = driver.find_elements_by_xpath("//li[#data-qa='list_ColorVariant']")
coloroption[i].click()
#print(coloroption[i])
time.sleep(3)
memoryoptions = WebDriverWait(driver, 30).until(EC.presence_of_all_elements_located((By.XPATH,"//span[#class='phx-radio__element']")))
for i in range(len(memoryoptions)):
memoryoption = driver.find_elements_by_xpath("//span[#class='phx-radio__element']")
try:
memoryoption[i].click()
except:
pass
time.sleep(5)
change_traiff = driver.find_element_by_css_selector('button[class="phx-link phx-list-of-links__link js-mod tracking-added"]').click()
time.sleep(3)
#looping for each section
section_loops = driver.find_elements_by_css_selector('section[class="tariff-catalog--layer"]')
#print(len(section_loops))
for section_loop in section_loops:
#print(section_loop)
time.sleep(5)
#Headings
heading_1 = section_loop.find_element_by_css_selector('h2[class="page-title page-title--lowercase"]').text
print(heading_1)
# looping for each separate boxes
each_box_subcontainers = section_loop.find_elements_by_css_selector('.phx-tariff-box__section')
#print(len(each_box_subcontainers))
for subcontainer in each_box_subcontainers:
#print(subcontainer)
looping_for_tariff = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.XPATH,"//span[#class='phx-radio__element']")))
#print(looping_for_tariff)
for i in range(len(looping_for_tariff)):
#print(i)
try:
for_tariff_loop = driver.find_elements_by_xpath("//span[#class='phx-radio__element']")
for_tariff_loop[i].click()
time.sleep(3)
except:
pass
for_tariff_loop = driver.find_elements_by_xpath("//span[#class='phx-radio__element']")
radio_label_list = for_tariff_loop[i].find_element_by_css_selector('span[class="phx-radio__label"]')
print(radio_label_list)
time.sleep(1)
change_traiff_close_button = driver.find_element_by_css_selector('span[class="icon-after-yellow-close right close popup-close-tr js-popup-close"]').click()
telekom_de=telekommobiles()
telekom_de.telekom()

You are trying to find element within an element. Finding radio_label_list using for_tariff_loop[i], xpath for radio_label_list will become like below:
//span[#class='phx-radio__element']//span[#class="phx-radio__label"]
Which does not exist in the DOM.
I tried the last part of the code. And was able to print the Memory size like below. Do try and confirm:
Replaced css-selector for radio_label_list with this xpath ./following-sibling::span
looping_for_tariff = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.XPATH, "//span[#class='phx-radio__element']")))
# print(looping_for_tariff)
for i in range(len(looping_for_tariff)):
# print(i)
try:
for_tariff_loop = driver.find_elements_by_xpath("//span[#class='phx-radio__element']")
for_tariff_loop[i].click()
time.sleep(3)
except:
pass
for_tariff_loop = driver.find_elements_by_xpath("//span[#class='phx-radio__element']")
radio_label_list = for_tariff_loop[i].find_element_by_xpath("./following-sibling::span").text
print(radio_label_list)
time.sleep(1)
As per the comments, check this code:
driver.get("https://www.telekom.de/unterwegs/apple/apple-iphone-13-pro/graphit-512gb")
wait = WebDriverWait(driver,30)
wait.until(EC.element_to_be_clickable((By.XPATH,"//button[text()='Accept All']"))).click()
wait.until(EC.element_to_be_clickable((By.XPATH,"//ul[contains(#class,'phx-tariff-notification-box-new__element--desktop-tablet')]/li[2]/button"))).click()
length = len(driver.find_elements_by_class_name("phx-tariff-box__section"))
for i in range(length):
print("----------------------------------------------------------------------------------------------------------")
options = driver.find_elements_by_class_name("phx-tariff-box__section")
datas = options[i].find_element_by_xpath(".//div[contains(#class,'phx-tariff-box__volume')]").get_attribute("innerText")
print("data: {}".format(datas))
len_types = len(options[i].find_elements_by_xpath(".//div[#class='phx-tariff-box__radios-inner']//label"))
types = options[i].find_elements_by_xpath(".//div[#class='phx-tariff-box__radios-inner']//label")
if len(types) == 0:
price = options[i].find_element_by_xpath(".//p[#data-qa='block_TariffPrice']").get_attribute("innerText")
print(price)
else:
for j in range(len_types):
types[j].click()
time.sleep(2)
options = driver.find_elements_by_class_name("phx-tariff-box__section")
types = options[i].find_elements_by_xpath(".//div[#class='phx-tariff-box__radios-inner']//label")
try:
types[j].find_element_by_xpath("./input[#checked]")
type = types[j].find_element_by_xpath("./span[2]").get_attribute("innerText")
price = options[i].find_element_by_xpath(".//p[#data-qa='block_TariffPrice']").get_attribute("innerText")
print(f"{type}: {price}")
except:
pass

what happens when find_elements can't find the class?

I am trying to find a particular class on a website. The class is sometimes present and sometimes it is absent.
So when the class is present, it takes a few seconds for the script to locate the element(logo). When the class is not present,the script runs for a long time and then end.
Why is that? is there any way to speed it up when the class doesn't exist?
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from time import sleep
chrome_path = r"C:\Users\peter\Desktop\chromedriver.exe"
driver = webdriver.Chrome(executable_path=r"C:\Users\peter\Desktop\chromedriver.exe")
driver.get("https://example.com/app/login")
driver.minimize_window()
driver.implicitly_wait(300)
input_email = driver.find_element_by_xpath("//input[#type='email']")
input_email.send_keys('example#gmail.com')
input_password = driver.find_element_by_xpath("//input[#type='password']")
input_password.send_keys('example')
click_login = driver.find_element_by_xpath("//button[#type='submit']")
click_login.click()
driver.find_element_by_id("schedule-today").click()
sleep(2)
logo = driver.find_elements_by_xpath( "//*[contains(#class, 'lbl_lesson_status label label-info lbl_lesson_open')]" );
if not logo:
print("empty")
f = open("reserved_date", "a+")
for i in logo:
opendate = i.get_attribute("data-t-start-local");
f.write((opendate)+'\n')
print(opendate)
driver.close()

You Need To Add Wait And Add Try Except for example if element not found throw message and quit that script
I Simply Code For You!
Try This Code:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import *
from selenium.webdriver.common.keys import Keys
import time
chrome_path = r"C:\Users\peter\Desktop\chromedriver.exe"
driver = webdriver.Chrome(executable_path=r"C:\Users\peter\Desktop\chromedriver.exe")
driver.get("https://example.com/app/login")
driver.minimize_window()
try:
input_email = WebDriverWait(driver,20).until(EC.element_to_be_clickable((By.XPATH,"//input[#type='email']")))
input_email.send_keys('example#gmail.com')
except (TimeoutException,NoSuchElementException):
print('There is No Email Input!')
quit()
try:
input_password = WebDriverWait(driver,20).until(EC.element_to_be_clickable((By.XPATH,"//input[#type='password']")))
input_password.send_keys('example')
except (TimeoutException,NoSuchElementException):
print('There is No Password Input!')
quit()
try:
click_login = WebDriverWait(driver,20).until(EC.element_to_be_clickable((By.XPATH,"//button[#type='submit']")))
click_login.click()
except (TimeoutException,NoSuchElementException):
print('There is No Login Button!')
quit()
try:
WebDriverWait(driver,20).until(EC.element_to_be_clickable((By.CSS_SELECTOR,"#schedule-today")))
time.sleep(2)
except (TimeoutException,NoSuchElementException):
print("Can't Find schedule-today id!")
quit()
try:
logo = WebDriverWait(driver,20).until(EC.element_to_be_clickable((By.XPATH,"//*[contains(#class, 'lbl_lesson_status label label-info lbl_lesson_open')]")))
f = open("reserved_date", "a+")
for i in logo:
opendate = i.get_attribute("data-t-start-local");
f.write((opendate)+'\n')
print(opendate)
except (TimeoutException,NoSuchElementException):
print("Can't Find Logo Button!")
quit()
driver.close()

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Selenium getting wrong data - python

Related

Python Selenium .send_keys() only sending first character of my string

Data are overwrite how to solve that

web scraping with python selenium loop and save problem

I am very new to scraping please bear with me and this is my 1st project. I am trying to scrape a site using selenium

what happens when find_elements can't find the class?

Categories

Resources