How to print webelements in a table - Selenium Python

This code launches a website, extracts two web elements (email and ticket #), and prints them successfully.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.support import expected_conditions as EC
#Firefox Driver
driver = webdriver.Firefox(service=Service(r'C:\geckodriver-v0.32.0-win-aarch64\geckodriver.exe'))
#Launches Ticketing website
driver.get('WebsiteURl')
wait = WebDriverWait(driver, 10)
#Switches to iFrame
iframe = driver.find_element(By.XPATH, '//*[@id="gsft_main"]')
driver.switch_to.frame(iframe)
#Calls for value in row (Email and Ticket #)
Email = driver.find_element(By.XPATH,"//table/tbody/tr[1]/td[8]")
Ticket = driver.find_element(By.XPATH,"//table/tbody/tr[1]/td[3]")
print(Ticket.text + " : " + Email.text)
This is the output:
TicketNumber001 : useremail@domain.com
The output works just as intended, but now I am looking to do the same for those columns across the next 10 consecutive rows, i.e. the XPaths:
tr[1]/td[3]
...
tr[10]/td[3]
Which should look like this, and I should be able to export it to a CSV to feed a PowerShell script I have:
TicketNumber001 useremail@domain.com
...
TicketNumber010 useremail10@domain.com
I would appreciate your input; I'm a total newb with Python and this is my first time using Selenium.
Thank you,

If there is no other extra data in your table, you can use find_elements to fetch the table data directly.
Tickets = driver.find_elements(By.XPATH,"//table//td[3]")
This would give you an array of elements that you can manipulate.
You can iterate through the array to get the texts:
for ticket in Tickets:
    print("Ticket: " + ticket.text)
Assuming your Emails and Tickets lists are always the same length:
Emails = driver.find_elements(By.XPATH, "//table//td[8]")
Tickets = driver.find_elements(By.XPATH, "//table//td[3]")
for i in range(len(Tickets)):  # range(len(Tickets)-1) would skip the last row
    print(Tickets[i].text + " : " + Emails[i].text)
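Since you mentioned exporting to a CSV for your PowerShell script, here is a minimal sketch of that last step, assuming the Tickets and Emails lists from above (the file name tickets.csv is just a placeholder):
import csv
with open('tickets.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['Ticket', 'Email'])  # header row for the PowerShell side
    for ticket, email in zip(Tickets, Emails):
        writer.writerow([ticket.text, email.text])
zip() pairs the two lists and stops at the shorter one, so a missing cell won't raise an IndexError.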

Related

Selenium - Iterate Through Grid Elements?

Working on a project to make reservations and I'm very rusty. I am able to navigate dynamically to the page for a reservation 2 weeks out, but I am unable to locate and click on the time slots.
My final line throws an error, but my ultimate goal is to develop a code block that will iterate through the available times with some ranking system. For example, I'd set a ranked order of 8pm, 7:45pm, 7:30pm, 7:15pm, 8:15pm, etc. These time slots go fast, so I'll have to handle the possibility of the reservation being gone, or even being taken while completing the checkout process.
I know this is a lot, so any help or guidance is appreciated!
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
import datetime
ResDate = (datetime.date.today() + datetime.timedelta(days=14)).strftime("%Y-%m-%d")  # reservation date two weeks out
print(ResDate)
URL = "https://resy.com/cities/ny/lartusi-ny?date={}&seats=2".format(ResDate)
timeout = 30
driver = webdriver.Chrome()
driver.get(URL)
TimeBlock = WebDriverWait(driver, timeout).until(EC.element_to_be_clickable((By.PARTIAL_LINK_TEXT, '10:00')))
TimeBlock.click()
wait = WebDriverWait(driver, 3)
ranking_list=['8:00PM','7:45PM','10:00PM']
for rank in ranking_list:
    try:
        wait.until(EC.element_to_be_clickable((By.XPATH, f"//div[@class='ReservationButton__time' and text()='{rank}']"))).click()
        wait.until(EC.frame_to_be_available_and_switch_to_it((By.XPATH, "//iframe[@aria-label='Book with Resy']")))
        wait.until(EC.element_to_be_clickable((By.XPATH, "//button[./span[.='Reserve Now']]"))).click()
        break
    except:
        print('No availability: ', rank)
Imports:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
Basically, access each element of your ranking_list and then click on the reservation with that text. Once a click succeeds, break exits the loop, so lower-ranked times are only tried when the higher-ranked ones fail.
I didn't fully get your question about the ranking system, but for the button-clicking issue, try this code:
time_to_book = "10:15PM"
time_in_a_day = driver.find_elements(By.XPATH, "//*[@class='ReservationButton__time']")
# print(len(time_in_a_day))
time_text = []
for i in range(len(time_in_a_day)):
    time_text.append(time_in_a_day[i].text)
for i in range(len(time_text)):
    if time_text[i] == time_to_book:
        element = driver.find_element(By.XPATH, "(//*[@class='ReservationButton__time'])[" + str(i + 1) + "]//parent::button")
        driver.execute_script("arguments[0].click();", element)
        break
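As for the ranking system: a minimal sketch, assuming the same ReservationButton__time markup as above and that each time label sits inside a parent button, would collect the available times once and book the first match from your ranked list:
preferred_times = ['8:00PM', '7:45PM', '7:30PM', '7:15PM', '8:15PM']
buttons = driver.find_elements(By.XPATH, "//*[@class='ReservationButton__time']")
available = {b.text: b for b in buttons}  # map label text -> element
for slot in preferred_times:
    if slot in available:
        target = available[slot].find_element(By.XPATH, "./parent::button")
        driver.execute_script("arguments[0].click();", target)
        break
else:
    print('None of the ranked times are available')
The for/else makes the no-availability message fire only when every ranked time is missing.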

How can I iterate through rows of web form using selenium?

I have a web form that I autofill with data row by row using a Selenium script. Each time I run add_event_button, a new row is created, then I autofill from the date_field to the remarks_field. All the fields within one row have similar ID attributes; the only difference is that when you click add_event_button, the new row's ID is incremented by 1 across all fields.
Here is what I have done:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support.ui import Select
import time
import pandas as pd
from time import sleep
add_event_button = wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="tab-voyage-log-12780"]/div[2]/a[2]'))).click()
time.sleep(2)
date_field = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#WGItem02_voyage_log-25_4')))
date_field.send_keys(date_time_row[1])
location_field = Select(driver.find_element(By.ID, 'WGItem09_voyage_log-2_4'))
location_field.select_by_index(8)
event_field = Select(driver.find_element(By.ID, 'WGItem04_voyage_log-2_4'))
event_field.select_by_index(4)
subevent_field = Select(driver.find_element(By.ID, 'WGItem05_voyage_log-2_4'))
subevent_field.select_by_index(3)
remarks_field = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#WGItem06_voyage_log-2_4')))
remarks_field.send_keys(remarks_data_col[1])
How do I iterate through the above block of code up to 20 times? For example, WGItem05_voyage_log-2_4 should run until it stops at WGItem05_voyage_log-20_4, and remarks_field.send_keys(remarks_data_col[1]) should run until remarks_field.send_keys(remarks_data_col[20]).
for i in range(2, 21):
    remarks_field = wait.until(EC.element_to_be_clickable((By.XPATH, f"//*[@id='WGItem06_voyage_log-{i}_4']")))
    remarks_field.send_keys(remarks_data_col[i-1])
Did you mean looping like this, using an f-string to build the XPath for each element?
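If you want the whole block repeated, here is a minimal sketch of the full loop, assuming your existing wait and driver objects, that every field follows the -{i}_4 ID pattern from your snippet (your date field shows -25_4, which looks like a typo), and that clicking add_event_button creates row i before it is filled:
for i in range(2, 21):
    # add a new row, then fill its fields; the row index is part of each ID
    wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="tab-voyage-log-12780"]/div[2]/a[2]'))).click()
    time.sleep(2)  # give the new row time to render
    date_field = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, f'#WGItem02_voyage_log-{i}_4')))
    date_field.send_keys(date_time_row[i - 1])
    Select(driver.find_element(By.ID, f'WGItem09_voyage_log-{i}_4')).select_by_index(8)
    Select(driver.find_element(By.ID, f'WGItem04_voyage_log-{i}_4')).select_by_index(4)
    Select(driver.find_element(By.ID, f'WGItem05_voyage_log-{i}_4')).select_by_index(3)
    remarks_field = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, f'#WGItem06_voyage_log-{i}_4')))
    remarks_field.send_keys(remarks_data_col[i - 1])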

Python problem with URL web scraping

I want to learn Python, and to that end I started with a small web scraping project: a competitive scorecard for a travel agency. First of all, here is the site link: tn.tunisiebooking.com
As you can see, you have to fill out the form, and then a list of hotels is displayed. I managed to automate the search, but I got stuck at the data extraction step; I don't know why it goes back and extracts the data from the home page.
If you can help me and explain why this happens, thank you in advance. Here is the code I used:
import time
from selenium.webdriver.common.action_chains import ActionChains
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from bs4 import BeautifulSoup
import requests
PATH = r"C:\chromedriver.exe"
driver = webdriver.Chrome(PATH)
driver.get('https://tn.tunisiebooking.com/')
wait = WebDriverWait(driver, 20)
# write script
script = "document.getElementById('ville_des').value ='Sousse';document.getElementById('depart').value ='05/08/2021';document.getElementById('checkin').value ='05/08/2021';document.getElementById('select_ch').value = '1';"
# set the form values via JavaScript
driver.execute_script(script)
btn_rechercher = driver.find_element_by_id('boutonr')
btn_rechercher.click()
print(driver.current_url)
r = requests.get(driver.current_url)
soup = BeautifulSoup(r.text, 'html.parser')
results = soup.find_all('div', attrs={'class':'bloc_titre'})
print(len(results))
records = []
for result in results:
    nom = result.find('a').text
    records.append(nom)
print(len(records))
import pandas as pd
df = pd.DataFrame(records, columns=['nom'])
print(df.head())
For more details: this is the home page (screenshot: HomePage), and this is the page I want to scrape, which opens after I submit the form with my destination and dates (screenshot: hotelList). The problem is that my code's output shows the home page's list, not the second page's (screenshot: Output).
I hope I have made it clear now. Thank you.
This will get the names of the hotels using Selenium only. (The reason your version falls back to the home page is that requests.get(driver.current_url) opens a brand-new session with none of the form state from the browser, so the server serves the default page; staying inside the Selenium-driven browser avoids that.)
from time import sleep
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
PATH = r"C:\chromedriver.exe"
driver = webdriver.Chrome(PATH)
driver.get('https://tn.tunisiebooking.com/')
wait = WebDriverWait(driver, 20)
# write script (your script seems fine)
script = "document.getElementById('ville_des').value ='Sousse';document.getElementById('depart').value ='05/08/2021';document.getElementById('checkin').value ='05/08/2021';document.getElementById('select_ch').value = '1';"
# set the form values via JavaScript
driver.execute_script(script)
btn_rechercher = driver.find_element_by_id('boutonr')
btn_rechercher.click()
sleep(10)
#getting the hotel names by xpath in a loop
for v in range(1, 20):
    hotel_name = driver.find_element_by_xpath('/html/body/div[6]/div[2]/div[1]/div/div[2]/div/div[4]/div[' + str(v) + ']/div/div[3]/div[1]/div[1]/span/a/h3').get_attribute('innerHTML')
    print(hotel_name)
I don't know what other details you want, but this is an example of getting the hotel names based on your input.
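A hedged alternative to the fixed sleep(10) and the long absolute XPath: wait explicitly for the titles and read them in one go. The //span/a/h3 tail is only an assumption lifted from the absolute XPath above, so adjust it to the real markup:
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# wait until the result titles exist, then collect them all at once
hotel_titles = WebDriverWait(driver, 20).until(
    EC.presence_of_all_elements_located((By.XPATH, '//span/a/h3')))
for title in hotel_titles:
    print(title.get_attribute('innerHTML'))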

Loop is not working properly for Selenium Python

I'm trying to run the following piece of code:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
driver = webdriver.Chrome('C:/Users/SoumyaPandey/Desktop/Galytix/Scrapers/data_ingestion/chromedriver.exe')
driver.get('https://www.cnhindustrial.com/en-us/media/press_releases/Pages/default.aspx')
years_urls = list()
#ctl00_ctl33_g_8893c127_d0ad_40f2_9856_d85936172f35_years --> id for the year filter
years_elements = driver.find_element_by_id('ctl00_ctl33_g_8893c127_d0ad_40f2_9856_d85936172f35_years').find_elements_by_tag_name('a')
for i in range(len(years_elements)):
    years_urls.append(years_elements[i].get_attribute('href'))
newslinks = list()
for k in range(len(years_urls)):
    url = years_urls[k]
    driver.get(url)
    #link-detailpage --> class for the newslinks in each year
    news = driver.find_elements_by_class_name('link-detailpage')
    for j in range(len(news)):
        newslinks.append(news[j].find_element_by_tag_name('a').get_attribute('href'))
When I run this code, the newslinks list is empty at the end of execution. But if I run it line by line, assigning the value of 'k' one at a time myself, it runs successfully.
Where am I going wrong in the logic? Please help.
It seems there is too much redundant code. I would suggest using either a linear XPath or a CSS selector to identify the elements.
However, on some pages the news link does not appear, so you need to handle that with try..except.
Since you need to navigate to each URL, I would suggest using the explicit wait WebDriverWait().
Code:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
driver=webdriver.Chrome("C:/Users/SoumyaPandey/Desktop/Galytix/Scrapers/data_ingestion/chromedriver.exe")
driver.get("https://www.cnhindustrial.com/en-us/media/press_releases/Pages/default.aspx")
allyears=WebDriverWait(driver,10).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR,"div#ctl00_ctl33_g_8893c127_d0ad_40f2_9856_d85936172f35_years a")))
yearsurl=[url.get_attribute("href") for url in allyears]
newslinks = list()
for yr in yearsurl:
    driver.get(yr)
    try:
        for element in WebDriverWait(driver, 5).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "div.link-detailpage >a"))):
            newslinks.append(element.get_attribute("href"))
    except:
        continue
print(newslinks)
Output:
['https://www.cnhindustrial.com/en-us/media/press_releases/2021/march/Pages/a-problem-solved-at-a-rate-of-knots-the-latest-Top-Story-available-on-CNHIndustrial-com.aspx', 'https://www.cnhindustrial.com/en-us/media/press_releases/2021/march/Pages/CNH-Industrial-acquires-a-minority-stake-in-Augmenta.aspx', 'https://www.cnhindustrial.com/en-us/media/press_releases/2021/march/Pages/CNH-Industrial-presents-YOUNIVERSE.aspx', 'https://www.cnhindustrial.com/en-us/media/press_releases/2021/march/Pages/Calling-of-the-Annual-General-Meeting.aspx', 'https://www.cnhindustrial.com/en-us/media/press_releases/2021/march/Pages/CNH-Industrial-completes-minority-investment-in-Monarch-Tractor.aspx', 'https://www.cnhindustrial.com/en-us/media/press_releases/2021/February/Pages/CNH-Industrial-N-V--announces-the-extension-by-one-additional-year-to-March-2026-of-its-syndicated-credit-facility.aspx', 'https://www.cnhindustrial.com/en-us/media/press_releases/2021/February/Pages/Working-for-a-safer-future-with-World-Class-Manufacturing.aspx', 'https://www.cnhindustrial.com/en-us/media/press_releases/2021/February/Pages/Behind-the-Wheel-CNH-Industrial-supports-the-growing-hemp-industry-in-North-America.aspx', 'https://www.cnhindustrial.com/en-us/media/press_releases/2021/February/Pages/CNH-Industrial-employees-in-Italy-to-receive-contractual-bonus-for-2020-results.aspx', 'https://www.cnhindustrial.com/en-us/media/press_releases/2021/February/Pages/2020-Fourth-Quarter-and-Full-Year-Results.aspx', 'https://www.cnhindustrial.com/en-us/media/press_releases/2021/january/Pages/The-Iveco-Defence-Vehicles-plant-in-Sete-Lagoas,-Brazil-and-the-New-Holland-Agriculture-facility-in-Croix,-France.aspx', 'https://www.cnhindustrial.com/en-us/media/press_releases/2021/january/Pages/CNH-Industrial-to-announce-2020-Fourth-Quarter-and-Full-Year-financial-results-on-February-3-2021.aspx', 'https://www.cnhindustrial.com/en-us/media/press_releases/2021/january/Pages/CNH-Industrial-publishes-its-2021-Corporate-Calendar.aspx', 'https://www.cnhindustrial.com/en-us/media/press_releases/2021/january/Pages/Iveco-Defence-Vehicles-supplies-third-generation-protected-military-GTF8x8-(ZLK-15t)-trucks-to-the-German-Army.aspx', 'https://www.cnhindustrial.com/en-us/media/press_releases/2021/january/Pages/STEYR-New-Holland-Agriculture-CASE-Construction-Equipment-and-FPT-Industrial-win-prestigious-2020-Good-Design%C2%AE-Awards.aspx', 'https://www.cnhindustrial.com/en-us/media/press_releases/2021/january/Pages/CNH-Industrial-completes-the-acquisition-of-four-divisions-of-CEG-in-South-Africa.aspx',so on...]
Update:
If you don't want to use WebDriverWait, which is the best practice, then use time.sleep(), since the page needs some time to load and the element should be visible before interacting with it.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
driver = webdriver.Chrome("C:/Users/SoumyaPandey/Desktop/Galytix/Scrapers/data_ingestion/chromedriver.exe")
driver.get('https://www.cnhindustrial.com/en-us/media/press_releases/Pages/default.aspx')
years_urls = list()
time.sleep(5)
#ctl00_ctl33_g_8893c127_d0ad_40f2_9856_d85936172f35_years --> id for the year filter
years_elements = driver.find_elements_by_xpath('//div[@id="ctl00_ctl33_g_8893c127_d0ad_40f2_9856_d85936172f35_years"]//a')
for i in range(len(years_elements)):
    years_urls.append(years_elements[i].get_attribute('href'))
print(years_urls)
newslinks = list()
for k in range(len(years_urls)):
    url = years_urls[k]
    driver.get(url)
    time.sleep(3)
    news = driver.find_elements_by_xpath('//div[@class="link-detailpage"]/a')
    for j in range(len(news)):
        newslinks.append(news[j].get_attribute('href'))
print(newslinks)
There is a popup asking you to accept cookies that you need to click beforehand.
Add this to your script:
WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.ID, "CybotCookiebotDialogBodyButtonAccept")))
driver.find_element_by_id("CybotCookiebotDialogBodyButtonAccept").click()
So the final result will be:
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
driver = webdriver.Chrome('C:/Users/SoumyaPandey/Desktop/Galytix/Scrapers/data_ingestion/chromedriver.exe')
driver.get('https://www.cnhindustrial.com/en-us/media/press_releases/Pages/default.aspx')
# this part is added, together with the necessary imports
WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.ID, "CybotCookiebotDialogBodyButtonAccept")))
driver.find_element_by_id("CybotCookiebotDialogBodyButtonAccept").click()
years_urls = list()
#ctl00_ctl33_g_8893c127_d0ad_40f2_9856_d85936172f35_years --> id for the year filter
# years_elements = driver.find_element_by_css_selector("#ctl00_ctl33_g_8893c127_d0ad_40f2_9856_d85936172f35_years")
years_elements = driver.find_element_by_id('ctl00_ctl33_g_8893c127_d0ad_40f2_9856_d85936172f35_years').find_elements_by_tag_name('a')
for i in range(len(years_elements)):
    years_urls.append(years_elements[i].get_attribute('href'))
newslinks = list()
for k in range(len(years_urls)):
    url = years_urls[k]
    driver.get(url)
    #link-detailpage --> class for the newslinks in each year
    news = driver.find_elements_by_class_name('link-detailpage')
    for j in range(len(news)):
        newslinks.append(news[j].find_element_by_tag_name('a').get_attribute('href'))

Scraping a specific table in selenium

I am trying to scrape a table found inside a div on a page.
Basically here's my attempt so far:
# NOTE: Download the chromedriver driver
# Then move exe file on C:\Python27\Scripts
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import sys
driver = webdriver.Chrome()
driver.implicitly_wait(10)
URL_start = "http://www.google.us/trends/explore?"
date = '&date=today%203-m' # Last 90 days
location = "&geo=US"
symbol = sys.argv[1]
query = 'q='+symbol
URL = URL_start+query+date+location
driver.get(URL)
table = driver.find_element_by_xpath('//div[@class="line-chart"]/table/tbody')
print table.text
If I run the script with an argument like "stackoverflow", I should be able to scrape this site: https://www.google.us/trends/explore?date=today%203-m&geo=US&q=stackoverflow
Apparently the XPath I have there is not working; the program is not printing anything, just plain blank.
I basically need the values of the chart that appears on that website. Those values (and dates) are inside a table (screenshot not included).
Could you help me locate the correct XPath of the table to retrieve those values using Selenium in Python?
Thanks in advance!
You can use an XPath as follows:
//div[@class="line-chart"]/div/div[1]/div/div/table/tbody/tr
Here I will refine my answer and make some changes to your code; now it works.
# NOTE: Download the chromedriver driver
# Then move exe file on C:\Python27\Scripts
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import sys
from lxml.html import fromstring,tostring
driver = webdriver.Chrome()
driver.implicitly_wait(20)
'''
URL_start = "http://www.google.us/trends/explore?"
date = '&date=today%203-m' # Last 90 days
location = "&geo=US"
symbol = sys.argv[1]
query = 'q='+symbol
URL = URL_start+query+date+location
'''
driver.get("https://www.google.us/trends/explore?date=today%203-m&geo=US&q=stackoverflow")
table_trs = driver.find_elements_by_xpath('//div[@class="line-chart"]/div/div[1]/div/div/table/tbody/tr')
for tr in table_trs:
    #print tr.get_attribute("innerHTML").encode("UTF-8")
    td = tr.find_elements_by_xpath(".//td")
    if len(td) == 2:
        print td[0].get_attribute("innerHTML").encode("UTF-8") + "\t" + td[1].get_attribute("innerHTML").encode("UTF-8")
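If you want those date/value pairs in a file rather than on screen, here is a minimal follow-up sketch (same Python 2 style as the snippet above; 'trends.csv' is just a placeholder name):
import csv
with open('trends.csv', 'wb') as f:  # 'wb' is the Python 2 csv convention
    writer = csv.writer(f)
    for tr in table_trs:
        td = tr.find_elements_by_xpath(".//td")
        if len(td) == 2:  # keep only the two-cell date/value rows
            writer.writerow([td[0].get_attribute("innerHTML").encode("UTF-8"),
                             td[1].get_attribute("innerHTML").encode("UTF-8")])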
