Scrape Name & Address from a website using Selenium Python - python

I would like to scrape the "Name" & "Address" from the following site:
https://register.fca.org.uk/s/firm?id=001b000000MfNWNAA3
However, I am struggling with referencing the correct field within the page and returning the results.
What I need help with is a working solution where the query grabs the "name" from the webpage and outputs it.
Code:
import string
import pandas as pd
from lxml import html
from bs4 import BeautifulSoup
from urllib.request import Request, urlopen
from IPython.core.display import display, HTML
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
Example Reference:
driver = webdriver.Chrome(chrome_options = options, executable_path=r'C:\Downloads\chromedriver.exe')
driver.get("https://register.fca.org.uk/s/firm?id=001b000000MfNWNAA3")
title = driver.find_elements(By.CSS_SELECTOR,'.slds-media__body h1 > a')
print(title.text)
Looking forward to your help!

Use WebDriverWait and wait for visibility_of_element_located().
driver.get("https://register.fca.org.uk/s/firm?id=001b000000MfNWNAA3")
name=WebDriverWait(driver,10).until(EC.visibility_of_element_located((By.CSS_SELECTOR,".slds-media__body h1"))).text
print(name)
address=WebDriverWait(driver,10).until(EC.visibility_of_element_located((By.CSS_SELECTOR,"h4[data-aura-rendered-by] ~p:nth-of-type(1)"))).text
print(address)
You need to import the libraries below.
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

To extract the Name and Address, ideally you need to induce WebDriverWait for visibility_of_element_located(), and you can use the following locator strategies:
Using Name:
driver.get('https://register.fca.org.uk/s/firm?id=001b000000MfQU0AAN')
print(WebDriverWait(driver, 5).until(EC.visibility_of_element_located((By.XPATH, "//h1"))).text)
Using Address:
driver.get('https://register.fca.org.uk/s/firm?id=001b000000MfQU0AAN')
print(WebDriverWait(driver, 5).until(EC.visibility_of_element_located((By.XPATH, "//h4[.//div[contains(., 'Address')]]//following-sibling::p[1]"))).text)
Console Output:
Mason Owen and Partners Ltd
Unity Building
20 Chapel Street
Liverpool
Merseyside
L3 9AG
L 3 9 A G
Note : You have to add the following imports :
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
You can find a relevant discussion in How to retrieve the text of a WebElement using Selenium - Python

In addition to using WebDriverWait and visibility_of_element_located like others are suggesting, it's sometimes necessary to scroll an item into view.
This is a little function to make it more convenient to execute the JavaScript that does it:
def scrollto(element):
    """Scroll ``element`` into view in the browser window.

    Executes ``scrollIntoView(true)`` on the element via the module-level
    ``driver`` (a Selenium WebDriver that must already exist when this is
    called).

    Args:
        element: The WebElement to bring into view; it is passed to the
            script as ``arguments[0]``.
    """
    driver.execute_script("return arguments[0].scrollIntoView(true);", element)

Related

Python Selenium send_keys writes only one character in WhatsApp Web's message box

I'm trying to send automatic messages on WhatsApp Web (macOS, Firefox). I want to use Selenium (pywhatkit also works), but send_keys sends only the first character.
All the imports of the code:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.firefox.service import Service
import time
TXT_BAR='/html/body/div[1]/div/div/div[4]/div/footer/div[1]/div/[2]/div/div[2]/div[1]'
txt_bar = wait.until(EC.presence_of_element_located((By.XPATH, TXT_BAR)))
txt_bar.click()
txt_bar.clear()
txt_bar.send_keys("help")
time.sleep(5)
txt_bar.send_keys(Keys.ENTER)
time.sleep(2)
#output -> h
edited code:
txt_bar = wait.until(EC.element_to_be_clickable((By.XPATH, TXT_BAR)))
txt_bar.click()
txt_bar.clear()
txt_bar.send_keys("help")
txt_bar.send_keys(Keys.ENTER)
#output -> h
WhatsApp Web elements are dynamic. Ideally, to send a character sequence to the element, instead of presence_of_element_located() you need to induce WebDriverWait for element_to_be_clickable(), and you can use the following locator strategy:
txt_bar = wait.until(EC.element_to_be_clickable((By.XPATH, TXT_BAR)))
txt_bar.click()
txt_bar.clear()
txt_bar.send_keys("help")
txt_bar.send_keys(Keys.ENTER)
Note: You have to add the following imports :
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

How to generate the search results in fbref.com sending text to the search field using Python Selenium and send_keys method

I am unable to retrieve any search results on fbref.com when using either send_keys or execute_script in Selenium for Python with the Chrome web driver.
This is the code I've used so far:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import csv
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.action_chains import ActionChains
s=Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=s)
driver.get("https://fbref.com/en/")
element = driver.find_element(by=By.CLASS_NAME, value="ac-hint")
action = ActionChains(driver)
element1= driver.find_element(by=By.CSS_SELECTOR, value=("input[type='search']"))
action.click(on_element=element1)
action.perform()
#element.send_keys("lionel messi")
#driver.execute_script("arguments[0].value='lionel messi'",element)
element2=driver.find_element(by=By.CSS_SELECTOR, value=("input[type='submit']"))
action.click(on_element=element2)
action.perform()
The code is able to interact with the search button, the text is typed, and the search button is clicked without any trouble, but the search result is as follows:
which basically means that the search was invalid. I've tried searching manually in the browser window opened by the driver, and that gives me a successful result.
You are entering the player name in the wrong field. If you look closely at the HTML, there are two input fields for the search.
Instead of "ac-hint", use "ac-input":
element = driver.find_element(by=By.CLASS_NAME, value="ac-input")
The locator strategy you have used to identify the search field
doesn't identify the desired element uniquely within the HTML DOM.
Solution
To send a character sequence to the element, you need to induce WebDriverWait for element_to_be_clickable(), and you can use the following solution:
Code Block:
driver.get("https://fbref.com/en/")
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input[type='search'][placeholder='Enter Person, Team, Section, etc']"))).send_keys("lionel messi" + Keys.RETURN)
Note: You have to add the following imports :
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
Browser Snapshot:

Cannot select the dropdown with selenium in Python

I am trying to scrape some data from a website, and for that I have to go through the age verification using Selenium. I was wondering if there is a way to change the store location in the popup. Below is my code:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
import pandas as pd
from bs4 import BeautifulSoup
import requests as r
import time
from selenium.webdriver.support.ui import Select
PATH="chromedriver.exe"
driver=webdriver.Chrome(PATH)
url1="https://cannacabana.com/collections/all?page=1"
driver.get(url1)
Select(WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "select#store-select")))).select_by_visible_text('ajax')
If someone can help, I would really appreciate it. Thanks.
Yes, you can select the store location.
It is a Select element there.
There is a special way to use such an element with Selenium.
You can select an option from the list of available options according to its displayed text, index or value, as described in the documentation.
So your code could be something like this:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
import pandas as pd
from bs4 import BeautifulSoup
import requests as r
import time
from selenium.webdriver.support.ui import Select
PATH="chromedriver.exe"
driver=webdriver.Chrome(PATH)
url1="https://cannacabana.com/collections/all?page=1"
driver.get(url1)
select_element = WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.CSS_SELECTOR, "select#store-select")))
select = Select(select_element)
select.select_by_value('rideau')

How to select option from a dropdown when there is optgroup in Python?

I was trying to select an option using Selenium in python.
Below is my code:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
import pandas as pd
from bs4 import BeautifulSoup
import requests as r
import time
from selenium.webdriver.support.ui import Select
PATH="chromedriver.exe"
driver=webdriver.Chrome(PATH)
url1="https://cannacabana.com/collections/all?page=1"
driver.get(url1)
Select(WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "select#store-select")))).select_by_visible_text('bayview')
I am getting a timeout error; could it be because the website has an optgroup? I am not able to find a way around it.
No, it uses ".//" so Optgroup doesn't matter. You can see the implementation here.
I believe value="bayview" is not visible_text so you should use select_by_value() instead.

How to access the 2nd checkbox that have the same class name and tag?

I am trying to access the 2nd or 3rd checkbox element among elements that share the same class name and tag. Each checkbox is enclosed in a separate div, and the divs also share the same class name.
I previously searched on Stack Overflow and Google and tried some of the following, but none of them is working:
driver.find_element_by_xpath("(//div)[#class='classname value'])[2]")
driver.find_element_by_xpath("(//div[#class='classname value']) [position()=2]")
Also
driver.find_element_by_xpath("(//span)[#class='classname value'])[2]")
driver.find_element_by_xpath("(//span[#class='classname value']) [position()=2]")
This is roughly how the elements look on inspection:
<div class='a checkbox'>
<label><input type='checkbox'><span class='b'>Paid</span></label>
</div>
<div class='a checkbox'>
<label><input type='checkbox'><span class='b'>Free</span></label>
</div>
I basically want to access the checkbox with the text Free. My overall code is this:
from selenium.webdriver.common.by import By
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
driver=webdriver.Firefox()
driver.get("http://udemy.com/courses/search/?src=ukw&q=python&p=1")
WebDriverWait(driver,20).until(EC.element_to_be_clickable((By.XPATH,"//span[text()='All Filters']"))).click()
time.sleep(10)
# Wait up to 20 s for the checkbox <input> inside the modal's "Free" filter label to be clickable, then click it.
# XPath attribute tests are written with "@" (@class, @data-purpose, @type).
WebDriverWait(driver,20).until(EC.element_to_be_clickable((By.XPATH,"//div[@class='modal-body']//label[.//span[@data-purpose='filter-option-title' and text()='Free']]/input[@type='checkbox']"))).click()
You may use contains() if the DOM contains only one "Free" text, as shown in the example below.
driver.find_elements_by_xpath("//*[contains(text(), 'Free')]")
If the DOM contains multiple "Free" texts, then you need to collect them in a list and iterate over the list based on the text position.
Amend your code to be looking like this:
from selenium.webdriver.common.by import By
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
driver=webdriver.Firefox()
driver.get("http://udemy.com/courses/search/?src=ukw&q=python&p=1")
WebDriverWait(driver,20).until(EC.element_to_be_clickable((By.XPATH,"//button[.//span[text()='All Filters']]"))).click()
# Wait up to 20 s for the <label> that has a title attribute and contains the nested "Free" span, then click it.
# XPath attribute tests are written with "@" (@title).
WebDriverWait(driver,20).until(EC.element_to_be_clickable((By.XPATH,"//label[@title and .//span/span[text()='Free']]"))).click()
As it is an <input> element, to invoke click() on the element associated with the text Free you have to induce WebDriverWait for element_to_be_clickable(), and you can use either of the following locator strategies:
Using XPATH 1:
# Wait up to 20 s for the <input> that precedes the "Free" span inside a div with class 'a checkbox', then click it.
# XPath attribute tests are written with "@" (@class).
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//div[@class='a checkbox']//label//span[@class='b' and text()='Free']//preceding::input[1]"))).click()
Using XPATH 2:
# Wait up to 20 s for the <input> that precedes the span with class 'b' and text "Free", then click it.
# XPath attribute tests are written with "@" (@class).
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//span[@class='b' and text()='Free']//preceding::input[1]"))).click()
Note : You have to add the following imports :
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
Try the following XPath to click on the Free checkbox on Udemy.
//div[@class='modal-body']//label[.//span[@data-purpose='filter-option-title' and text()='Free']]/input[@type='checkbox']
Induce WebDriverWait with element_to_be_clickable() and the following XPath locator.
# Wait up to 20 s for the checkbox <input> inside the modal's "Free" filter label to be clickable, then click it.
# XPath attribute tests are written with "@" (@class, @data-purpose, @type).
WebDriverWait(driver,20).until(EC.element_to_be_clickable((By.XPATH,"//div[@class='modal-body']//label[.//span[@data-purpose='filter-option-title' and text()='Free']]/input[@type='checkbox']"))).click()
Here is the code.
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
driver=webdriver.Chrome("path of the chrome driver")
driver.get("http://udemy.com/courses/search/?src=ukw&q=python&p=1")
WebDriverWait(driver,20).until(EC.element_to_be_clickable((By.XPATH,"//span[text()='All Filters']"))).click()
# Wait up to 20 s for the checkbox <input> inside the modal's "Free" filter label to be clickable, then click it.
# XPath attribute tests are written with "@" (@class, @data-purpose, @type).
WebDriverWait(driver,20).until(EC.element_to_be_clickable((By.XPATH,"//div[@class='modal-body']//label[.//span[@data-purpose='filter-option-title' and text()='Free']]/input[@type='checkbox']"))).click()

Categories

Resources