I am writing scraping code for the Upwork website and need to click through each page of job listings. Here is my Python code, which uses Selenium to crawl the site.
from bs4 import BeautifulSoup
import requests
from os.path import basename
from selenium import webdriver
import time
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
# Question's original attempt, kept as posted; the loop body's indentation is missing here.
# Start Chrome from a local chromedriver binary and open the job listing page.
driver = webdriver.Chrome("./chromedriver")
driver.get("https://www.upwork.com/o/jobs/browse/c/design-creative/")
# Look up the "Next" link once, before the loop starts.
link = driver.find_element_by_link_text("Next")
# Intended loop: while "Next" is clickable, wait for it and click the stored link.
while EC.elementToBeClickable(By.linkText("Next")):
wait.until(EC.element_to_be_clickable((By.linkText, "Next")))
link.click()
There are a couple of problems:
EC has no attribute elementToBeClickable. In Python you should use element_to_be_clickable
Your link is located on the first page only, so using it on the second page should raise StaleElementReferenceException
There is no wait variable defined in your code. I guess you mean something like
wait = WebDriverWait(driver, 10)
By has no attribute linkText. Try LINK_TEXT instead
Try to use below code to get required behavior
from selenium.common.exceptions import TimeoutException
# Keep paging until the "Next" link stops becoming clickable.
wait = WebDriverWait(driver, 10)
while True:
    try:
        # Re-locate "Next" on every page: an element found on the previous
        # page goes stale once the new page has loaded.
        wait.until(EC.element_to_be_clickable((By.LINK_TEXT, "Next"))).click()
    except TimeoutException:
        # No clickable "Next" appeared within 10 seconds: treat this as the last page.
        break
This should allow you to click Next button while it's available
Related
I have some sample code here which keeps saying that it can't find the element I'm looking for. Please help me; I want to find the element by its name and not by the absolute XPath.
from selenium import webdriver
from time import sleep
# Open Instagram's home page in a new Chrome session.
browser = webdriver.Chrome()
browser.get('https://www.instagram.com')
# Fixed five-second pause before searching the page.
sleep(5)
# Find the span whose text is exactly 'Sign Up' and click it; the return value of click() is stored in x.
x = browser.find_element_by_xpath("//span[text() = 'Sign Up']").click()
When you get the instagram page you need to induce waits for the page to load and then click the parent a tag of that span.
# Wait up to 10 seconds until the <a> that is the parent of the 'Sign up' span is clickable, then click it.
WebDriverWait(browser, 10).until(EC.element_to_be_clickable((By.XPATH, "//span[text()='Sign up']/parent::a"))).click()
Import
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
I'm trying to click a button with its class but it throws an ElementNotInteractableException.
Here is the website HTML code
Here is the code I'm using
driver = webdriver.Chrome('chromedriver.exe', chrome_options=options)
driver.get('https://physionet.org/lightwave/?db=noneeg/1.0.0')
def get_spo2hr(subject):
    """Select a record, click through the page controls, and save a screenshot.

    Uses the module-level ``driver``. ``subject`` is the visible text of the
    option to pick in the ``record`` dropdown. The screenshot is written to
    ``screenie.png`` in the current working directory.
    """
    # XPath tests attributes with '@'; "[#name=...]" is not a valid predicate.
    driver.find_element_by_xpath("//select[@name='record']/option[text()='"+subject+"']").click()
    driver.find_element_by_id('ui-id-3').click()
    driver.find_element_by_id('viewann').click()
    driver.find_element_by_id('viewsig').click()
    driver.find_element_by_id('lwform').click()
    # This is the click the question reports as raising ElementNotInteractableException.
    driver.find_element_by_css_selector(".fwd").click()
    driver.save_screenshot('screenie.png')
get_spo2hr('Subject10_SpO2HR')
One issue (as noted in other answers) is that the CSS selector is unstable; prefer an XPath.
But the main issue is that a div overlaps the a element while the DOM is still rendering.
Just wait one second for the DOM to finish loading:
import time
time.sleep(1)
Example code:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions
driver = webdriver.Chrome()
driver.get('https://physionet.org/lightwave/?db=noneeg/1.0.0')
def get_spo2hr(subject):
    """Select a record, pause briefly, click through the controls, and save a screenshot.

    Uses the module-level ``driver``. ``subject`` is the visible text of the
    option to pick in the ``record`` dropdown. The screenshot is written to
    ``screenie.png`` in the current working directory.
    """
    # XPath tests attributes with '@'; "[#name=...]" is not a valid predicate.
    driver.find_element_by_xpath("//select[@name='record']/option[text()='"+subject+"']").click()
    # One-second pause after choosing the record, before touching the other controls.
    import time
    time.sleep(1)
    driver.find_element_by_id('ui-id-3').click()
    driver.find_element_by_id('viewann').click()
    driver.find_element_by_id('viewsig').click()
    driver.find_element_by_id('lwform').click()
    # The final button is addressed by an absolute XPath instead of the ".fwd" class selector.
    driver.find_element_by_xpath('/html/body/div[1]/main/div/div/div/form/div[3]/table/tbody/tr/td[2]/div/button[3]').click()
    driver.save_screenshot('screenie.png')
get_spo2hr('Subject10_SpO2HR')
I always prefer getting elements using their xpath, of course, in suitable situations. With that being said, I modified your code to find the forward button using its xpath and it works.
Here is the modified code:
driver = webdriver.Chrome('chromedriver.exe', chrome_options=options)
driver.get('https://physionet.org/lightwave/?db=noneeg/1.0.0')
def get_spo2hr(subject):
    """Select a record, click through the page controls, and save a screenshot.

    Uses the module-level ``driver``. ``subject`` is the visible text of the
    option to pick in the ``record`` dropdown. The screenshot is written to
    ``screenie.png`` in the current working directory.
    """
    # XPath tests attributes with '@'; "[#name=...]" is not a valid predicate.
    driver.find_element_by_xpath("//select[@name='record']/option[text()='" + subject + "']").click()
    driver.find_element_by_id('ui-id-3').click()
    driver.find_element_by_id('viewann').click()
    driver.find_element_by_id('viewsig').click()
    driver.find_element_by_id('lwform').click()
    # The forward button is addressed by an absolute XPath instead of the ".fwd" class selector.
    driver.find_element_by_xpath('/html/body/div[1]/main/div/div/div/form/div[3]/table/tbody/tr/td[2]/div/button[3]').click()
    driver.save_screenshot('screenie.png')
get_spo2hr('Subject10_SpO2HR')
I am trying to scrape a website where I have to click a link. For this purpose, I am using the Selenium library with ChromeDriver.
from selenium import webdriver
# Open the job-search page in a new Chrome session.
url = 'https://sjobs.brassring.com/TGnewUI/Search/Home/Home?partnerid=25222&siteid=5011&noback=1&fromSM=true#Applications'
browser = webdriver.Chrome()
browser.get(url)
# Fixed three-second pause before looking for the link.
# NOTE(review): `time` is not imported in the visible snippet - confirm an `import time` exists.
time.sleep(3)
# Locate the link by its exact visible text, then click it.
link = browser.find_element_by_link_text("Don't have an account yet?")
link.click()
But it is not working. Any ideas why it is not working? Is there a workaround?
You can get it done in several ways; here is one of them. I've used the driver.execute_script() command to force the click. You should not rely on hardcoded delays, as they are very inconsistent.
Modified script:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait as wait
from selenium.webdriver.support import expected_conditions as EC
# Open the job-search page in a new Chrome session.
url = 'https://sjobs.brassring.com/TGnewUI/Search/Home/Home?partnerid=25222&siteid=5011&noback=1&fromSM=true#Applications'
driver = webdriver.Chrome()
driver.get(url)
# Wait up to 10 seconds for the anchor with this ng-click handler to be present in the DOM.
item = wait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "a[ng-click='newAccntScreen()']")))
# Click through JavaScript rather than WebElement.click().
driver.execute_script("arguments[0].click();",item)
I was trying to scrape Instagram to get the dates of recent posts, using Selenium to get the work done. But when I use find_element_by_xpath and give it the path of the date text, it says the element was not found. I have tried scrolling the page, but it didn't work.
from bs4 import BeautifulSoup
import requests
import time
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
# Open the post page in a new Chrome session.
browser = webdriver.Chrome()
new='https://www.instagram.com/p/Bf1Xl9Pgvvy/?tagged=meditation'
##finding poster user link and date
browser.get(new)
# NOTE(review): find_element_by_xpath runs immediately here, before WebDriverWait starts polling,
# and its result (an element, not a locator) is what gets passed to presence_of_element_located.
element = WebDriverWait(browser, 10).until(EC.presence_of_element_located(browser.find_element_by_xpath('/html/body/div[4]/div/div[2]/div/article/div[2]/div[2]/a/time')))
You need to use as simple XPath as possible for your task.
This will work for you:
from selenium.webdriver.common.by import By
# presence_of_element_located takes a (By, value) locator tuple and performs the
# lookup itself on each poll. Passing an already-located element runs the search
# before the wait starts and hands the condition something it cannot unpack.
element = WebDriverWait(browser, 10).until(EC.presence_of_element_located((By.XPATH, '//time')))
I am using Python and Selenium to scrape a website. What I do is go to the homepage and type in a keyword, such as 1300746-79-5. On the resulting page, I am trying to scrape the data in the "pricing" section. Specifically, I need to get the "SKU-Pack Size" and "Price(USD)" information. But this information is JavaScript-encrypted, so I cannot see it in the source code. I am wondering how I can achieve this.
I have written some code that gets me to the page of interest, but I still cannot see the javascript information. Here is what I have so far.
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pprint
# Create a new instance of the Chrome driver
driver = webdriver.Chrome('C:\Users\Rei\Desktop\chromedriver.exe')
# Open the site's home page and print its title.
driver.get("http://www.sigmaaldrich.com/united-states.html")
print driver.title
# Find the search box by its name attribute.
inputElement = driver.find_element_by_name("Query")
# type in the search
inputElement.send_keys("1300746-79-5")
# Submit the form that contains the search box.
inputElement.submit()
Everything you have done looks correct to me.
"SKU-Pack Size" and "Price(USD)" information are not "encrypted", but retrieved after JavaScript clicking action. All you need to do is to click product name or pricing link.
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pprint
# Open the site's home page in a new Chrome session and print its title.
driver = webdriver.Chrome()
driver.get("http://www.sigmaaldrich.com/united-states.html")
print driver.title
# Find the search box by its name attribute.
inputElement = driver.find_element_by_name("Query")
# type in the search
inputElement.send_keys("1300746-79-5")
inputElement.submit()
# Locate the link inside the "priceValue" list item, print its text, and click it.
pricing_link = driver.find_element_by_css_selector("li.priceValue a")
print pricing_link.text
pricing_link.click()
# then deal with the data you want
# Wait up to 10 seconds for the table body inside ".priceAvailContainer" to be present.
price_table = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.CSS_SELECTOR, ".priceAvailContainer tbody"))
)
print 'price_table.text: ' + price_table.text
# Close the browser and end the WebDriver session.
driver.quit()