I'm trying to pull some Diablo II trading prices off a trading page using Selenium.
So far I've managed to locate the element I'm interested in using classes, but I can't retrieve the actual text, which is what I need.
I have the following code:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.get('https://traderie.com/diablo2resurrected/product/3382986705/buying?prop_Platform=PC&prop_Mode=softcore&makeOffer=false')
# Close cookie popup
driver.find_element(By.XPATH, '//*[@id="tyche_cmp_modal"]/div/div/div/div[5]/div[2]/a').click()
# Locate entire page of offers
All = driver.find_element(By.CLASS_NAME, "row")
# Locate individual offer
Offer = All.find_element(By.CLASS_NAME, "col-xs-12")
# Locate price in each offer
Price = Offer.find_element(By.CLASS_NAME, "sc-eCImPb")
# Print price
print(str(Price.text))
# Close page
driver.close()
The print turns out empty, but it should return something like 3x Vex, or similar. What am I doing wrong?
There are several issues here:
You should add waits, preferably Expected Conditions explicit waits.
You are using the wrong locator for the price element.
Since there are multiple offers there, you should iterate over the results in a loop.
Variable names should be lowercase, per the accepted convention.
I think your code should be something like this:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
wait = WebDriverWait(driver, 20)
driver.get('https://traderie.com/diablo2resurrected/product/3382986705/buying?prop_Platform=PC&prop_Mode=softcore&makeOffer=false')
# Close cookie popup
wait.until(EC.visibility_of_element_located((By.XPATH, '//*[@id="tyche_cmp_modal"]/div/div/div/div[5]/div[2]/a'))).click()
wait.until(EC.visibility_of_element_located((By.XPATH, "//a[contains(@style,'capitalize')]")))
time.sleep(1)
prices = driver.find_elements(By.XPATH, "//a[contains(@style,'capitalize')]")
for price in prices:
    print(price.text)
# Close page
driver.close()
UPD
To iterate over the offers and get each offer's seller name and price, you can do something like this:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
wait = WebDriverWait(driver, 20)
driver.get('https://traderie.com/diablo2resurrected/product/3382986705/buying?prop_Platform=PC&prop_Mode=softcore&makeOffer=false')
# Close cookie popup
wait.until(EC.visibility_of_element_located((By.XPATH, '//*[@id="tyche_cmp_modal"]/div/div/div/div[5]/div[2]/a'))).click()
wait.until(EC.visibility_of_element_located((By.XPATH, "//a[contains(@style,'capitalize')]")))
time.sleep(1)
offers = driver.find_elements(By.CLASS_NAME, "col-xs-12")
for offer in offers:
    name = offer.find_element(By.XPATH, ".//a[contains(@href,'profile')]")
    prices = offer.find_elements(By.XPATH, ".//a[contains(@style,'capitalize')]")
    # now you can extract the seller's name with name.text
    # and iterate over the prices:
    for price in prices:
        price_text = price.text
    # You can put all of these in a dictionary etc.
# Close page
driver.close()
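For example, a minimal sketch of such a collection loop, to run before driver.close() and assuming the same locators as above:
results = []
for offer in offers:
    name = offer.find_element(By.XPATH, ".//a[contains(@href,'profile')]")
    prices = offer.find_elements(By.XPATH, ".//a[contains(@style,'capitalize')]")
    # one record per offer: seller name plus the list of price texts
    results.append({
        "seller": name.text,
        "prices": [price.text for price in prices],
    })
print(results)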
I am trying to fetch data from nj.58.com using Selenium. I can access the homepage and some internal links, but while navigating through them I noticed that the website flags me as a web crawler when I visit a specific URL, even if I interact with the links like a human.
I have built my Selenium script up to a point, but I'm stuck because the site throws an antibot response back at me.
Here is what I've done:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support.ui import Select
import undetected_chromedriver as uc
import time
import pandas as pd
driver = uc.Chrome()
website = 'https://nj.58.com/'
driver.get(website)
driver.implicitly_wait(4)
wait = WebDriverWait(driver, 10)
driver.maximize_window()
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#commonTopbar_ipconfig > a"))).click()
city_location = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#selector-search-input')))
city_location.clear()
city_location.send_keys('南京' + Keys.RETURN)
keyword = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#keyword')))
keyword.clear()
keyword.send_keys('"废纸回收"')
time.sleep(2)
search_btn = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#searchbtn')))
search_btn.click()
When I click on search_btn, I expect to see a list of the items I'm interested in. But instead the site flags me as a web crawler at this step (search_btn), as it already did even before I used Selenium.
How can I bypass this antibot/antihuman detection at the point where I click search_btn?
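One mitigation that is often tried, sketched here under the assumption that the detection keys on uniform, machine-fast input, is to randomize delays and send keystrokes one at a time (continuing from the script above, which already defines wait, By, and EC); it may well not be enough for nj.58.com:
import random
import time

def human_type(element, text):
    # type one character at a time with a small random pause
    for char in text:
        element.send_keys(char)
        time.sleep(random.uniform(0.1, 0.3))

keyword = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#keyword')))
keyword.clear()
human_type(keyword, '"废纸回收"')
time.sleep(random.uniform(1.5, 3.0))  # pause like a human before clicking
search_btn = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#searchbtn')))
search_btn.click()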
New to Python here, and I have started with small web scraping projects. I have now tried to scrape this URL.
I want to collect the information in the blue and white boxes. More specifically, the price (536,25) and the name of the provider (Cheap Energy AB), and I would like to collect the top 3, like in the picture below.
The problem is that the output I get covers only the top alternative:
536,25 öre/kWh, Cheap Energy AB
The output I would like is :
536,25 öre/kWh, Cheap Energy AB
544,45 öre/kWh, Vattenfall AB
544,45 öre/kWh, Vattenfall AB
My code is the following:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
s = Service("/Users/brustabl1/lpthw/chromedriver")
url = "https://www.elpriskollen.se/sv/Avtalssida/?ellevid=236&postnummer=18164&forbrukning=20000&avtalId=31792&prevContractTypeId=20"
driver = webdriver.Chrome(service=s)
driver.maximize_window()
driver.implicitly_wait(10)
driver.get(url)
# Presses the button asking if you are an individual or a company
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="cookie-customer-type"]/div/div/div/div[3]/nav/ul/li[1]'))).click()
lists = driver.find_elements(By.XPATH, '//*[@id="main"]/div[3]/div[3]/div[2]/ul/li[1]/div[2]/div')
for list in lists:
    price = list.find_element(By.XPATH, '//*[@id="main"]/div[3]/div[3]/div[2]/ul/li[1]/div[2]/div/div[1]/div/div/div')
    name = list.find_element(By.XPATH, '//*[@id="main"]/div[3]/div[3]/div[2]/ul/li[1]/div[2]/div/div[2]/div/div/div/p[1]')
    print(price.text, name.text)
I have tried some things; the first was to put a dot in front of the // for find_element ('.//'), but the script doesn't like that. The output I get is:
Message: no such element: Unable to locate element: {"method":"xpath","selector":".//*[@id="main"]/div[3]/div[3]/div[2]/ul/li[1]/div[2]/div/div[1]/div/div/div"}
And right now, I'm kind of stuck.
There are several problems with your code.
First of all, the XPath //*[@id="main"]/div[3]/div[3]/div[2]/ul/li[1]/div[2]/div matches only 1 element, so your lists actually holds a single element.
You have to improve the locators.
You also need to add a delay: collect the elements into lists only once they are visible.
This code works:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')  # raw string so the backslashes aren't treated as escapes
driver = webdriver.Chrome(service=webdriver_service, options=options)
url = 'https://www.elpriskollen.se/sv/Avtal/?avtalstypid=20&forbrukning=20000&postnr=18164'
driver.get(url)
wait = WebDriverWait(driver, 20)
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '.modal-content li.customertype.privat'))).click()
panels = wait.until(EC.visibility_of_all_elements_located((By.CLASS_NAME, 'panel-body')))
for panel in panels:
    price = panel.find_element(By.XPATH, ".//div[contains(@class,'epk-list-price')]//div[@class='epk-list-cell']")
    name = panel.find_element(By.XPATH, ".//p[@class='epk-avtalsinfo'][1]")
    print(price.text, name.text)
And this is the output:
536,25 öre/kWh Cheap Energy AB
544,45 öre/kWh Vattenfall AB
544,45 öre/kWh Vattenfall AB
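Since you only want the top 3 offers, slicing the collected list is enough (same locators as above):
# keep only the first three offers
for panel in panels[:3]:
    price = panel.find_element(By.XPATH, ".//div[contains(@class,'epk-list-price')]//div[@class='epk-list-cell']")
    name = panel.find_element(By.XPATH, ".//p[@class='epk-avtalsinfo'][1]")
    print(price.text, name.text)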
If you visit this site,
https://www.premierleague.com/results
you will be able to see several match results. If you click on a match, you will be taken to another page.
My question is: how can I get the href (link) of each match?
links = driver.find_elements(By.XPATH, '//*[@id="mainContent"]/div[3]/div[1]')
for link in links:
    x = link.get_attribute("href")
    List.append(x)
This is what I have so far and it is not working.
I see elements like
<div data-href="//www.premierleague.com/match/66686" ...>
so you could search for
//div[@data-href]
and later use get_attribute("data-href").
Full working code
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
#from webdriver_manager.chrome import ChromeDriverManager
from webdriver_manager.firefox import GeckoDriverManager
#import time
url = 'https://www.premierleague.com/results'
#driver = webdriver.Chrome(executable_path=ChromeDriverManager().install())
driver = webdriver.Firefox(executable_path=GeckoDriverManager().install())
driver.get(url)
wait = WebDriverWait(driver, 10)
#time.sleep(5)
# close popup window with "Accept All Cookies"
button = wait.until(EC.visibility_of_element_located((By.XPATH, '//button[text()="Accept All Cookies"]')))
button.click()
all_items = driver.find_elements(By.XPATH, '//div[@data-href]')
print('len(all_items):', len(all_items))
for item in all_items:
    print(item.get_attribute('data-href'))
Result:
len(all_items): 40
//www.premierleague.com/match/66686
//www.premierleague.com/match/66682
//www.premierleague.com/match/66687
//www.premierleague.com/match/66689
//www.premierleague.com/match/66691
//www.premierleague.com/match/66684
//www.premierleague.com/match/66705
//www.premierleague.com/match/66677
//www.premierleague.com/match/66674
//www.premierleague.com/match/66675
//www.premierleague.com/match/66676
//www.premierleague.com/match/66679
//www.premierleague.com/match/66672
//www.premierleague.com/match/66678
//www.premierleague.com/match/66680
//www.premierleague.com/match/66681
//www.premierleague.com/match/66673
//www.premierleague.com/match/66633
//www.premierleague.com/match/66584
//www.premierleague.com/match/66513
//www.premierleague.com/match/66637
//www.premierleague.com/match/66636
//www.premierleague.com/match/66635
//www.premierleague.com/match/66666
//www.premierleague.com/match/66670
//www.premierleague.com/match/66668
//www.premierleague.com/match/66665
//www.premierleague.com/match/66667
//www.premierleague.com/match/66669
//www.premierleague.com/match/66654
//www.premierleague.com/match/66656
//www.premierleague.com/match/66659
//www.premierleague.com/match/66657
//www.premierleague.com/match/66655
//www.premierleague.com/match/66652
//www.premierleague.com/match/66660
//www.premierleague.com/match/66661
//www.premierleague.com/match/66653
//www.premierleague.com/match/66658
//www.premierleague.com/match/66524
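Note that the data-href values are protocol-relative (they start with //). If you need absolute URLs, a small follow-up loop can prefix the scheme; assuming https is what the site serves:
match_links = []
for item in all_items:
    link = item.get_attribute('data-href')
    if link.startswith('//'):
        link = 'https:' + link  # prefix the scheme, assuming https
    match_links.append(link)
print(match_links[:3])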
I think this comes down to my inability to read the HTML of this specific website.
I am trying to operate some things on this website:
https://fred.stlouisfed.org/series/DGS10
I am having 2 issues with it.
I tried to input the date range, OR set the date range to max, by doing either of the following (as long as I get 30 years of data or more, I am OK). Below is my attempt to input a specific date and simulate pressing the Enter key:
range_search_bar = driver.find_element(By.XPATH, "//*[@id=\"input-cosd\"]")
range_search_bar.clear()
range_search_bar.send_keys("1980-10-10")
range_search_bar.send_keys(u'\ue007')
and below is an attempt to click the "max" date-range button instead of inputting a date - I tried this first since it is simpler than the first snippet:
max_range_button = driver.find_element(By.XPATH, "//*[@id=\"zoom-all\"]")
max_range_button.click()
Unfortunately, neither of them seems to change the date range...
I suspect they are inside a different iframe? But I could not find an iframe switch...
I also tried to click the "download" button and then the "CSV (data)" option (which initiates the download) with the code below:
download_10_button = driver.find_element(By.XPATH, "//*[@id=\"download-button\"]/span")
download_10_button.click()
download_csv_button = driver.find_element(By.XPATH, "//*[@id=\"download-data-csv\"]")
download_csv_button.click()
But I am getting this error: ElementNotInteractableException: element not interactable
Any ideas on what I might be doing wrong?
Thanks!
I can download the file if I use time.sleep(...), because the JavaScript needs time to load the data and to open the menu when Download is clicked.
from selenium import webdriver
from selenium.webdriver.common.by import By
#from webdriver_manager.chrome import ChromeDriverManager
from webdriver_manager.firefox import GeckoDriverManager
import time
url = 'https://fred.stlouisfed.org/series/DGS10'
#driver = webdriver.Chrome(executable_path=ChromeDriverManager().install())
driver = webdriver.Firefox(executable_path=GeckoDriverManager().install())
driver.get(url)
time.sleep(5)
max_range_button = driver.find_element(By.XPATH, '//*[@id="zoom-all"]')
max_range_button.click()
time.sleep(5)
download_10_button = driver.find_element(By.XPATH, '//*[@id="download-button"]/span')
download_10_button.click()
time.sleep(2)
download_csv_button = driver.find_element(By.XPATH, '//*[@id="download-data-csv"]')
download_csv_button.click()
Or you may use explicit waits for this.
But I still needed sleep to wait for all the data to load.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
#from webdriver_manager.chrome import ChromeDriverManager
from webdriver_manager.firefox import GeckoDriverManager
import time
url = 'https://fred.stlouisfed.org/series/DGS10'
#driver = webdriver.Chrome(executable_path=ChromeDriverManager().install())
driver = webdriver.Firefox(executable_path=GeckoDriverManager().install())
driver.get(url)
time.sleep(5)
max_range_button = driver.find_element(By.XPATH, '//*[@id="zoom-all"]')
max_range_button.click()
time.sleep(5)
download_10_button = WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.XPATH, '//*[@id="download-button"]/span')))
download_10_button.click()
download_csv_button = WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.XPATH, '//*[@id="download-data-csv"]')))
download_csv_button.click()
And the same approach with entering a date:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
#from selenium.common.exceptions import NoSuchElementException, TimeoutException
#from webdriver_manager.chrome import ChromeDriverManager
from webdriver_manager.firefox import GeckoDriverManager
import time
url = 'https://fred.stlouisfed.org/series/DGS10'
#driver = webdriver.Chrome(executable_path=ChromeDriverManager().install())
driver = webdriver.Firefox(executable_path=GeckoDriverManager().install())
driver.get(url)
time.sleep(5)
#max_range_button = driver.find_element(By.XPATH, '//*[@id="zoom-all"]')
#max_range_button.click()
range_search_bar = driver.find_element(By.XPATH, '//*[@id="input-cosd"]')
range_search_bar.clear()
range_search_bar.send_keys("1980-10-10")
range_search_bar.send_keys(Keys.ENTER) # u'\ue007')
time.sleep(5)
download_10_button = WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.XPATH, '//*[@id="download-button"]/span')))
download_10_button.click()
download_csv_button = WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.XPATH, '//*[@id="download-data-csv"]')))
download_csv_button.click()
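If you also want the CSV to land in a known folder rather than the browser's default download directory, Firefox can be pointed at one via profile preferences. A minimal sketch, assuming a reasonably recent Selenium; the directory path is a placeholder:
from selenium import webdriver

options = webdriver.FirefoxOptions()
options.set_preference("browser.download.folderList", 2)          # 2 = use a custom directory
options.set_preference("browser.download.dir", "/tmp/fred_data")  # placeholder path
options.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/csv")  # skip the save dialog for CSV
driver = webdriver.Firefox(options=options)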
My code executes and gets to the page I want to scrape. Once I am there, I'm having a hard time printing any elements, in this case just the names.
The page logs in through the code, so you can replace the example username with any email / fake account if you are skeptical.
Here is the code:
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
productlinks=[]
test1=[]
options = Options()
driver = webdriver.Chrome(ChromeDriverManager().install())
url = "https://www.linkedin.com/uas/login?session_redirect=https%3A%2F%2Fwww%2Elinkedin%2Ecom%2Fsearch%2Fresults%2Fpeople%2F%3FcurrentCompany%3D%255B%25221252860%2522%255D%26geoUrn%3D%255B%2522103644278%2522%255D%26keywords%3Dsales%26origin%3DFACETED_SEARCH%26page%3D2&fromSignIn=true&trk=cold_join_sign_in"
driver.get(url)
time.sleep(2)
username = driver.find_element_by_id('username')
username.send_keys('Example@gmail.com')
password = driver.find_element_by_id('password')
password.send_keys('ExamplePassword')
password.submit()
element1 = driver.find_elements_by_class_name("name actor-name")
title=[t.text for t in element1]
print(title)
find_elements_by_class_name() doesn't accept multiple class names. Instead you can use a CSS selector.
To avoid synchronization issues, induce WebDriverWait(), wait for visibility_of_all_elements_located(), and use the following CSS selector:
element1 =WebDriverWait(driver,10).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR,".name.actor-name")))
title=[t.text for t in element1]
print(title)
You need to import the libraries below:
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
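Putting the pieces together, a minimal end-to-end sketch using the current locator API; the credentials are placeholders, and url is the login URL from the question:
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager

driver = webdriver.Chrome(ChromeDriverManager().install())
driver.get(url)  # the login URL from the question
time.sleep(2)
driver.find_element(By.ID, 'username').send_keys('Example@gmail.com')  # placeholder
password = driver.find_element(By.ID, 'password')
password.send_keys('ExamplePassword')  # placeholder
password.submit()
# wait for the result names instead of reading them immediately after login
element1 = WebDriverWait(driver, 10).until(
    EC.visibility_of_all_elements_located((By.CSS_SELECTOR, ".name.actor-name")))
print([t.text for t in element1])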