Trouble parsing some text from a webpage

Trouble parsing some text from a webpage - python

I've written a script in python using selenium to scrape some text out of a webpage. The text I wanna scrape generates upon filling in an inputbox. My script can fill in in the right way and can populate the value. However, it can't parse the text. How can I do it?
This is what I've tried so far:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Chrome()
wait = WebDriverWait(driver, 10)
driver.get("http://dev.delshlearning.com.au/test.php")
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR,"#AM"))).send_keys("`(2(3^5-sqrt(3)))/2`",Keys.RETURN)
item = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR,"#MQ"))).text
print(item)
driver.quit()
The send_keys() parameter is already filled in within the script for your consideration.

It is storing the text values in the attribute value. This should work:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Chrome()
wait = WebDriverWait(driver, 10)
driver.get("http://dev.delshlearning.com.au/test.php")
# I changed your locater to ID since it's a little more clear
wait.until(EC.visibility_of_element_located((By.ID,"AM"))).send_keys("`(2(3^5-sqrt(3)))/2`",Keys.RETURN)
item = wait.until(EC.visibility_of_element_located((By.ID,"MQ"))).get_attribute('value')
print(item)
driver.quit()
I found it by going to the element's properties as shown highlighted here:

Related

How to click multiple 'Add to cart' button from product list using Selenium and Python?

I am able to add single and all available items, but not sure how to add multiple items
enter image description here
enter image description here

Your question isn't clear, please the next time provide more details and please just don't share the code with a screenshot.....
Anyway,
To click all the buttons:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Chrome()
driver.get("https://rahulshettyacademy.com/seleniumPractise/#/")
WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.CSS_SELECTOR, "[class='product-action']")))
for i in driver.find_elements(by=By.CSS_SELECTOR, value="[class='product-action']"):
i.click()
To click just the first two buttons (again, not sure what you really want):
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Chrome()
driver.get("https://rahulshettyacademy.com/seleniumPractise/#/")
WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.CSS_SELECTOR, "[class='product-action']")))
for i in driver.find_elements(by=By.CSS_SELECTOR, value="[class='product-action']")[0:2]:
i.click()

Remove unnecessary element from items using methods items.remove(), items.pop(), del items[].

How to use mulitple try except in selenium python

this code when given a list of cities goes and searches on google and extract data then covert it into a dataframe
In some cases have to use different xpaths to extract the data. there are three xpaths in total.
Trying to do this :
if
1 doesnt work go to 2
2 doesnt work go to 3
3 doesnt work.
use driver.quit ()
tried this code used NoSuchElementException
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions
import pandas as pd
from selenium.common.exceptions import NoSuchElementException
df_output = pd.DataFrame(columns=["City", "pincode"])
url = "https://www.google.com/"
chromedriver = ('/home/me/chromedriver/chromedriver.exe')
driver = webdriver.Chrome(chromedriver)
driver.implicitly_wait(30)
driver.get(url)
search = driver.find_element_by_name('q')
mlist1=['polasa']
for i in mlist1:
try:
search.send_keys(i,' pincode')
search.send_keys(Keys.RETURN)
WebDriverWait(driver, 10).until(expected_conditions.visibility_of_element_located((By.XPATH, '//div[#class="IAznY"]//div[#class="title"]')))
elmts = driver.find_elements_by_xpath('//div[#class="IAznY"]//div[#class="title"]')
df_output = df_output.append(pd.DataFrame(columns=["City", "pincode"], data=[[i,elmts[0].text]]))
driver.quit()
except NoSuchElementException:
try:
elements=driver.find_element_by_xpath("//div[#class='Z0LcW']")
df_output = df_output.append(pd.DataFrame(columns=["City", "pincode"], data=[[i,elements.text]]))
driver.quit()
except NoSuchElementException:
try:
elements=driver.find_element_by_xpath("//div[#class='Z0LcW AZCkJd']")
df_output = df_output.append(pd.DataFrame(columns=["City", "pincode"], data=[[i,elements.text]]))
driver.quit()
except:
driver.quit()
this code works used one of the 3 tags here
need to combine 3 tags in a single code.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
import re
import pandas as pd
import os
import html5lib
import json
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions
import pandas as pd
url = "https://www.google.com/"
chromedriver = ('/home/me/chromedriver/chromedriver.exe')
driver = webdriver.Chrome(chromedriver)
driver.implicitly_wait(30)
driver.get(url)
search = driver.find_element_by_name('q')
search.send_keys('polasa',' pincode')
search.send_keys(Keys.RETURN)
elements=driver.find_element_by_xpath("//div[#class='Z0LcW']")
elements.text
``

You don't really need 3 try-catchs. You can do this without throwing exceptions by locating elements (plural) given a locator and then check the length of the collection returned. If length = 0, no elements were found.
The locators you are using don't require XPath so you can instead use a CSS selector and combine all three with an OR and avoid the three checks. (Note: you can do the same thing with XPath but the results are messier and harder to read)
Here are your 3 locators combined into one using OR (the comma) in CSS selector syntax
div.IAznY div.title, div.Z0LcW, div.Z0LcW.AZCkJd
...and the updated code using the combined locator and without the nested try-catch.
...
locator = (By.CSS_SELECTOR, 'div.IAznY div.title, div.Z0LcW, div.Z0LcW.AZCkJd')
for i in mlist1:
search.send_keys(i,' pincode')
search.send_keys(Keys.RETURN)
WebDriverWait(driver, 10).until(expected_conditions.visibility_of_element_located(*locator)
elements = driver.find_elements_by_css_selector(*locator)
df_output = df_output.append(pd.DataFrame(columns=["City", "pincode"], data=[[i,elements[0].text]]))
driver.quit()
NOTE: I used your original locators and wasn't returning any results with any of the three. Are you sure they are correct?
Also note... I pulled the driver.quit() out of the loop. I'm not sure if you intended it to be inside or not but from the code provided, if the try succeeds in the first iteration, the browser will quit. You only have one item so you probably didn't notice this yet but would have been confused when you added another item to the iteration.

How to click on onclick link with image? Python Selenium

I'm just learning how to webscrape dynamically using Selenium in Python. I'm currently trying to click on a link within the webpage to page forward over search results.
So far this is the code that I'm using:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
driver = webdriver.Chrome('C:\\Users\\km13\\chromedriver.exe')
driver.get("http://www.congreso.gob.pe/pley-2016-2021")
elem = driver.find_element_by_css_selector("img[src='/Sicr/TraDocEstProc/CLProLey2016.nsf/8eac1ef603908b5105256cdf006c41b1/$Body/0.AB2?OpenElement&FieldElemFormat=gif']")
elem.click()
This is the HTML that corresponds with the element I'd like to click on:
`<img src="/Sicr/TraDocEstProc/CLProLey2016.nsf/8eac1ef603908b5105256cdf006c41b1/$Body/0.AB2?OpenElement&FieldElemFormat=gif" width="81" height="16" border="0">`
From my somewhat limited knowledge of HTML this seems like the link is actually embedded in the gif which is why I tried to use the CSS selector that goes along with that image. But this did not work.
Any guidance would be greatly appreciated!
Update:
I changed my code by adding the following import
from selenium.webdriver.common.by import By
And I changed the following:
elem = driver.find_element(By.CSS_SELECTOR, "img[src='/Sicr/TraDocEstProc/CLProLey2016.nsf/8eac1ef603908b5105256cdf006c41b1/$Body/0.AB2?OpenElement&FieldElemFormat=gif']")
elem.click()
Now I get an error for "no such element."

There is an iframe.You need to switch to iframe first to access the element.Try below code.use WebDriverWait to handle dynamic element.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Chrome('C:\\Users\\km13\\chromedriver.exe')
driver.get("http://www.congreso.gob.pe/pley-2016-2021")
WebDriverWait(driver, 20).until(EC.frame_to_be_available_and_switch_to_it((By.NAME, 'ventana02')))
elem = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
(By.XPATH, "//a[contains(#onclick,'A50')]/img[contains(#src,'Sicr/TraDocEstProc/CLProLey')]")))
elem.click()
EDITED
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Chrome('C:\\Users\\km13\\chromedriver.exe')
driver.get("http://www.congreso.gob.pe/pley-2016-2021")
driver.switch_to.frame(0)
elem=WebDriverWait(driver,10).until(EC.element_to_be_clickable((By.XPATH,"//a[contains(#onclick,'A50')]/img[contains(#src,'Sicr/TraDocEstProc/CLProLey')]")))
elem.click()

No such Element Exception using selenium in python

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
chrome_path=r"C:\Users\Priyanshu\Downloads\Compressed\chromedriver_win32\chromedriver.exe"
driver=webdriver.Chrome(chrome_path)
driver.get("https://www.flipkart.com/?")
search = driver.find_element_by_name('q')
search.send_keys("laptop")
search.send_keys(Keys.RETURN)
driver.find_element_by_xpath(""" //*[#id="container"]/div/div[2]/div[2]/div/div[2]/div[2]/div/section/ul/li[2]""").click()
I am getting no such element present in the last line of code.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
chrome_path=r"C:\Users\Priyanshu\Downloads\Compressed\chromedriver_win32\chromedriver.exe"
driver=webdriver.Chrome(chrome_path)
driver.get("https://www.flipkart.com/search?q=laptop&otracker=start&as-show=off&as=off")
driver.find_element_by_xpath(""" //*[#id="container"]/div/div[2]/div[2]/div/div[2]/div[2]/div/section/ul/li[2]""").click()
If I am doing like this its working fine.

The element is not immediately available, wait for it to be present first:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
wait = WebDriverWait(driver, 10)
search = wait.until(EC.presence_of_element_located((By.NAME, 'q')))
search.send_keys("laptop")
search.send_keys(Keys.RETURN)
element = wait.until(EC.presence_of_element_located(By.XPATH, '//*[#id="container"]/div/div[2]/div[2]/div/div[2]/div[2]/div/section/ul/li[2]'))
element.click()
Note that, assuming you want to get to the "Popularity" menu header, why don't simplify the XPath expression and use the element's text:
//li[. = "Popularity"]

python dynamic webscraping javascript content

I am using Python and Selenium to scrape a website. What I do is go to the homepage, type in a keyword, such as 1300746-79-5. On the resulting page, I am trying to scrape the data in the "pricing" section. Specifically, I need to get the "SKU-Pack Size" and "Price(USD)" information. But these information is Javascript encripted, so I cannot see them in the source code. I am wondering how I can achieve this.
I have written some code that gets me to the page of interest, but I still cannot see the javascript information. Here is what I have so far.
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pprint
# Create a new instance of the Firefox driver
driver = webdriver.Chrome('C:\Users\Rei\Desktop\chromedriver.exe')
driver.get("http://www.sigmaaldrich.com/united-states.html")
print driver.title
inputElement = driver.find_element_by_name("Query")
# type in the search
inputElement.send_keys("1300746-79-5")
inputElement.submit()

Everything you have done looks correct to me.
"SKU-Pack Size" and "Price(USD)" information are not "encrypted", but retrieved after JavaScript clicking action. All you need to do is to click product name or pricing link.
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pprint
driver = webdriver.Chrome()
driver.get("http://www.sigmaaldrich.com/united-states.html")
print driver.title
inputElement = driver.find_element_by_name("Query")
# type in the search
inputElement.send_keys("1300746-79-5")
inputElement.submit()
pricing_link = driver.find_element_by_css_selector("li.priceValue a")
print pricing_link.text
pricing_link.click()
# then deal with the data you want
price_table = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.CSS_SELECTOR, ".priceAvailContainer tbody"))
)
print 'price_table.text: ' + price_table.text
driver.quit()

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Trouble parsing some text from a webpage - python

Related

How to click multiple 'Add to cart' button from product list using Selenium and Python?

How to use mulitple try except in selenium python

How to click on onclick link with image? Python Selenium

No such Element Exception using selenium in python

python dynamic webscraping javascript content

Categories

Resources