hi tried to get a text from the page but unable to get with selenium here is my code and i want copy onlty text from web page my other xath element working just this getting error please help me to solve this one here is screen shot https://prnt.sc/qymf1s
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from urllib.parse import urlparse
import urllib.request
import mysql.connector
import sys, os
import requests
css_album = '/html/body/div[3]/div[2]/div[1]/div/article/div/div/div[2]/p/text()[1]'
driver = 0
def openbrowser():
global driver
xoptions = Options()
#xoptions.add_argument("--headless") # Runs Chrome in headless mode.
xoptions.add_argument('--no-sandbox') # Bypass OS security model
xoptions.add_argument('--disable-gpu') # applicable to windows os only
xoptions.add_experimental_option("excludeSwitches",["ignore-certificate-errors"])
xoptions.add_argument('disable-infobars')
xoptions.add_argument("--disable-extensions")
try:
driver = webdriver.Chrome(options=xoptions)
driver.set_window_size(55, 55)
except:
try:
driver.close()
except:
print('Error in opening chrome')
if len (sys.argv) <= 1:
print('Please provide a URL')
sys.exit()
openbrowser()
userurl = sys.argv[1]
driver.get(userurl)
wait = WebDriverWait(driver, 50000)
album = '';
try:
album = driver.find_element_by_xpath(css_album).text
print('Album:', album);
except:
print('Error in album')
driver.close()
I wasn't able to access the element you are trying to retrieve the text with the xpath you are using. I was able to get it with this xpath:
/html//div[#id='content']/article//div[#class='width:100%;position:relative']/div[2]/p
One issue though is that the 4 lines of text are all within this one element, so if you get the text you will get it all. This is what the html looks like for this:
<p style="">Song – Sheikh<br>
Singers – Karan Aujla<br>
Musicians – Deep Jandu<br>
Lyricists – Karan Aujla
</p>
You can get the text and then take the substring of the song text that you are looking for.
You can try using the following xpath :
driver.findElement(By.xpath("//div[#class='top_ad']//following-sibling::p")).getText()
This will return you:
Song – Sheikh
Singers – Karan Aujla
Musicians – Deep Jandu
Lyricists – Karan Aujla
Hope this helps.
Related
I want to get a href link, it should be something like the link here
A Link in the Chain
instead I get
'https://www.allmusic.com/advanced-search#'
Heres the code. I think the problem is that the page is that I gotta click stuff first to pull it up, idk tho. Thxs for any help
#This gets every album in the page
browser = webdriver.Chrome('c:\\Users\\16308\\Documents\\VSCPython\chromedriver')
def get_page(pge_url):
browser.get(pge_url)
time.sleep(1)
add_stuff(pge_url)
time.sleep(2)
thing1 = browser.find_element_by_class_name('content-container')
time.sleep(4)
thing2 = thing1.find_element_by_class_name('content')
time.sleep(8)
thing3 = thing2.find_element_by_class_name('results')
time.sleep(4)
thing4 = thing3.find_element_by_class_name('desktop-results')
time.sleep(4)
thing5 = browser.find_element_by_class_name('title').find_element_by_css_selector('a').get_attribute('href')
print("5: ",thing5)
return thing5
get_page('https://www.allmusic.com/advanced-search')
#clicks range of years
def add_stuff(current_page):
time.sleep(1)
browser.get(current_page)
time.sleep(2)
selectOne = Select(browser.find_element_by_class_name('start-year'))
time.sleep(4)
selectOne.select_by_visible_text('1920')
time.sleep(8)
selectTwo = Select(browser.find_element_by_class_name('end-year'))
time.sleep(8)
selectTwo.select_by_visible_text('2022')
#IMPORT STATEMENTS
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import time
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
from selenium.webdriver.support import expected_conditions
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import Select
import dns
import pymongo
import sys
from pymongo import MongoClient
I am assuming that you have a div for that class which is called as "title". So in that case, you can easily use xpath to get that href easily.
thing5 = browser.find_element_by_xpath('.//div[#class="title"]/a').get_attribute('href')
You can try Xpath
driver.find_element(By.XPATH,"*The full XPATH here*")
Ive been attempting to use selenium to go through elements on soundclouds website and am having trouble interacting with the input tags. When I try to write in the input tag of the class "headerSearch__input" with the send keys command, I get back the error "Message: element not interactable". May someone please explain to me what im doing wrong?
from tkinter import *
import random
import urllib.request
from bs4 import BeautifulSoup
from selenium import webdriver
import time
import requests
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.options import Options
driver = webdriver.Chrome(executable_path='/Users/quanahbennett/PycharmProjects/SeleniumTest/chromedriver')
url= "https://soundcloud.com/"
driver.get(url)
#time.sleep(30)
wait = WebDriverWait(driver, 30)
#link = driver.find_elements_by_link_text("Sign in")
#link[0].click()
#driver.execute_script("arguments[0].click();", link[0])
#SUCCESFUL LOGIN BUTTON PUSH
#please = driver.find_element_by_css_selector('button.frontHero__loginButton')
#please.click()
attempt = driver.find_element_by_css_selector('input.headerSearch__input')
time.sleep(10)
attempt.send_keys('Hello')
breakpoint()
#driver.quit()
The locator - input.headerSearch__input is highlighting two different elements in the DOM. Its important to find unique locators. Link to refer
And also close the cookie pop-up. And then try to interact with elements.
Try like below and confirm.
driver.get("https://soundcloud.com/")
wait = WebDriverWait(driver,30)
# Click on Accept cookies button
wait.until(EC.element_to_be_clickable((By.ID,"onetrust-accept-btn-handler"))).click()
search_field = wait.until(EC.element_to_be_clickable((By.XPATH,"//div[#id='content']//input")))
search_field.send_keys("Sample text")
As the title said, I'd like to performa a Google Search using Selenium and then open all results of the first page on separate tabs.
Please have a look at the code, I can't get any further (it's just my 3rd day learning Python)
Thank you for your help !!
Code:
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
import pyautogui
query = 'New Search Query'
browser = webdriver.Chrome('/Users/MYUSERNAME/Desktop/Desktop-Files/Chromedriver/chromedriver')
browser.get('http://www.google.com')
search = browser.find_element_by_name('q')
search.send_keys(query)
search.send_keys(Keys.RETURN)
element = browser.find_element_by_class_name('LC20lb')
element.click()
The reason why I imported pyautogui is because I tried simulating a right click and then open in new tab for each result but it was a little confusing :)
Forget about pyautogui as what you want to do can be done in Selenium. Same with most of the rest. You just do not need it. See if this code meets your needs.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
query = 'sins of a solar empire' #my query about a video game
browser = webdriver.Chrome()
browser.get('http://www.google.com')
search = browser.find_element_by_name('q')
search.send_keys(query)
search.send_keys(Keys.RETURN)
links = browser.find_elements_by_class_name('r') #I went on Google Search and found the container class for the link
for link in links:
url = link.find_element_by_tag_name('a').get_attribute("href") #this code extracts the url of the HTML link
browser.execute_script('''window.open("{}","_blank");'''.format(url)) # this code uses Javascript to open a new tab and open the given url in that new tab
print(link.find_element_by_tag_name('a').get_attribute("href"))
I try to download CSV data from GoogleTrend by selenium(python).
In previous, I tried to print source page and extract data that I want later.
It worked for some period, but now it does not work.
I try to click download button to got CSV file but nothing happen.
Do you have any idea for this case?
I got button path from firebug+firepath (firefox plugin).
html/body/div[2]/div[2]/div/md-content/div/div/div[1]/trends-widget/ng-include/widget/div/div/div/widget-actions/div/button[1]
I try on chrome driver and firefox drive.
This code; put 1 (word)argument that you want to get trend of search.
import sys
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
def run_text_extract(search_word):
try:
print(search_word)
driver = webdriver.Firefox('/home/noah/Desktop/Google_Trend_downloader/chromedriver/geckodriver')
# driver = webdriver.Chrome('/home/noah/Desktop/Google_Trend_downloader/chromedriver')
driver.get("https://trends.google.com/trends/explore?date=all&geo=TH&q="+search_word)
driver.find_element_by_xpath('html/body/div[2]/div[2]/div/md-content/div/div/div[1]/trends-widget/ng-include/widget/div/div/div/widget-actions/div/button[1]').click()
try:
driver.manage().deleteAllCookies()
clear_cache(driver)
except TimeoutException as ex:
isrunning = 0
print("Exception has been thrown. " + str(ex))
print("Timeout line is", line ,".")
driver.close()
except Exception:
print ("Here 5")
pass
time.sleep(2)
driver.close()
print("======== END_OF_FILE ===============")
except:
pass
if name == 'main':
run_text_extract(sys.argv[1])
time.sleep(8)
# run_text_extract()
I have navigated to the link you have provided.
If you search for any term, you can see download csv button link will appear at the right side. But there will be 3 download csv buttton links with the same class or css selector are present. So you need to collect all the elements and loop through it so that you can click on specific element. In your case, I assume you want to click on first element. so below code should work. If you want 2nd or 3rd element to click change the index accordingly.
def run_text_extract(search_word):
from selenium import webdriver
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
import time
profile = webdriver.FirefoxProfile()
profile.set_preference("browser.download.folderList", 2)
profile.set_preference("browser.download.manager.showWhenStarting", False)
profile.set_preference("browser.download.dir", 'C:\\Python27')
profile.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/csv")
driver = webdriver.Firefox(firefox_profile=profile,executable_path=r'C:\\Python27\\geckodriver.exe')
driver.get("https://trends.google.com/trends/explore?date=all&geo=TH&q="+ search_word)
time.sleep(7)
lst = driver.find_elements_by_css_selector(".widget-actions-item.export")
lst[0].click()
run_text_extract("selenium")
Hello all my task is to scrap source urls from offer link like this one.
But when I try to get the elements like this (Note that I make 2 requests of the url to get the cookies, because the first time is redirecting me to the main page):
driver = webdriver.Firefox(executable_path="C:\\selenium-drivers\\geckodriver.exe")
driver.get("http://www.kmart.com/joe-boxer-men-s-pajama-shirt-pants-plaid/p-046VA92629712P")
driver.get("http://www.kmart.com/joe-boxer-men-s-pajama-shirt-pants-plaid/p-046VA92629712P")
img_element = driver.find_elements_by_class_name("main-image")
No elements are found and when i try to search them in the source code in the browser with Ctrl+U they are missing. Why is this happening? Is anyone who can tell me how to get these images.
You just need to tell selenium to be patient and wait for element's visibility:
from selenium.webdriver.support.ui import WebDriverWait
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# driver definition here
driver.get("http://www.kmart.com/joe-boxer-men-s-pajama-shirt-pants-plaid/p-046VA92629712P")
wait = WebDriverWait(driver, 10)
# get the main image element
img_element = wait.until(EC.visibility_of_element_located((By.CLASS_NAME, 'main-image')))
print(img_element.get_attribute("alt"))
driver.close()
For the demonstration purposes, it prints the alt attribute of the image, which is:
Joe Boxer Men's Pajama Shirt & Pants - Plaid
Or you can just find by xpath and then get image url
>>> driver.get('http://www.kmart.com/joe-boxer-men-s-pajama-shirt-pants-plaid/p-046VA92629712P')
>>> s = driver.find_element_by_xpath('//*[#id="overview"]/div[1]/img')
>>> s.get_attribute('src')
'http://c.shld.net/rpx/i/s/i/spin/-122/prod_2253990712?hei=624&wid=624&op_sharpen=1'