I'm trying to save the audio from a CAPTCHA sound button so I can turn the sound into a string. My idea is to save the .wav file whose link is in the HTML href and then use the speech_recognition module, because I haven't managed to do it just by capturing the sound that plays when the website button is clicked.
The website is https://servicos.receita.fazenda.gov.br/Servicos/CPF/ConsultaSituacao/ConsultaPublica.asp?Error=5
Basically, I want to solve the captcha under "Realizar consulta com Captcha sonoro" automatically. I used Selenium for the automation and tried to capture the sound directly to solve it, but I couldn't. So I changed my approach and decided to save the sound to a .wav file and then use speech recognition to return the audio as a string.
Below is the code:
import pandas as pd
from datetime import datetime, date, timedelta
from selenium.webdriver.common.action_chains import ActionChains
from selenium import webdriver
from msedge.selenium_tools import Edge,EdgeOptions
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.edge.service import Service
from selenium.webdriver.edge.options import Options
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import speech_recognition
import pyaudio
import pyttsx3
import requests
import time
options = EdgeOptions()
options.add_argument("start-maximized")
#options.add_argument("unhandledPromptBehavior")
options.add_argument("ignore")
#options.use_chromium = True
#webdriver_service = Service('C:/Users/F00010764/msedgedriver.exe')
nav = Edge(executable_path='C:/Users/F00010764/msedgedriver.exe', options=options)  # the webdriver and the path to the driver executable
wait = WebDriverWait(nav, 20)
website_b3 = "https://servicos.receita.fazenda.gov.br/Servicos/CPF/ConsultaSituacao/ConsultaPublica.asp?Error=5"  # link to the site
nav.get(website_b3)  # open the website
time.sleep(5)
#wait.until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR,"iframe#main")))
wait.until(EC.element_to_be_clickable((By.ID, "id_captchasonoro"))).click()
time.sleep(5)
audio_src =wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#idAntiRobo > div > div > div:nth-child(2) > a:nth-child(1)"))).get_attribute('href')
content = requests.get(audio_src).content
# save the content into a file where you would want to
open('your_desired_location\\captcha_file.wav', 'wb').write(content)
sr = speech_recognition.Recognizer()
with speech_recognition.Microphone() as source2:
    sr.adjust_for_ambient_noise(source2)
    audio2 = sr.listen(source2)
try:
    print("O áudio convertido é :" + sr.recognize_google(audio2, language='pt-BR'))
except speech_recognition.UnknownValueError:
    print("Could not understand the audio")
This is how I'm trying to capture the sound, but I'm not succeeding: it ends up raising UnknownValueError.
So I want to save the audio instead, but I don't know how to get the sound using Selenium and then save it as a .wav file.
Can someone help?
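One possible direction, sketched here rather than a confirmed solution: the code above already downloads the file with requests, so the recognizer can read that saved .wav directly with speech_recognition.AudioFile instead of listening on the microphone. This assumes the download at captcha_file.wav succeeded and that the file is a plain PCM WAV (if the site serves another format, it would need converting first):
import speech_recognition

recognizer = speech_recognition.Recognizer()
# Read the saved captcha audio file instead of the microphone.
with speech_recognition.AudioFile('your_desired_location\\captcha_file.wav') as source:
    audio_data = recognizer.record(source)  # load the entire file
try:
    print("O áudio convertido é: " + recognizer.recognize_google(audio_data, language='pt-BR'))
except speech_recognition.UnknownValueError:
    print("The recognizer could not understand the audio")
Note that recognize_google may still raise UnknownValueError on deliberately distorted captcha audio; this sketch only covers reading the saved file.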
Related
I've been attempting to use Selenium to go through elements on SoundCloud's website and am having trouble interacting with the input tags. When I try to write into the input tag with the class "headerSearch__input" using the send_keys command, I get back the error "Message: element not interactable". Could someone please explain what I'm doing wrong?
from tkinter import *
import random
import urllib.request
from bs4 import BeautifulSoup
from selenium import webdriver
import time
import requests
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.options import Options
driver = webdriver.Chrome(executable_path='/Users/quanahbennett/PycharmProjects/SeleniumTest/chromedriver')
url= "https://soundcloud.com/"
driver.get(url)
#time.sleep(30)
wait = WebDriverWait(driver, 30)
#link = driver.find_elements_by_link_text("Sign in")
#link[0].click()
#driver.execute_script("arguments[0].click();", link[0])
#SUCCESFUL LOGIN BUTTON PUSH
#please = driver.find_element_by_css_selector('button.frontHero__loginButton')
#please.click()
attempt = driver.find_element_by_css_selector('input.headerSearch__input')
time.sleep(10)
attempt.send_keys('Hello')
breakpoint()
#driver.quit()
The locator input.headerSearch__input matches two different elements in the DOM. It's important to find unique locators.
Also close the cookie pop-up before trying to interact with the elements.
Try something like the code below and confirm:
driver.get("https://soundcloud.com/")
wait = WebDriverWait(driver,30)
# Click on Accept cookies button
wait.until(EC.element_to_be_clickable((By.ID,"onetrust-accept-btn-handler"))).click()
search_field = wait.until(EC.element_to_be_clickable((By.XPATH, "//div[@id='content']//input")))
search_field.send_keys("Sample text")
Whenever I try to scrape a number from a website and print it, it always returns 0, even if I add a delay to let the page load first.
Here's my code:
from selenium import webdriver
import time
url = 'https://hytrack.me/'
browser = webdriver.Chrome(r'C:\Users\kinet\OneDrive\Documents\webscraper\chromedriver.exe')
browser.get(url)
text = browser.find_element_by_xpath('//*[@id="stat_totalPlayers"]').text
time.sleep(10)
print(text)
All I need it to do is print some text that it takes from a website.
Have I done something wrong or am I just completely missing something?
You should put the delay before getting the element!
from selenium import webdriver
import time
url = 'https://hytrack.me/'
browser = webdriver.Chrome(r'C:\Users\kinet\OneDrive\Documents\webscraper\chromedriver.exe')
browser.get(url)
time.sleep(10)
text = browser.find_element_by_xpath('//*[@id="stat_totalPlayers"]').text
print(text)
Though it's better to use an explicit wait, like this:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
url = 'https://hytrack.me/'
browser = webdriver.Chrome(r'C:\Users\kinet\OneDrive\Documents\webscraper\chromedriver.exe')
wait = WebDriverWait(browser, 20)
browser.get(url)
text = wait.until(EC.visibility_of_element_located((By.XPATH, '//*[@id="stat_totalPlayers"]'))).text
print(text)
Hi, I tried to get some text from a page but I'm unable to get it with Selenium. Here is my code. I want to copy only the text from the web page; my other XPath expressions work, just this one raises an error. Please help me solve this one. Here is a screenshot: https://prnt.sc/qymf1s
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from urllib.parse import urlparse
import urllib.request
import mysql.connector
import sys, os
import requests
css_album = '/html/body/div[3]/div[2]/div[1]/div/article/div/div/div[2]/p/text()[1]'
driver = 0
def openbrowser():
    global driver
    xoptions = Options()
    #xoptions.add_argument("--headless")  # Runs Chrome in headless mode.
    xoptions.add_argument('--no-sandbox')  # Bypass OS security model
    xoptions.add_argument('--disable-gpu')  # applicable to windows os only
    xoptions.add_experimental_option("excludeSwitches", ["ignore-certificate-errors"])
    xoptions.add_argument('disable-infobars')
    xoptions.add_argument("--disable-extensions")
    try:
        driver = webdriver.Chrome(options=xoptions)
        driver.set_window_size(55, 55)
    except:
        try:
            driver.close()
        except:
            print('Error in opening chrome')

if len(sys.argv) <= 1:
    print('Please provide a URL')
    sys.exit()

openbrowser()
userurl = sys.argv[1]
driver.get(userurl)
wait = WebDriverWait(driver, 50000)
album = ''
try:
    album = driver.find_element_by_xpath(css_album).text
    print('Album:', album)
except:
    print('Error in album')
driver.close()
I wasn't able to access the element whose text you're trying to retrieve with the XPath you are using. I was able to get it with this XPath:
/html//div[@id='content']/article//div[@class='width:100%;position:relative']/div[2]/p
One issue, though, is that the four lines of text are all within this one element, so if you get the text you will get all of it. This is what the HTML looks like:
<p style="">Song – Sheikh<br>
Singers – Karan Aujla<br>
Musicians – Deep Jandu<br>
Lyricists – Karan Aujla
</p>
You can get the full text and then take the substring with the song line you are looking for, as in the sketch below.
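A minimal sketch of that idea, assuming the element has been located with the XPath above; the splitting on newlines and on the en dash is an assumption about how the text is laid out:
# Locate the paragraph and split its text into the lines shown above.
element = driver.find_element_by_xpath(
    "/html//div[@id='content']/article//div[@class='width:100%;position:relative']/div[2]/p")
lines = element.text.splitlines()
# Keep only the line that starts with "Song" and take the part after the dash.
song_line = next((line for line in lines if line.startswith('Song')), '')
if song_line:
    print('Song:', song_line.split('–', 1)[-1].strip())  # e.g. "Sheikh"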
You can also try using the following XPath:
driver.find_element_by_xpath("//div[@class='top_ad']//following-sibling::p").text
This will return:
Song – Sheikh
Singers – Karan Aujla
Musicians – Deep Jandu
Lyricists – Karan Aujla
Hope this helps.
I want to extract data from the calendar on this website.
https://www.dreamplus.asia/event/list
If I click the tags, which are the events or the days of the events in the calendar, detailed information about the tag pops up on the right side of the calendar. As you can see, this website is (probably) rendered with JavaScript, if you look at the page source.
Even though I've used Selenium to try to click the tags, which are days or events, I couldn't figure out how to click those things. Any help?
# -*- coding: utf-8 -*-
import os
import re
import json
import requests
from bs4 import BeautifulSoup
import traceback
from pprint import pprint
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
def dreamplus():
    options = Options()
    driver = webdriver.Chrome(executable_path='../../chromedriver.exe', options=options)
    driver.get("https://www.dreamplus.asia/event/list")
    #driver = launchBrowser()
    html = driver.page_source
    soup = BeautifulSoup(html, 'html.parser')
    #Days = driver.find_elements_by_xpath("//*[@id='calendar']/div[@class='fc-view-container']/div[@class='fc-view fc-month-view fc-basic-view']/table/tbody[@class='fc-body']/tr/td[@class='fc-widget-content']/div[@class='fc-scroller fc-day-grid-container']/div/div/div/table")
    Controllers = driver.find_elements_by_class_name('fc-event-container')
    print(Controllers)
    for item in Controllers:
        print(item.text)
    driver.close()

if __name__ == '__main__':
    try:
        dreamplus()
    except BaseException as e:
        with open('dreamplus_error.log', 'wt') as f:
            f.write(traceback.format_exc())
I used find_elements_by_class_name with 'fc-event-container' to get the items, but 'Controllers' is empty, probably because the content is rendered by JavaScript.
I observed that if you attempt to go directly to the event URL you get redirected to the homepage. So you can either go to the home page and click through to the events, or simply do two .get() calls in a row. Note: you want the child a tags within the containers, since clicking those updates the sidebar info.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
d = webdriver.Chrome()
d.get('https://www.dreamplus.asia/')
d.get('https://www.dreamplus.asia/event/list')
events = WebDriverWait(d,10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".fc-event-container a")))
events[2].click() #example event click
Clicking through (slower):
d.get('https://www.dreamplus.asia/')
event_tabs = d.find_elements_by_xpath("//*[contains(text(), 'Event')]")
event_tabs[0].click()
event_tabs[1].click()
events = WebDriverWait(d,20).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".fc-event-container a")))
events[2].click() #example event click
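If you also want to read the detail panel that appears on the right after the click, a rough sketch is below. The ".event-detail" selector is a placeholder I have not verified against the site, so inspect the page for the real container:
# Wait for the detail pane to become visible after clicking an event, then print its text.
# NOTE: ".event-detail" is a hypothetical selector, not taken from the site.
detail = WebDriverWait(d, 10).until(EC.visibility_of_element_located((By.CSS_SELECTOR, ".event-detail")))
print(detail.text)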
I'm trying to automate scrolling through my Google Play Music playlist, but it isn't working and I don't know what other solutions to use. Normal scrolling, like on the Stack Overflow site, works, but I think the Google Music web app uses something different. Here is my code so far:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
from bs4 import BeautifulSoup
from selenium.webdriver.common.action_chains import ActionChains
profile = webdriver.FirefoxProfile(r"C:\Users\Richard\AppData\Roaming\Mozilla\Firefox\Profiles\puom7wwz.default-1468625172145")
driver = webdriver.Firefox(executable_path=r'geckodriver.exe',firefox_profile=profile)
driver.get("https://play.google.com/music/listen?u=1#/pl/AMaBXymw0YGY7Y-z1B-FdrwkDCm7WdO_kyK4x4xQqn_vZsg56-VzwjsRKaQPypMs88oIKHn4qZd5aDbFbh5ojt6DpoZZCyHgCA%3D%3D")
driver.find_element_by_id("gb_70").click()
time.sleep(2)
element = driver.find_element_by_xpath("//*[contains(text(), 'William Thomas')]").click()
time.sleep(2)
password_field = driver.find_element_by_name("password")
password_field.clear()
password_field.send_keys("IcewindDale1995%")
password_field.send_keys(u'\ue007') #unicode for enter key
time.sleep(10)
driver.get("https://stackoverflow.com/questions/44370208/clicking-on-link-through-google-with-selenium-web-driver")
#driver.find_element_by_id("gba_70").click()
source = driver.page_source
soup = BeautifulSoup(source, "html.parser")
time.sleep(10)
#driver.execute_script("document.getElementById('descriptionWrapper').focus();")
#time.sleep(10)
driver.execute_script("window.scrollTo(0, 400)")
# i = 0
# while i<1000:
# scroll = driver.find_element_by_id('descriptionWrapper').send_keys(Keys.PAGE_DOWN)
# i+=1
driver.find_element_by_id("gba_70").click()
with open('page1.html', 'w', encoding="utf-8") as fid:
    fid.write(str(soup))
driver.close()
Edit:
There is no scrolling taking place with that code, but it works on the Stack Overflow website.
Does anyone have a solution?
The issue is that the Google Play Music player itself does not scroll; you need to have the browser focus on the internal scrollable element, something along these lines:
driver.find_element_by_css_selector('#music-content > div.detail-view.g-content.view-transition').click()
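After that click gives the inner container focus, one hedged way to scroll it is to run a small piece of JavaScript against the same element; the 400-pixel step here is arbitrary:
# Scroll the internal container itself rather than the window.
container = driver.find_element_by_css_selector('#music-content > div.detail-view.g-content.view-transition')
driver.execute_script("arguments[0].scrollTop = arguments[0].scrollTop + 400;", container)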
Hope this answer serves you well!