My Python script can't access the internet using Selenium - python

I am creating a script that needs to access the internet, but it doesn't work. My internet connection is stable and the rest of the script works perfectly. I've used the Selenium module to do this.
Please look at the script I've attached and let me know if there are any problems and how I could resolve them.
from selenium import webdriver

def search_web(command):  # renamed from `input`, which shadows the Python built-in
    driver = webdriver.Chrome()
    driver.implicitly_wait(1)
    driver.maximize_window()
    if 'youtube' in command.lower():
        assistant_speaks("Opening in YouTube")  # assistant_speaks is defined elsewhere in the script
        indx = command.lower().split().index('youtube')
        query = command.split()[indx + 1:]
        # note: no space before '=' in the query string, or the parameter name breaks
        driver.get("http://www.youtube.com/results?search_query=" + '+'.join(query))
        return
    elif 'wikipedia' in command.lower():
        assistant_speaks("Opening Wikipedia")
        indx = command.lower().split().index('wikipedia')
        query = command.split()[indx + 1:]
        driver.get("https://en.wikipedia.org/wiki/" + '_'.join(query))
        return
    else:
        if 'google' in command:
            indx = command.lower().split().index('google')
            query = command.split()[indx + 1:]
            driver.get("https://www.google.com/search?q=" + '+'.join(query))
        elif 'search' in command:
            indx = command.lower().split().index('search')  # was index('google'), a copy-paste bug
            query = command.split()[indx + 1:]
            driver.get("https://www.google.com/search?q=" + '+'.join(query))
        else:
            driver.get("https://www.google.com/search?q=" + '+'.join(command.split()))
        return

Try passing executable_path to the Chrome driver. I ran it with executable_path and it works fine for me.
driver = webdriver.Chrome(executable_path='path_of_chrome_driver')
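Note that in Selenium 4 the executable_path argument is deprecated in favor of a Service object, so if the line above raises a deprecation warning, a minimal sketch of the equivalent (assuming the same local chromedriver path):

from selenium import webdriver
from selenium.webdriver.chrome.service import Service

# Selenium 4 style: wrap the driver path in a Service object
service = Service(executable_path='path_of_chrome_driver')
driver = webdriver.Chrome(service=service)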

Related

HCaptcha Bypass with Selenium & Python

I'm trying to build a voting bot for a French Minecraft server.
The thing is that there is an hCaptcha, and I'm using 2Captcha to bypass it.
Everything works up to a point: I get the token from 2Captcha and insert it into the captcha's textarea.
But when I submit the form to process the vote, it doesn't work as expected. After digging for about 3 days I couldn't find any solution.
import os
import time

from twocaptcha import TwoCaptcha
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# ====================
# GLOBAL PARAM
# ====================
driver = webdriver.Chrome(executable_path="./driver/chromedriver.exe")
original_window = driver.current_window_handle
driver.get("https://serveur-prive.net/minecraft/seasonsky-skyblock-100-farmtowin-1-18-crack-on-6330/vote")

def HCaptchaSolver():
    def solver():
        api_key = os.getenv('APIKEY_2CAPTCHA', '2CAPTCHA_API_KEY')
        solver = TwoCaptcha(api_key)
        try:
            result = solver.hcaptcha(
                sitekey='c6b0b71f-47cc-4512-b4df-a55e1a97a349',
                url='https://serveur-prive.net/minecraft/seasonsky-skyblock-100-farmtowin-1-18-crack-on-6330/vote',
            )
        except Exception as e:
            print(e)
            return False
        else:
            return result

    result = solver()
    if result:
        code = result['code']
        print(code)
        # reveal the hidden response textarea (the original `.style = 'block'` was a no-op),
        # then write the token into it
        driver.execute_script(
            "document.querySelector('[name=\"h-captcha-response\"]').style.display = 'block';")
        driver.execute_script(
            "document.querySelector('[name=\"h-captcha-response\"]').innerHTML = '" + code + "';")
        print("/!\\ --> JS Code executed")
        driver.find_element(By.CSS_SELECTOR, "#btnvote").click()
    else:
        print("/!\\ --> 2Captcha not working :p")

# wait for the captcha iframe to load
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, '#c > div > div > div.bvt > div.col > div.vote > div.form > div > div > iframe')))
# HCaptchaSolver()
Note that you can reproduce my problem by creating a 2Captcha account, solving a few captchas to earn a few cents of credit, and trying it out yourself.
Thanks for the help!
Roy
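As a side note on the token-injection step: Selenium's execute_script accepts extra arguments, which avoids hand-building the JavaScript string around the token. A minimal sketch of the same two calls (not a fix for the vote itself):

# pass the token as arguments[0] instead of concatenating it into the script
driver.execute_script(
    "document.querySelector('[name=\"h-captcha-response\"]').style.display = 'block';")
driver.execute_script(
    "document.querySelector('[name=\"h-captcha-response\"]').innerHTML = arguments[0];",
    code)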

Not able to scrape text from a website using Selenium

I want to get the values for the fields shown in the attached picture. This is my sample code, and it's not fetching the required fields; any corrections are welcome.
span_xpath = "//div[@id='se-siteDetailsPanel-panel']"
name_xpath = "//div[@id='se-siteDetailsPanel-name']" + span_xpath
site_data.append(browser.find_element_by_xpath(name_xpath).text)
# address:
adrs1_xpath = "//div[@id='se-siteDetailsPanel-firstAddress']" + span_xpath
adrs2_xpath = "//div[@id='se-siteDetailsPanel-address']" + span_xpath
address = browser.find_element_by_xpath(adrs1_xpath).text + \
          browser.find_element_by_xpath(adrs2_xpath).text
site_data.append(address)
# installed:
installed_xpath = "//div[@id='se-siteDetailsPanel-installationDate']" + span_xpath
site_data.append(browser.find_element_by_xpath(installed_xpath).text)
# updated:
updated_xpath = "//div[@id='se-siteDetailsPanel-lastUpdateTime']" + span_xpath
site_data.append(browser.find_element_by_xpath(updated_xpath).text)
# peak:
peak_xpath = "//div[@id='se-siteDetailsPanel-peakPower']" + span_xpath
peak = browser.find_element_by_xpath(peak_xpath).text
site_data.append(peak.split()[0])
You can try using an XPath or By.ID.
If you cannot find the XPath, try the ChroPath extension in Chrome; it makes finding the XPath easy.
@itronic1990... your hint worked. I saw that the span XPath was wrong; I corrected the span path and it started fetching the values.
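For reference, the By.ID route the answer suggests looks like this in current Selenium (a minimal sketch, assuming browser is the running driver and the element ids from the question are correct):

from selenium.webdriver.common.by import By

# find_element(By.ID, ...) replaces the deprecated find_element_by_* helpers
name = browser.find_element(By.ID, 'se-siteDetailsPanel-name').text
site_data.append(name)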

Unable to print foodpanda product links using selenium python

I want to extract the links from the href attributes of a tags, but it prints no results from https://www.foodpanda.pk/restaurants/new?lat=24.9414896&lng=67.1676002&vertical=restaurants
from selenium import webdriver

driver = webdriver.Chrome('F:/chromedriver')
driver.get("https://www.foodpanda.pk/restaurants/new?lat=24.9414896&lng=67.1676002&vertical=restaurants")
# response = scrapy.Selector(text=driver.page_source)
list = driver.find_elements_by_css_selector("ul.vendor-list li")
length = len(driver.find_elements_by_css_selector("ul.vendor-list li"))
for i in range(length):
    try:
        name = driver.find_elements_by_css_selector(".headline .name")[i].text
        time = driver.find_elements_by_css_selector(".badge-info")[i].text.strip()
        rating = driver.find_elements_by_css_selector(".rating")[i].text
        dealtag = driver.find_elements_by_css_selector(".multi-tag")[i].text
        link = driver.find_elements_by_css_selector(".vendor [href]")[i].text
        print(name, link, time, rating, dealtag)
    except:
        pass
Please look at the code below; it works fine on my computer.
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait

driver = webdriver.Chrome()
wait = WebDriverWait(driver, 30)
driver.get('https://www.foodpanda.pk/restaurants/new?lat=24.9414896&lng=67.1676002&vertical=restaurants')
vendor_list = driver.find_elements_by_xpath("//figure[@class=\"vendor-tile item\"]/ancestor::li")
for vendor in vendor_list:
    print("-------------------")
    print("Restaurant Name :- " + vendor.find_element_by_xpath(".//span[@class=\"name fn\"]").text)
    print("Badge :- " + vendor.find_element_by_xpath(".//span[@class=\"badge-info\"]").text[:2] +
          vendor.find_element_by_xpath(".//span[@class=\"badge-info\"]/span").text)
    try:
        print("Rating :- " + vendor.find_element_by_xpath(".//span[@class=\"rating\"]").text)
    except:
        print("No Rating Available")
    try:
        print("Multi Tag :- " + vendor.find_element_by_xpath(".//span[@class=\"multi-tag\"]").text)
    except:
        print("No Tag Info")
    print("Vendor URL :- " + vendor.find_element_by_xpath(".//a").get_attribute("href"))
If it solves your problem, then please mark it as the answer.
There are no elements with the exact class name vendor there.
You should use something like //*[contains(@class,'vendor')]//a[@href]
I used XPath since I prefer working with it, but you can also use a similar CSS selector.
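A rough CSS-selector equivalent of that XPath, for anyone who prefers selectors (a sketch; the substring match on the class is an assumption carried over from the contains() in the XPath):

# [class*='vendor'] matches any element whose class attribute contains 'vendor'
for a in driver.find_elements_by_css_selector("[class*='vendor'] a[href]"):
    print(a.get_attribute("href"))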

Python Selenium Multiple Webdrivers

So I have this Python Selenium code, and I want it to run multiple times at the same time: when I activate it, it should open multiple WebDrivers and execute this script in parallel. How can I do this?
import time
from time import sleep

from colorama import Fore
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

# driver, base_url, user_email and user_name are defined elsewhere in the script
driver.get(base_url)
password_id = driver.find_element_by_id('password')
password = input("Password: ")
password_id.send_keys(password)
password_id.send_keys(Keys.ENTER)
email_id1 = EC.presence_of_element_located((By.ID, 'email'))
WebDriverWait(driver, 100).until(email_id1)
email_id = driver.find_element_by_id('email')
email_id.send_keys(user_email)
start = time.time()
print(Fore.WHITE + "STATUS:" + Fore.LIGHTYELLOW_EX + " Email Filled!")
name_id = driver.find_element_by_id('name')
name_id.send_keys(user_name)
print(Fore.WHITE + "STATUS:" + Fore.LIGHTYELLOW_EX + " Name Filled!")
button_id = driver.find_element_by_id('purchase')
end = time.time()
button_id.click()
print(Fore.WHITE + "STATUS:" + Fore.LIGHTGREEN_EX + " Processing order...")
sleep(10)
timeresult = end - start
speed = str(timeresult)
checkout_done = driver.current_url
Have you tried multithreading?
The code below lets me open two browsers at once (in separate threads) and control them within functions (first_window(), second_window()).
from selenium import webdriver
from threading import Thread

def first_window():
    driver = webdriver.Chrome('chromedriver.exe')
    driver.get("https://stackoverflow.com/")

def second_window():
    driver = webdriver.Chrome('chromedriver.exe')
    driver.get("https://stackoverflow.com/")

if __name__ == '__main__':
    Thread(target=first_window).start()
    Thread(target=second_window).start()
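The same idea generalizes to N drivers without duplicating the function; a minimal sketch (the count of 3 and the URL are placeholders):

from selenium import webdriver
from threading import Thread

def open_window(url):
    # each thread owns its own driver instance; a driver is not safe to share across threads
    driver = webdriver.Chrome('chromedriver.exe')
    driver.get(url)

if __name__ == '__main__':
    threads = [Thread(target=open_window, args=("https://stackoverflow.com/",)) for _ in range(3)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()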

Python- Selenium/BeautifulSoup PDF & Table scraper

I am trying to write a robust web scraper that can be used with a variety of keywords. Currently it is set up to find the first PDF on the first page of a Google search; if none is found, it clicks into the first Google search result, grabs the closest HTML table, and stores it in a pandas DataFrame, which will later be sent to DataFrame.to_excel to turn it into an Excel file. I've asked a few questions like this before, but I believe I have a decent number of the components:
a = ["term 1", "term 2 ", "term 3"]
b = ["school 1", "school 2 ", "school 3"]
c = ["program 1", "program 2", "program 3"]
keys = []
for x,y,z in [(x,y,z) for x in a for y in b for z in c]:
keys.append(z +" "+ x +" "+ y)
path_to_driver = r"C:\wherever_you_chose_the_download_path_to_be\chromedriver.exe"
download_dir = r"C:\wherever_you_want_to_place_the_downloaded_file\name_of_pdf_holder_file"
chrome_options = Options()
chrome_options.add_experimental_option("prefs", {
"download.default_directory": download_dir,
"download.prompt_for_download": False,
})
chrome_options.add_argument("--headless")
chrome_options.add_argument("--ignore-certificate-errors")
chrome_options.add_argument("--incognito")
driver = webdriver.Chrome(path_to_driver, options=chrome_options)
driver.command_executor._commands["send_command"] = ("POST", '/session/$sessionId/chromium/send_command')
params = {'cmd': 'Page.setDownloadBehavior', 'params': {'behavior': 'allow', 'downloadPath': download_dir}}
command_result = driver.execute("send_command", params)
for k, key in enumerate(keys):
try:
start = time.time()
driver.implicitly_wait(10)
driver.get("https://www.google.com/")
sleep_between_interactions = 5
searchbar = driver.find_element_by_name("q")
searchbar.send_keys(key)
searchbar.send_keys(Keys.ARROW_DOWN)
searchbar.send_keys(Keys.RETURN)
pdf_element = driver.find_elements(By.XPATH, ("//a[contains(#href, '.pdf')]"))
key_index_number = str(keys.index(key) +1 )
key_length = str(len(keys))
print(key_index_number + " out of " + key_length)
if len(pdf_element) > 0 and key_length < key_index_number :
print("pdf found for: "+ key)
pdf_element[0].click()
time.sleep(sleep_between_interactions)
print("downloaded " + key_index_number + " out of "+ str(len(keys)))
elif len(pdf_element) == 0 and key_index_number != key_length:
print("pdf NOT found for "+ key)
print(key + " pdf not downloaded, moving on...")
try:
google_search = f"https://www.google.com/search?q={key}"
driver.get(google_search)
clicked_link = driver.find_element(By.XPATH, '(//h3)[1]/../../a').click()
driver.implicitly_wait(10)
html_source_code = driver.execute_script("return document.body.innerHTML;")
html_soup: BeautifulSoup = BeautifulSoup(html_source_code, 'html.parser')
url = '{}'.format(html_soup)
r = requests.get(url)
soup = bs(r.content, 'lxml')
for table in soup.select('.table'):
tbl = pd.read_html(str(table))[0]
links_column = ['{}'.format(url) + i.select_one('.*')['href'] if i.select_one('.*') is not None else '' for i in table.select('td:nth-of-type(1)')]
tbl['Links'] = links_column
continue
except:
print("something happened, probably a JS error which will be dealt with")
continue
except IndexError as index_error:
print("Couldn't find pdf file for "+"\"" + key + "\""+" due to Index Error moving on....")
print(key_index_number + " out of " + str(len(keys)))
continue
except NoSuchElementException:
print("search bar didn't load, iterating next in loop")
print(" pdf NOT found for "+ key)
print(key + " pdf not downloaded, moving on...")
continue
except ElementNotInteractableException:
print("element either didn't load or doesn't exist")
driver.get("https://www.google.com/")
continue
So far this works okay for finding some PDFs, but it isn't working for any tables it comes across.
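One pattern worth trying for the table half is to feed Selenium's rendered page source straight to pandas instead of re-requesting a URL; a minimal sketch (assuming the driver has already navigated to the clicked result, and the output filename is a placeholder):

try:
    # read_html parses every <table> in the rendered HTML into a list of DataFrames
    tables = pd.read_html(driver.page_source)
    tables[0].to_excel("first_table.xlsx", index=False)
except ValueError:
    # pandas raises ValueError when the page contains no <table> elements
    print("no table found on this page")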
I tried something like this in the past:
url_search = f"https://www.google.com/search?q={key}"
request = requests.get(url_search)
soup = BeautifulSoup(request.text, "lxml")
first_link = soup.find("div", class_="BNeawe").text
links_list.append(first_link)
but it only returned a list of the titles of each link without actually clicking through.
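If the goal there was the URL rather than the title, one option is to walk up from the title div to its enclosing anchor; a sketch, assuming Google's no-JS markup where the BNeawe title div sits inside the result's a tag:

first_div = soup.find("div", class_="BNeawe")
first_a = first_div.find_parent("a") if first_div else None  # enclosing anchor, if any
if first_a is not None:
    # note: Google often returns a relative /url?q=... redirect here
    links_list.append(first_a["href"])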
Currently, I'm also thinking about how to store the PDFs by search-term groupings.
Lastly, I've attempted to pass around and use the HTML source in the past, but that has also given me a JavascriptException, which doesn't seem to be an exception I can handle properly in Python; I may be missing something here, though.
I'm also hoping, if possible, to turn the a, b, c, etc. lists into input() prompts that could be used for future UI creation (see the sketch after this question); I'm not sure how doable this is in Python, but it's worth a shot as long as I can get the main part of this code working as I would like.
I'm not sure whether this can be accomplished with XPath or a CSS selector, or how I should approach this problem to avoid issues.
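On the input() idea mentioned above, a minimal sketch that reads comma-separated terms and builds the same keys list as the question's loops (the prompt wording is a placeholder):

# read comma-separated values for each list instead of hard-coding them
a = [t.strip() for t in input("Terms (comma-separated): ").split(",")]
b = [t.strip() for t in input("Schools (comma-separated): ").split(",")]
c = [t.strip() for t in input("Programs (comma-separated): ").split(",")]
keys = [z + " " + x + " " + y for x in a for y in b for z in c]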
