Hi, I am trying to write a sign-up bot for UEFA.com using Selenium, since I find requests too difficult for me; so I am just working on automating the sign-up process, even if it is a lot slower.
I am able to get to the final stage where I click on Create an Account, but faced with a reCaptcha which only appears after clicking on Create an Account. And after solving the captcha there is no 'Submit' button but it will automatically submit the details for you.
I am able to get the captcha token returned from 2captcha solving service, and inputted it into the innerHTML of the g-response-token field using javascript. However I do not know how to submit the captcha and the form.
import requests
import time
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from seleniumwire import webdriver
import pyautogui
from twocaptcha import TwoCaptcha
import random
import os
from random import randint
import sys
firstnames = []
lastnames = []
# NOTE(review): this value looks like a reCAPTCHA *site key*, not a 2captcha
# API key -- confirm which credential it actually is and rename accordingly.
API_Key = '6LehfZUbAAAAAJhue_6BVqqxLulLiXLP0rEgpdRH'

# Load the pools of random first/last names, one name per line.
with open('firstnames.txt', 'r') as f:
    for name in f:
        firstnames.append(name.strip())
with open('lastnames.txt', 'r') as e:
    for name in e:
        lastnames.append(name.strip())

# Pick one proxy line at random.  The original randint(1, 35) skipped index 0
# and broke for any other list length; derive the bound from the file instead.
with open('proxies.txt') as f:
    proxy = f.readlines()
proxy_rand = randint(0, len(proxy) - 1)
p_strip = str(proxy[proxy_rand]).rstrip()

# Proxy line format: ip:port:username:password
bare_proxy = p_strip.split(':')
username = bare_proxy[2]
password = bare_proxy[3]
ip = bare_proxy[0]
port = bare_proxy[1]
options = {
    'proxy': {
        # BUG FIX: credentials are joined to the host with '@', not '#'
        # (user:pass@host:port is the standard proxy-URL form).
        'http': f'http://{username}:{password}@{ip}:{port}',
        'https': f'https://{username}:{password}@{ip}:{port}',
        'no_proxy': 'localhost,127.0.0.1'
    }
}
# BUG FIX: a path appended to PATH needs a separator, otherwise it is glued
# onto the previous entry and the driver binary is never found.
os.environ['PATH'] += os.pathsep + 'C:/SeleniumDrivers'

homepage_URL = 'https://www.uefa.com/tickets/'
driver = webdriver.Chrome(seleniumwire_options=options)
driver.get(homepage_URL)

# Accessing Register Page: dismiss the cookie banner, then click through to
# the "Create an Account" form.
reject_cookies = driver.find_element(By.ID, 'onetrust-reject-all-handler')
reject_cookies.click()
time.sleep(1)
login_button = driver.find_element(By.CSS_SELECTOR, "a[class='btn btn-secondary tickets__btn js-tracking-card']")
login_button.click()
time.sleep(10)
# NOTE(review): absolute XPaths like this are brittle; prefer an id/css hook.
create_account = driver.find_element(By.XPATH, '/html/body/div[2]/div[2]/div[2]/div/form/div[4]/a')
create_account.click()
time.sleep(10)
# Inputting SignUp Details -- every field is filled with randomised data.
letters = 'abcdefghijklmnopqrstuvwxyz'
a = random.choice(letters)
b = random.choice(letters)
c = random.choice(letters)
d = random.choice(letters)
email = driver.find_element(By.XPATH, '/html/body/div[2]/div[2]/div[2]/div/form/div[1]/div[6]/input')
# BUG FIX: an email address needs '@' before the domain, not '#'.
email.send_keys(f'{a}{b}{c}{d}@nottingham.pro')
time.sleep(2)
password = driver.find_element(By.XPATH, '/html/body/div[2]/div[2]/div[2]/div/form/div[1]/div[7]/input')
password.send_keys('19741002Rw!')
time.sleep(2)
# BUG FIX: XPath attribute tests use '@' ('//*[@id=...]'), not '#'.
first_name = driver.find_element(By.XPATH, '//*[@id="gigya-textbox-130722358975432270"]')
first_range = len(firstnames) - 1
# randint(0, ...) so index 0 is reachable too.
random_first = randint(0, first_range)
f_name = firstnames[random_first]
first_name.send_keys(f'{f_name}')
time.sleep(2)
last_name = driver.find_element(By.XPATH, '/html/body/div[2]/div[2]/div[2]/div/form/div[1]/div[9]/input')
last_range = len(lastnames) - 1
# BUG FIX: the last-name index was drawn with first_range, which raises
# IndexError whenever lastnames is shorter than firstnames.
random_last = randint(0, last_range)
l_name = lastnames[random_last]
last_name.send_keys(f'{l_name}')
time.sleep(2)
day_of_birth = driver.find_element(By.XPATH, '/html/body/div[2]/div[2]/div[2]/div/form/div[1]/div[10]/div[1]/input')
day = randint(1, 28)
day_of_birth.send_keys(f'{day}')
time.sleep(2)
month_of_birth = driver.find_element(By.XPATH, '/html/body/div[2]/div[2]/div[2]/div/form/div[1]/div[10]/div[2]/input')
month = randint(1, 12)
month_of_birth.send_keys(f'{month}')
time.sleep(2)
year_of_birth = driver.find_element(By.XPATH, '/html/body/div[2]/div[2]/div[2]/div/form/div[1]/div[10]/div[3]/input')
year = randint(1940, 2000)
year_of_birth.send_keys(f'{year}')
driver.execute_script("window.scrollTo(0, 500)")
time.sleep(2)
# NOTE(review): pyautogui clicks a fixed screen coordinate -- this breaks on
# any other resolution/window position; prefer clicking the element via
# Selenium instead.
pyautogui.moveTo(353, 619)
time.sleep(2)
pyautogui.click()
time.sleep(5)
current_url = driver.current_url
print(current_url)
g_key = '6LehfZUbAAAAAJhue_6BVqqxLulLiXLP0rEgpdRH'


def SolveCaptcha():
    """Ask 2captcha to solve the reCAPTCHA shown on ``current_url``.

    Returns the solver's result dict (the token is under ``'code'``), or
    ``None`` when the solve attempt failed.
    """
    # BUG FIX: the environment lookup used the reCAPTCHA *site key* as the
    # variable name; the 2captcha examples read the API key from
    # APIKEY_2CAPTCHA.  (The pointless sys.path.append was dropped -- the
    # twocaptcha package is already imported at the top of the file.)
    api_key = os.getenv('APIKEY_2CAPTCHA', 'a733edea49a8327795d56edc9f06d391')
    solver = TwoCaptcha(api_key)
    try:
        result = solver.recaptcha(
            sitekey=g_key,
            url=current_url)
    except Exception as e:
        # Best-effort: report the failure and fall through, returning None.
        print(e)
    else:
        return result
result = SolveCaptcha()
# BUG FIX: SolveCaptcha() returns None on failure -- bail out instead of
# crashing on the ['code'] lookup below.
if result is None:
    sys.exit('captcha solve failed')
code = result['code']
print(code)
# Write the token into the (hidden) g-recaptcha-response textarea.
token = f'document.getElementById("g-recaptcha-response").innerHTML="{code}";'
driver.execute_script(token)
# BUG FIX: nothing ever submitted the form (the page has no Submit button);
# submit the first form on the page, then give the site a moment to respond.
driver.execute_script('document.forms[0].submit()')
time.sleep(10)
As you can see by the end of the code I have managed to input the captcha token but not sure how to submit as there is no submit button
I have tried to look for a callback function but can't seem to find it when I inspect the page.
submit the first form on the page:
driver.execute_script('document.forms[0].submit()')
i have written a selenium automation code however it is very unstable because of the Salesforce website ,
i need some expertise in my code.
i wanted to know how i can re run the code if it fails.
below is my code ,
I will not be able to share some parts of my code.
I was trying to use a for loop, however I was not able to make correct use of it,
and I even tried a while loop, but I don't know how it works.
from selenium import webdriver
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from datetime import date
from datetime import timedelta
import pandas as pd
#rom glob import glob
from pathlib import Path
import pyodbc
import sqlalchemy
from urllib.parse import quote_plus
import win32com.client as win32
import warnings
warnings. filterwarnings("ignore")
#import sqlalchemy as sa
# PRE-REQUISITES: yesterday's date, in the two formats used further down.
today = date.today()
yesterday = today - timedelta(days=1)
yesterday1 = yesterday.strftime('%d/%m/%Y')  # typed into the report date filter
yesterday2 = yesterday.strftime('%d%m%y')    # used in the output file name

# Credentials (redacted placeholders).
# BUG FIX: the redacted literals ('q'4sag and 'ws'ergwerg) were not valid
# Python; they are now plain placeholder strings.
loginfmt = 'q4sag'
passward = 'wergwn'
search = 'wsergwerg'
sql_userid = 'asdadsgfsdg'
sql_pwd = 'z5xv154fxgv'

# Firefox profile: download silently into a fixed folder for every
# spreadsheet/CSV MIME type the report export can produce.
fp = webdriver.FirefoxProfile()
fp.set_preference('browser.download.folderList', 2)
fp.set_preference('browser.download.manager.showWhenStarting', False)
fp.set_preference('browser.download.dir', r"\\abc\bcd\adff\asfg\\python basics\web scraping\salceforce code\salesforcecode3downloads")
fp.set_preference('browser.helperApps.neverAsk.openFile', 'text/csv,application/x-msexcel,application/excel,application/x-excel,application/vnd.ms-excel,image/png,image/jpeg,text/html,text/plain,application/msword,application/xml,application/xls,application/csv')
# BUG FIX: neverAsk.saveToDisk was set twice (the first value was simply
# overwritten) and the surviving value contained a stray double comma; set it
# once with the full list including the xlsx MIME type.
fp.set_preference('browser.helperApps.neverAsk.saveToDisk', 'text/csv,application/x-msexcel,application/excel,application/x-excel,application/vnd.ms-excel,image/png,image/jpeg,text/html,text/plain,application/msword,application/xml,application/xls,application/csv,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet')
fp.set_preference('browser.helperApps.alwaysAsk.force', False)
fp.set_preference('browser.download.manager.alertOnEXEOpen', False)
fp.set_preference('browser.download.manager.focusWhenStarting', False)
fp.set_preference('browser.download.manager.useWindow', False)
fp.set_preference('browser.download.manager.showAlertOnComplete', False)
fp.set_preference('browser.download.manager.closeWhenDone', False)
fp.set_preference("dom.forms.number", False)

# Outlook notification bodies (success / failure).
html_body2 = '<div>Hi Team,<br/> File saved to path:- "[0o0]" <br> File uploaded successfully <br><br></div><div> Thanks & Regards,.<br> BIA Team </div>'
html_body3 = '<div>Hi Team,<br/> Automation failed please upload the file manually.<br><br></div><div> Thanks & Regards,.<br> BIA Team </div>'
# Adjacent string literals concatenate: the original ('bfdds'';''safsdg')
# evaluates to exactly this string.
to = 'bfdds;safsdg'
def file_s():
    """Send the 'automation succeeded' notification mail via Outlook."""
    outlook_app = win32.Dispatch('outlook.application')
    message = outlook_app.CreateItem(0)
    message.To = to
    message.CC = 'abc'
    message.Subject = 'Good will Automation successfull'
    message.Body = 'Message body'
    # The HTML body takes precedence over the plain-text one when rendered.
    message.HTMLBody = html_body2
    message.Send()
def file_f():
    """Send the 'automation failed, upload manually' mail via Outlook."""
    outlook_app = win32.Dispatch('outlook.application')
    message = outlook_app.CreateItem(0)
    message.To = to
    message.CC = 'abc'
    message.Subject = 'Good will Automation Failed'
    message.Body = 'Message body'
    # The HTML body takes precedence over the plain-text one when rendered.
    message.HTMLBody = html_body3
    message.Send()
try:
    browser = webdriver.Firefox(
        executable_path=r"\\abc\bcd\adff\asfg\\Desktop\python basics\geckodriver.exe",
        firefox_profile=fp,
        service_log_path=r'\\abc\bcd\adff\asfg\\python basics\web scraping\salceforce code\geckodriver.log')
    website_URL = "https://h3g.my.salesforce.com"
    browser.get(website_URL)

    # ---- Microsoft SSO login ----
    # BUG FIX: XPath attribute tests need '@' ('//*[@id=...]'); the pasted
    # code had '#', which is invalid XPath and raises InvalidSelectorException.
    WebDriverWait(browser, 30, poll_frequency=5).until(
        EC.element_to_be_clickable((By.XPATH, '//*[@id="i0116"]'))).send_keys(loginfmt)
    browser.find_element_by_xpath('//*[@id="idSIButton9"]').click()
    WebDriverWait(browser, 30, poll_frequency=5).until(
        EC.element_to_be_clickable((By.XPATH, '//*[@id="passwordInput"]'))).send_keys(passward)
    browser.find_element_by_xpath('//*[@id="submitButton"]').click()
    # "Stay signed in?" -> No
    WebDriverWait(browser, 30, poll_frequency=5).until(
        EC.element_to_be_clickable((By.XPATH, '//*[@id="idBtn_Back"]'))).click()
    time.sleep(20)

    # ---- Open the Goodwill Credit Report ----
    browser.find_element_by_xpath("/html/body/div[2]/div[2]/div/button").click()
    goodwill = 'Goodwill Credit Report'
    browser.find_element_by_xpath('//*[@class="slds-input"]').send_keys(goodwill)
    time.sleep(5)
    # BUG FIX: 'div[4]div[1]' was missing a path separator.
    browser.find_element_by_xpath("/html/body/div[4]/div[1]/div/div[2]/div[1]/div/search_dialog-instant-results-list/div/search_dialog-instant-result-item[1]/div[1]/div[2]/span").click()
    time.sleep(7)
    iframe3 = browser.find_element_by_xpath("//iframe[@title='Report Viewer']")
    browser.switch_to.frame(iframe3)
    time.sleep(240)
    browser.find_element_by_xpath("/html/body/div[9]/div[1]/div[1]/div[2]/div/div/div/div[3]/div/div/button").click()
    fnd_ele = browser.find_element(By.XPATH, '/html/body/div[9]/div/div[1]/div/div[1]/div/div/div[3]/div/div/div/div/div/div/div[2]/div/div[2]/ul/li[2]/div/div/div/div/div/div/button/span[3]')
    fnd_ele.click()
    # NOTE(review): '/html/bodydiv/...' was missing a '/'; reconstructed as
    # '/html/body/div/...' -- verify against the live DOM.
    WebDriverWait(browser, 30, poll_frequency=5).until(
        EC.element_to_be_clickable((By.XPATH, '/html/body/div/div/div[2]/div/div/span/a'))).click()

    # ---- Filter the report to yesterday's date ----
    WebDriverWait(browser, 30, poll_frequency=5).until(
        EC.element_to_be_clickable((By.XPATH, '//*[@class="slds-input"]'))).send_keys(yesterday1)
    WebDriverWait(browser, 30, poll_frequency=5).until(
        EC.element_to_be_clickable((By.XPATH, '//div[@class = "filter-date-picker slds-form-element slds-m-to_none"]/div/div/div/div/input[@class="slds-input"]'))).send_keys(yesterday1)
    # NOTE(review): '/html/body/spandiv/...' reconstructed as
    # '/html/body/span/div/...' -- verify.
    WebDriverWait(browser, 30, poll_frequency=5).until(
        EC.element_to_be_clickable((By.XPATH, '/html/body/span/div/footer/div[2]/button[2]'))).click()
    WebDriverWait(browser, 30, poll_frequency=5).until(
        EC.element_to_be_clickable((By.XPATH, '/html/body/div[1]/div/div[1]/div[1]/div[1]/div[2]/div/div/div/div[5]/div/div/button'))).click()
    # NOTE(review): '/htmldiv/ul/li[3]/a' reconstructed from the commented-out
    # variant '/html/body/span/div/ul/li[3]/a' -- verify.
    WebDriverWait(browser, 30, poll_frequency=5).until(
        EC.element_to_be_clickable((By.XPATH, '/html/body/span/div/ul/li[3]/a'))).click()

    main_window_handle = browser.current_window_handle
    print("Current window", main_window_handle)
    browser.switch_to.default_content()
    # BUG FIX: 'buton'/'buttn' typos in the two export-dialog XPaths below.
    WebDriverWait(browser, 10, poll_frequency=5).until(
        EC.visibility_of_element_located((By.XPATH, '/html/body/div[2]/div/div[2]/div/div[3]/button[2]/span'))).click()
    time.sleep(10)
    browser.find_element_by_xpath("/html/body/div[4]/div[1]/section/div[1]/div/div[1]/div[2]/div/div/ul[2]/li[2]/div[2]/button/lightning-primitive-icon").click()
    time.sleep(20)
    browser.quit()

    # ---- EDITING THE FILE ----
    def get_latest_file(src_path, extension, method='st_mtime'):
        """
        Takes in a raw path and extension to parse over;
        returns a single file with the last modified date.

        methods:
          st_mtime: time of most recent content modification (seconds).
          st_ctime: time of most recent metadata change on Unix and
                    creation time on Windows (seconds).
        """
        extension = extension if extension[0] != '.' else extension[1:]
        files = Path(src_path).glob(f'*.{extension}')
        if method == 'st_mtime':
            file_dictionary = {file: file.stat().st_mtime for file in files}
        elif method == 'st_ctime':
            file_dictionary = {file: file.stat().st_ctime for file in files}
        else:
            raise Exception(f'{method} not valid for this function')
        return max(file_dictionary, key=file_dictionary.get)

    latest = get_latest_file(r'C:\\abc\bcd\adff\asfg\python basics\web scraping\salceforce code\salesforcecode3downloads', extension='xlsx', method='st_mtime')
    print(latest)

    # The export has 8 header rows before the real column row; strip them,
    # promote the column row, and drop the trailing grand-total row.
    df = pd.read_excel(latest)
    df.drop([0, 1, 2, 3, 4, 5, 6, 7], inplace=True)
    df.columns = df.iloc[0]
    df.reset_index(drop=True, inplace=True)
    df2 = df.iloc[1:]
    df3 = df2.iloc[:, 1:]
    df3.drop(df3.tail(1).index, inplace=True)
    df4 = df3.drop(df3.columns[1], axis=1)
    # BUG FIX: the CSV was written twice to the same path; once is enough.
    df4.to_csv(r'\\abc\bcd\adff\asfg\Goodwill Credit Report-' + yesterday2 + '.csv', header=True, index=False)
    print("Excel file edited")

    df5 = df4.drop(['Dispute Id: Case Number', 'Date', 'Status', 'Installment Amount',
                    'Total Installments', 'Service Terms', 'Service Amount',
                    'Service End Date', 'Mode', 'Notes'], axis=1)

    # ---- Load into SQL Server ----
    conn = pyodbc.connect(r'DRIVER={SQL Server Native Client 11.0};'
                          r'SERVER=something;'
                          r'DATABASE=something;'
                          r'Uid=' + sql_userid + ';'
                          r'Pwd=' + sql_pwd + ';')
    cursor = conn.cursor()
    cursor.execute('Truncate table somethingTemp')
    conn.commit()
    cursor.close()
    conn.close()
    print("file truncate succesfull")

    params = quote_plus(r'DRIVER={SQL Server Native Client 11.0};'
                        r'SERVER=something;'
                        r'DATABASE=something;'
                        r'Uid=' + sql_userid + ';'
                        r'Pwd=' + sql_pwd + ';')
    engine = sqlalchemy.create_engine('mssql+pyodbc:///?odbc_connect=%s' % params)
    # Normalise Created Date to a real datetime (dd/mm/yyyy) before upload.
    df5['Created Date'] = df5['Created Date'].astype(str)
    df5['Created Date'] = pd.to_datetime(df5['Created Date'], format="%d/%m/%Y")
    df5.to_sql('C_VisionPay_Temp', con=engine, if_exists='append', index=False)

    conn = pyodbc.connect(r'DRIVER={SQL Server Native Client 11.0};'
                          r'SERVER=something;'
                          r'DATABASE=something;'
                          r'Uid=' + sql_userid + ';'
                          r'Pwd=' + sql_pwd + ';')
    cursor = conn.cursor()
    # BUG FIX: reusing the name 'params' here clobbered the connection string
    # built above; use a dedicated name for the stored-procedure argument.
    sp_param = date(yesterday.year, yesterday.month, yesterday.day)
    storedProc = "something"
    cursor.execute(storedProc, sp_param)
    conn.commit()
    cursor.close()
    conn.close()
    print("file executed sucessfully ")
    file_s()
except Exception as exc:
    # BUG FIX: the bare 'except:' swallowed every error (even
    # KeyboardInterrupt) with no trace; report it before mailing the failure.
    print('Automation failed:', exc)
    file_f()
There's a lot of sleeps here, which is a tell-tale sign of poorly written automation code. This will expose the automation to the unwanted failures which you're describing.
Is there any reason why we're using sleep's instead of more appropriate explicit or fluent waits?
I would personally advise against going down the path of trying to rerun failed automation tests until they pass, that's a rabbit hole which sounds messy. Instead I'd focus on making the code sleepless and robust. There's a wealth of information around on why using sleeps is a bad idea.
Another couple of observations if I may:
Xpaths like /html/body/div[9]/div[1]/div[1]/div[2]/div/div/div/div[3]/div/div/button should be avoided, these are prone to change and will add brittleness to the code
There are examples where ids are being dealt with by proxy via XPath, for example: browser.find_element_by_xpath('//*[@id="idSIButton9"]').click(). We're better off cutting out the middleman and finding the element by id directly, for example: browser.find_element_by_id('idSIButton9').click()
There's a lot of repeated code, if one small thing changes, we may have to make many changes to correct it. Ideally we want to only have to make that change in one place
All the best
Problem Description
Currently I am working on a project which requires me to browse to a URL and take a screenshot of the webpage.
After looking at various resources I found 3 ways to do so. I will mention all 3 methods I am currently using.
Method - 1 : PhantomJS
from selenium import webdriver
import time
import sys

# Time a non-headless (PhantomJS) screenshot of the URL given on the CLI.
print('Without Headless')
_start = time.time()
br = webdriver.PhantomJS()
br.get('http://' + sys.argv[1])
br.save_screenshot('screenshot-phantom.png')
# BUG FIX: 'br.quit' without parentheses only references the method and never
# closes the browser, leaking the PhantomJS process.
br.quit()
_end = time.time()
print('Total time for non-headless {}'.format(_end - _start))
Method-2 : Headless Browser
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
# BUG FIX: this snippet uses sys.argv and time.time() but never imported
# sys or time, so it raised NameError when run standalone.
import sys
import time

# Time a headless-Chrome screenshot of the URL given on the CLI.
print('Headless')
_start = time.time()
options = Options()
options.add_argument("--headless")   # Runs Chrome in headless mode.
options.add_argument('--no-sandbox')  # Bypass OS security model
options.add_argument('start-maximized')
options.add_argument('disable-infobars')
options.add_argument("--disable-extensions")
driver = webdriver.Chrome(chrome_options=options, executable_path='/usr/bin/chromedriver')
driver.get('http://' + sys.argv[1])
driver.save_screenshot('screenshot-headless.png')
driver.quit()
_end = time.time()
print('Total time for headless {}'.format(_end - _start))
Method - 3 :PyQT
import argparse
import sys
import logging
import sys
import time
import os
import urlparse
from selenium import webdriver
from PyQt4.QtCore import *
from PyQt4.QtGui import *
from PyQt4.QtWebKit import *
class Screenshot(QWebView):
    """Render a web page off-screen with QtWebKit and save it as an image."""

    # BUG FIX: the original referenced a module-level _logger that was never
    # created (only 'import logging' existed); give the class its own logger.
    _logger = logging.getLogger(__name__)

    def __init__(self):
        self.app = QApplication(sys.argv)
        QWebView.__init__(self)
        self._loaded = False
        self.loadFinished.connect(self._loadFinished)

    def capture(self, url, output_file):
        """Load *url* and write a full-page screenshot to data/<output_file>."""
        self._logger.info('Received url {}'.format(url))
        _start = time.time()
        try:
            # BUG FIX: the original compared url[0:3] == 'http' (a 3-char
            # slice against a 4-char string -- never true), so 'http://' was
            # prepended even to URLs that already carried a scheme.
            if not url.startswith(('http://', 'https://')):
                url = 'http://' + url
            self.url = url
            self.load(QUrl(url))
            self.wait_load(url)
            # Resize the viewport to the full document so nothing is clipped.
            frame = self.page().mainFrame()
            self.page().setViewportSize(frame.contentsSize())
            # Render the page into an image and save it under ./data.
            image = QImage(self.page().viewportSize(), QImage.Format_ARGB32)
            painter = QPainter(image)
            frame.render(painter)
            painter.end()
            self._logger.info('Saving screenshot {} for {}'.format(output_file, url))
            image.save(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', output_file))
        except Exception as e:
            self._logger.error('Error in capturing screenshot {} - {}'.format(url, e))
        _end = time.time()
        self._logger.info('Time took for processing url {} - {}'.format(url, _end - _start))

    def wait_load(self, url, delay=1, retry_count=60):
        # Pump Qt events until loadFinished fires or the retries run out.
        while not self._loaded and retry_count:
            self._logger.info('wait_load for url {} retry_count {}'.format(url, retry_count))
            self.app.processEvents()
            time.sleep(delay)
            retry_count -= 1
        # BUG FIX: 'expired' was logged even when the page loaded fine; only
        # log it for a genuine timeout.
        if not self._loaded:
            self._logger.info('wait_load for url {} expired'.format(url))
        # Reset the flag for the next capture.
        self._loaded = False

    def _loadFinished(self, result):
        self._loaded = True
Issue Faced:
While using these 3 methods, all of them get stuck due to one error or another. One such issue I faced is asked here: Error Question on Stackoverflow.
So, out of these 3 ways to take a screenshot of a webpage in Python, which is efficient and will work in a large-scale deployment?
Taken from https://gist.github.com/fabtho/13e4a2e7cfbfde671b8fa81bbe9359fb and rewritten in Python 3
This method will technically work but it will not look good, as many websites will have cookie acceptance pop-ups that will appear in every screenshot, so depending on which website you use, you may wish to remove these first using selenium before beginning the screenshotting process.
from PIL import Image
from io import BytesIO

verbose = 1
browser = webdriver.Chrome(executable_path='C:/yourpath/chromedriver.exe')
browser.get('http://stackoverflow.com/questions/37906704/taking-a-whole-page-screenshot-with-selenium-marionette-in-python')

# Full document height, per http://stackoverflow.com/questions/1145850/how-to-get-height-of-entire-document-with-javascript
js = 'return Math.max( document.body.scrollHeight, document.body.offsetHeight, document.documentElement.clientHeight, document.documentElement.scrollHeight, document.documentElement.offsetHeight);'
scrollheight = browser.execute_script(js)
if verbose > 0:
    print(scrollheight)

# Scroll one viewport at a time, screenshotting each slice.
# NOTE(review): the last scroll may hit the document bottom early, so the
# final slice can repeat content already captured -- crop it if exactness
# matters.
slices = []
offset = 0
while offset < scrollheight:
    if verbose > 0:
        print(offset)
    browser.execute_script("window.scrollTo(0, %s);" % offset)
    img = Image.open(BytesIO(browser.get_screenshot_as_png()))
    offset += img.size[1]
    slices.append(img)
    if verbose > 0:
        browser.get_screenshot_as_file('%s/screen_%s.png' % ('/tmp', offset))
        print(scrollheight)

# Stitch the slices vertically into one tall image.
screenshot = Image.new('RGB', (slices[0].size[0], offset))
offset = 0
for img in slices:
    screenshot.paste(img, (0, offset))
    offset += img.size[1]
screenshot.save('screenshot.png')
# BUG FIX: the original line ended with a stray ``` Markdown fence fused onto
# it, which is a syntax error.
browser.quit()
I am trying to create a Python script that completes the form on this page http://segas.gr/index.php/el/2015-best-athlete by selecting the radio button with the label "Γιώργος Μηλιαράς (ΣΑΚΑ) 800 μ./1,500 μ./3,000 μ", aka id="male_kids_3".
Here is my code:
import urllib
import urllib2
import webbrowser

# POST the radio-button field to the poll page and show the server's reply.
url = "http://segas.gr/index.php/el/2015-best-athlete"
post_body = urllib.urlencode({'male_kids_3': 'checked'})
response = urllib2.urlopen(url, post_body)
with open("results.html", "w") as out_file:
    out_file.write(response.read())
# Open the saved reply in the default browser for inspection.
webbrowser.open("results.html")
I found a solution using selenium:
from selenium import webdriver
import time
from selenium.webdriver.common.keys import Keys
def malakia():
    """Cast one vote: load the poll page, pick the radio button, submit."""
    browser.get("http://segas.gr/index.php/el/2015-best-athlete")
    athlete_radio = browser.find_element_by_id("male_kids_3")
    athlete_radio.click()
    submit_button = browser.find_element_by_name("input_submit_4")
    submit_button.click()
if __name__ == "__main__":
    # Ask how many votes to cast (the prompt is in Greek, kept verbatim).
    times = int(input("Πόσες φορές θέλεις να ψηφίσεις τον G #babas ??\n"))
    # One shared Chrome instance is reused for every vote.
    browser = webdriver.Chrome()
    for _ in range(times):
        malakia()
I'm using selenium and multiprocessing to spawn four different websites, and I want to run functions specific to the website the driver generated.
This is similar to my current code:
from multiprocessing import Pool
from selenium import webdriver
def gh(hosts):
    """Open one host in Chrome and run the handler for whichever site it is.

    The site is identified by a marker substring in the rendered page
    source.  NOTE(review): page text can legitimately mention another site's
    name -- which would explain the repeated 'austin' matches; dispatching on
    driver.current_url is more reliable.
    """
    driver = webdriver.Chrome(executable_path='./chromedriver')
    try:
        driver.get(hosts)
        html_source = driver.page_source
        if 'ryan' in html_source:
            print('ryan')
            doSomethingForRyan()
        elif 'austin' in html_source:
            print('austin')
            doSomethingForAustin()
        elif 'travis' in html_source:
            print('travis')
            doSomethingForTravis()
        elif 'levi' in html_source:
            print('levi')
            doSomethingForLevi()
        else:
            print('--NONE--')
    finally:
        # BUG FIX: the driver was never quit, leaking one Chrome process per
        # pool-worker invocation.
        driver.quit()
if __name__ == '__main__':
    # One URL per site; four workers so all sites are visited in parallel.
    hosts = ["http://ryan.com", "https://www.austin.com", "http://levi.com", "http://travis.com"]
    p = Pool(4)
    p.map(gh, hosts)
The result I'm getting is something like:
austin
austin
ryan
austin
EDIT - SOLVED
Instead of reading from driver.page_source, reading from driver.current_url makes sure that I can run website-specific functions.
if 'ryan' in driver.current_url:
print 'ryan'
doStuff()