I'm using Selenium to run two Firefox instances and fetch specific pages.
I'm loading the pages in parallel, using a threading.Thread for each of them.
I also want to set a timeout for the maximum page-loading time, using browser.set_page_load_timeout() in my code.
My whole code looks like this:
from selenium import webdriver
from threading import Thread
from selenium.common.exceptions import TimeoutException

class Test():
    def __init__(self):
        browser = webdriver.Firefox()

        def load_page(browser, url):
            browser.set_page_load_timeout(20)
            browser.get(url)

        t = Thread(target=load_page, args=(browser, 'http://www.stackoverflow.com', ))
        t.start()
        t.join()

if __name__ == '__main__':
    try:
        Test()
    except TimeoutException:
        print "timeout reached"
In spite of my try/except declaration, I still got this error:
Exception in thread Thread-1:
Traceback (most recent call last):
File "C:\Python27\lib\threading.py", line 801, in __bootstrap_inner
self.run()
File "C:\Python27\lib\threading.py", line 754, in run
self.__target(*self.__args, **self.__kwargs)
File "C:\Temp\test_b.py", line 13, in load_page
browser.get(url)
File "C:\Python27\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 213, in get
self.execute(Command.GET, {'url': url})
File "C:\Python27\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 201, in execute
self.error_handler.check_response(response)
File "C:\Python27\lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 181, in check_response
raise exception_class(message, screen, stacktrace)
TimeoutException: Message: Timed out waiting for page load.
Stacktrace:
at Utils.initWebLoadingListener/< (file:///c:/users/mgal/appdata/local/temp/tmpsasxck/extensions/fxdriver#googlecode.com/components/driver-component.js:9010)
at WebLoadingListener/e (file:///c:/users/mgal/appdata/local/temp/tmpsasxck/extensions/fxdriver#googlecode.com/components/driver-component.js:5114)
at WebLoadingListener/< (file:///c:/users/mgal/appdata/local/temp/tmpsasxck/extensions/fxdriver#googlecode.com/components/driver-component.js:5122)
at fxdriver.Timer.prototype.setTimeout/<.notify (file:///c:/users/mgal/appdata/local/temp/tmpsasxck/extensions/fxdriver#googlecode.com/components/driver-component.js:621)
In conclusion: how can I catch the TimeoutException outside of the thread's scope?
Thanks in advance!
You can't. You should handle the exception inside the thread, so your function should look more or less like this:
def load_page(browser, url):
    try:
        browser.set_page_load_timeout(20)
        browser.get(url)
    except TimeoutException:
        '''Handle me here'''
EDIT: What you are actually asking for is:
from selenium import webdriver
from threading import Thread
from Queue import Queue
from selenium.common.exceptions import TimeoutException

class Test():
    def __init__(self, queue, url):
        browser = webdriver.Firefox()

        def load_page(browser, url):
            try:
                browser.set_page_load_timeout(20)
                browser.get(url)
            except Exception as e:
                queue.put(e)
            else:
                queue.put('OK or whatever the result you want')

        t = Thread(target=load_page, args=(browser, url, ))
        t.start()

if __name__ == '__main__':
    urls = ('http://www.stackoverflow.com', 'http://meta.stackoverflow.com/')
    queue = Queue()
    for url in urls:
        Test(queue, url)
    for i in range(len(urls)):
        result = queue.get()
        if isinstance(result, Exception):
            '''Handle exception preferably trying to determine the actual exception type'''
        else:
            '''Say cool cause everything is fine'''
Threads run on their own stacks, so this is impossible without message/event passing. You can use Python's Queue module (which is thread-safe) and pass a Queue object into your child function, using it as an event channel that the parent can consume.
Related
I have the following piece of code which loads a page and follows a link within it, using asyncio.gather as recommended in the documentation for the click method:
import asyncio
import pyppeteer

async def main(selector):
    browser = await pyppeteer.launch()
    page = await browser.newPage()
    await page.goto("https://example.org")
    result = await asyncio.gather(
        page.waitForNavigation(),
        page.click(selector),
    )
    result = next(filter(None, result))
    if result and isinstance(result, pyppeteer.network_manager.Response):
        print(result.status, result.url)

if __name__ == "__main__":
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main(selector="a"))
This works wonders when the click method does trigger a navigation event:
200 https://www.iana.org/domains/reserved
But I'm trying to write generic code that could handle any selector, even those which would not cause a navigation. If I change the selector to "h1" I get the following timeout, because there is no navigation to wait for:
Traceback (most recent call last):
File "stackoverflow.py", line 20, in <module>
loop.run_until_complete(main(selector="h1"))
File "/usr/lib/python3.6/asyncio/base_events.py", line 484, in run_until_complete
return future.result()
File "stackoverflow.py", line 12, in main
page.click(selector),
File "/.../lib/python3.6/site-packages/pyppeteer/page.py", line 938, in waitForNavigation
raise error
pyppeteer.errors.TimeoutError: Navigation Timeout Exceeded: 30000 ms exceeded.
I could not find a way to detect whether or not the click event would produce an additional request, in order to avoid the Page.waitForNavigation call. Is it possible? Thanks in advance for your time!
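One fallback I'm considering (an untested sketch, with an arbitrary 3-second timeout) is to let the wait time out but keep the error contained, by passing return_exceptions=True to asyncio.gather and treating a TimeoutError as "no navigation happened":

import asyncio
import pyppeteer.errors

async def click_maybe_navigate(page, selector):
    # Hypothetical helper: with return_exceptions=True, a TimeoutError from
    # waitForNavigation is returned as a value instead of being raised.
    nav_result, _ = await asyncio.gather(
        page.waitForNavigation({"timeout": 3000}),  # assumes the puppeteer-style timeout option
        page.click(selector),
        return_exceptions=True,
    )
    if isinstance(nav_result, pyppeteer.errors.TimeoutError):
        return None  # the click apparently did not trigger a navigation
    if isinstance(nav_result, Exception):
        raise nav_result  # some other failure: propagate it
    return nav_result  # a Response object when a navigation did occur

The downside is that selectors which never navigate still pay the full timeout before returning.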
This Python program:
import concurrent.futures
import multiprocessing
import time

class A:
    def __init__(self):
        self.event = multiprocessing.Manager().Event()

    def start(self):
        try:
            while True:
                if self.event.is_set():
                    break
                print("processing")
                time.sleep(1)
        except BaseException as e:
            print(type(e).__name__ + " (from pool thread):", e)

    def shutdown(self):
        self.event.set()

if __name__ == "__main__":
    try:
        a = A()
        pool = concurrent.futures.ThreadPoolExecutor(1)
        future = pool.submit(a.start)
        while not future.done():
            concurrent.futures.wait([future], timeout=0.1)
    except BaseException as e:
        print(type(e).__name__ + " (from main thread):", e)
    finally:
        a.shutdown()
        pool.shutdown()
outputs:
processing
processing
processing
KeyboardInterrupt (from main thread):
BrokenPipeError (from pool thread): [WinError 232] The pipe is being closed
Traceback (most recent call last):
File "C:\Program Files\Python37\lib\multiprocessing\managers.py", line 788, in _callmethod
conn = self._tls.connection
AttributeError: 'ForkAwareLocal' object has no attribute 'connection'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File ".\foo.py", line 34, in <module>
a.shutdown()
File ".\foo.py", line 21, in shutdown
self.event.set()
File "C:\Program Files\Python37\lib\multiprocessing\managers.py", line 1067, in set
return self._callmethod('set')
File "C:\Program Files\Python37\lib\multiprocessing\managers.py", line 792, in _callmethod
self._connect()
File "C:\Program Files\Python37\lib\multiprocessing\managers.py", line 779, in _connect
conn = self._Client(self._token.address, authkey=self._authkey)
File "C:\Program Files\Python37\lib\multiprocessing\connection.py", line 490, in Client
c = PipeClient(address)
File "C:\Program Files\Python37\lib\multiprocessing\connection.py", line 691, in PipeClient
_winapi.WaitNamedPipe(address, 1000)
FileNotFoundError: [WinError 2] The system cannot find the file specified
when it is run and a SIGINT signal is sent after three seconds (by pressing Ctrl+C).
Analysis. — The SIGINT signal is sent to the main thread of each process. In this case there are two processes: the main process and the manager's child process.
In the main thread of the main process: after receiving the SIGINT signal, the default SIGINT signal handler raises the KeyboardInterrupt exception, which is caught and printed.
In the main thread of the manager's child process: in the meantime, after receiving the SIGINT signal, the default SIGINT signal handler raises a KeyboardInterrupt exception, which terminates the child process. Consequently, all subsequent uses of the manager's shared objects by other processes raise a BrokenPipeError exception.
In the pool's child thread of the main process: in this case, a BrokenPipeError exception is raised at the line if self.event.is_set():.
In the main thread of the main process: finally, the flow of control reaches the line a.shutdown(), which raises the AttributeError and FileNotFoundError exceptions.
How to prevent this BrokenPipeError exception?
A solution to this issue is to override the default SIGINT signal handler with one that ignores the signal, for instance the standard signal.SIG_IGN handler. This can be done by calling the signal.signal function at the start of the manager's child process:
import concurrent.futures
import multiprocessing.managers
import signal
import time

def init():
    signal.signal(signal.SIGINT, signal.SIG_IGN)

class A:
    def __init__(self):
        manager = multiprocessing.managers.SyncManager()
        manager.start(init)
        self.event = manager.Event()

    def start(self):
        try:
            while True:
                if self.event.is_set():
                    break
                print("processing")
                time.sleep(1)
        except BaseException as e:
            print(type(e).__name__ + " (from pool thread):", e)

    def shutdown(self):
        self.event.set()

if __name__ == "__main__":
    try:
        a = A()
        pool = concurrent.futures.ThreadPoolExecutor(1)
        future = pool.submit(a.start)
        while not future.done():
            concurrent.futures.wait([future], timeout=0.1)
    except BaseException as e:
        print(type(e).__name__ + " (from main thread):", e)
    finally:
        a.shutdown()
        pool.shutdown()
Note. — This program also works with a concurrent.futures.ProcessPoolExecutor.
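For instance (a sketch under that assumption), only the executor line in the __main__ block needs to change; a.start, together with its Event proxy, is pickled and sent to the worker process:

        # Swap the thread pool for a process pool; the rest of the __main__ block is unchanged.
        pool = concurrent.futures.ProcessPoolExecutor(1)
        future = pool.submit(a.start)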
views.py:
from threading import Thread
from time import sleep

from django.http import HttpResponse

class PoliceJobs:

    def call_police_defence_jobs(request):
        job = PoliceDefenceJobs.police_jobs(request)
        sleep(0.5)
        job_details = PoliceDefenceJobDetails.police_defence_job_details(request)
        message = call_all(job, job_details)
        return HttpResponse(message)

    def call_statewise_police_jobs(request):
        job = PoliceDefenceJobs.statewise_police_jobs(request)
        sleep(0.5)
        job_details = PoliceDefenceJobDetails.statewise_police_job_details(request)
        message = call_all(job, job_details)
        return HttpResponse(message)

def police_jobs(request):
    try:
        t1 = Thread(target=PoliceJobs.call_police_defence_jobs, args=[request])
        t2 = Thread(target=PoliceJobs.call_statewise_police_jobs, args=[request])
        t1.start()
        t2.start()
        t1.join()
        t2.join()
        return HttpResponse("success")
    except:
        return HttpResponse("error")
urls.py
from django.urls import path
from .views import police_jobs

urlpatterns = [
    path('finish_police_jobs/', police_jobs),
]
error in shell:
Traceback (most recent call last):
File "/usr/lib/python3.5/threading.py", line 914, in _bootstrap_inner
self.run()
File "/usr/lib/python3.5/threading.py", line 862, in run
self._target(*self._args, **self._kwargs)
File "/home/soubhagya/Desktop/carrier-circle/backend/finalize/views.py", line 840, in call_police_defence_jobs
job = PoliceDefenceJobs.police_jobs(request)
AttributeError: type object 'PoliceDefenceJobs' has no attribute 'police_jobs'
In the PoliceJobs class I changed the call to PoliceDefenceJobs.police_jobs, which does not exist, in order to force an error.
So I am deliberately raising an error and handling it with the except block, but the error still shows up in the console, not in the browser:
the browser shows "success" whether or not there is an exception.
Exceptions in threads don't propagate to the thread that created them. See Catch a thread's exception in the caller thread in Python for a workaround.
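A minimal sketch of that workaround (the PropagatingThread name is just illustrative): subclass Thread so the worker's exception is stored and re-raised when the caller joins. police_jobs would then create PropagatingThread instead of Thread, and its except block would actually fire:

from threading import Thread

class PropagatingThread(Thread):
    def run(self):
        # Remember any exception raised by the target instead of letting
        # the threading module print it and swallow it.
        self.exc = None
        try:
            if self._target is not None:
                self._target(*self._args, **self._kwargs)
        except BaseException as e:
            self.exc = e

    def join(self, timeout=None):
        # Re-raise the stored exception in the caller's thread.
        Thread.join(self, timeout)
        if self.exc is not None:
            raise self.exc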
#!/usr/bin/env python
import threading
import urllib, sys, os
import Queue

concurrent = 200
queue = Queue.Queue(concurrent*2)
try:
    aim = sys.argv[1].lower()
    dic = open(sys.argv[2], 'r')
except:
    print "Usage: %s url wordlist" % sys.argv[0]
    sys.exit(1)

class Scanner(threading.Thread):
    def __init__(self, queue):
        threading.Thread.__init__(self)
        self.queue = queue

    def run(self):
        while True:
            self.path = self.queue.get()
            self.geturl = urllib.urlopen(aim + '/' + self.path)
            self.status = self.geturl.getcode()
            self.url = aim + self.path
            self.result = self.url + '=>' + str(self.status)
            print self.result
            self.writeresult(self.result)
            self.queue.task_done()

    def writeresult(self, result):
        fp = open('result.txt', 'a+')
        fp.write(result + '\n')
        fp.close()

def main():
    for i in range(concurrent):
        t = Scanner(queue)
        t.setDaemon(True)
        t.start()
    for path in dic.readlines():
        queue.put(path.strip())
    queue.join()

if __name__ == '__main__':
    main()
It is a Python program to scan the directories of a website. When the scan finishes, the program does not quit, not even with Ctrl+C.
I want to know how to make it quit automatically once the scan is done.
Also, while it is running, errors like this appear:
Exception in thread Thread-130:
Traceback (most recent call last):
File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/threading.py", line 551, in __bootstrap_inner
self.run()
File "tt.py", line 28, in run
self.geturl = urllib.urlopen(aim+'/'+self.path)
File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib.py", line 86, in urlopen
return opener.open(url)
File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib.py", line 207, in open
return getattr(self, name)(url)
File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib.py", line 344, in open_http
h.endheaders(data)
File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 954, in endheaders
self._send_output(message_body)
File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 814, in _send_output
self.send(msg)
File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 776, in send
self.connect()
File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 757, in connect
self.timeout, self.source_address)
File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/socket.py", line 553, in create_connection
for res in getaddrinfo(host, port, 0, SOCK_STREAM):
IOError: [Errno socket error] [Errno 8] nodename nor servname provided, or not known
I wanted some practice, so I tried this out and changed a lot. Does it get you a full set of results? You will need to replace paths with your original argument reading.
With those threads, maybe you are getting unhandled exceptions that result in missing results? I added a mechanism to catch any errors during reading and pass them to the result writer.
I guess appending to a file from multiple threads is OK, but I added a writer thread to manage the file more cleanly.
Most of the assignments to self were unnecessary.
If you still get socket errors, check the paths in the result file and decide how you want to handle those results, if at all.
I'm no expert, so don't take this as best practice.
import threading
import urllib
import Queue

concurrent = 5
aim = 'http://edition.cnn.com'
paths = ['2013/10/12/opinion/kazin-tea-party/index.html?hpt=hp_t5',
         '2013/10/11/opinion/opinion-hay-nobel-opcw/index.html?hpt=hp_t5',
         '2013/10/11/opinion/rosin-women-in-charge/index.html?hpt=hp_t5',
         'some invalid path',
         '2013']  # also an invalid path

def main():
    work_q = Queue.Queue()
    result_q = Queue.Queue()

    # start the scanners and the result writer
    scanners = [Scanner(work_q, result_q) for i in range(concurrent)]
    for s in scanners:
        s.start()
    results_file_path = 'results.txt'
    result_writer = ResultWriter(result_q, 'results.txt')
    result_writer.start()

    # send all the work and wait for it to be completed
    for path in paths:
        work_q.put(path.strip())
    work_q.join()

    # tell everyone to stop
    # you could just kill the threads but your writer needs to close the file
    for s in scanners:
        work_q.put(Scanner.STOP_TOKEN)
    result_q.put(ResultWriter.STOP_TOKEN)  # make sure file gets closed

    # wait for everyone to actually stop
    for s in scanners:
        s.join()
    result_writer.join()

    print 'the scan has finished and results are in {}'.format(results_file_path)

class Scanner(threading.Thread):
    STOP_TOKEN = '<<stop>>'

    def __init__(self, work_q, result_q):
        threading.Thread.__init__(self)
        self.work_q = work_q
        self.result_q = result_q

    def run(self):
        while True:
            path = status = None  # reset in case of error
            try:
                try:
                    path = self.work_q.get(timeout=0.00001)
                except Queue.Empty:
                    continue
                if path == self.STOP_TOKEN:
                    break  # stop looking for work
                get_url = urllib.urlopen(aim + '/' + path)
                status = get_url.getcode()
            except Exception as e:
                status = 'unhandled error ({})'.format(e)
            self.result_q.put((path, status))
            self.work_q.task_done()

class ResultWriter(threading.Thread):
    STOP_TOKEN = '<<stop>>'

    def __init__(self, result_q, results_file_path):
        threading.Thread.__init__(self)
        self.result_q = result_q
        self.results_file_path = results_file_path

    def run(self):
        with open(self.results_file_path, 'w') as results_file:
            while True:
                try:
                    result = self.result_q.get(timeout=0.00001)
                except Queue.Empty:
                    continue
                if result == self.STOP_TOKEN:
                    break  # stop looking for results
                path, status = result
                results_file.write('{}=>{}\n'.format(path, status))

if __name__ == '__main__':
    main()
The program, as it is, will close when all threads have finished.
To easily get rid of all those errors, in the class's run function, put everything that follows the while True: line in a try/except clause, like this:
try:
    code
except:
    pass
It's not exactly the cleanest way to do it, but considering what you are after, it will do the job and get rid of those exceptions, which, by the way, mean that some URLs have timed out.
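Applied to the Scanner.run method above, that looks roughly like the following sketch; as a small tweak of my own, task_done() goes in a finally block so queue.join() still returns when a URL raises:

    def run(self):
        while True:
            self.path = self.queue.get()
            try:
                self.geturl = urllib.urlopen(aim + '/' + self.path)
                self.status = self.geturl.getcode()
                self.url = aim + self.path
                self.result = self.url + '=>' + str(self.status)
                print self.result
                self.writeresult(self.result)
            except:
                pass  # swallow socket errors etc.; the failing path is simply skipped
            finally:
                self.queue.task_done()  # always mark the item done so queue.join() can finish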
I've been trying to develop an automated test case solution using Selenium RC and Python, and after lengthy testing I've hit a pretty hard roadblock, so to speak.
I have three files: unit.py, case1.py, and case1m.py
unit.py configures instances of case1m.py with a browser and a port, then runs the test by sending the case1m instance through unittest.main().
The case1.py file is a vanilla case that is generated from Selenium IDE; when run from the command line, it executes the test case and exits with OK. I used this file to help debug the failing points of the other two files.
Here is the source for all three files:
unit.py:
import unittest
from case1m import case1m

browser = "*chrome"
port = 4444

a = case1m()
a.setBrowser("*chrome", 4444)
unittest.main(a)
case1m.py - handles browser/port arguments and runs selenium test cases:
from selenium import selenium
import unittest, time, re

class case1m(unittest.TestCase):
    def setBrowser(self, b, p):
        print "entered setBrowser"
        self.browser = b
        self.port = p
        print "leaving setBrowser"
        self.setUp()

    def setUp(self):
        print self.browser, ", ", self.port
        self.verificationErrors = []
        self.selenium = selenium("localhost", self.browser, self.port, "http://megagate-ffcdcb.xl_net.internal/")
        self.selenium.start()
        print "end setUp"
        self.runTest()

    def runTest(self):
        print "entered runTest"
        sel = self.selenium
        sel.open("/seltest/")
        try: self.failUnless(sel.is_text_present("BODY"))
        except AssertionError, e: self.verificationErrors.append(str(e))
        print "leaving runTest"
        self.tearDown()

    def tearDown(self):
        print "entered tearDown"
        self.selenium.stop()
        self.assertEqual([], self.verificationErrors)
        print "leaving tearDown"
case1.py:
from selenium import selenium
import unittest, time, re

class case1(unittest.TestCase):
    def setUp(self):
        print "entered setUp"
        self.verificationErrors = []
        self.selenium = selenium("localhost", 4444, "*chrome", "http://megagate-ffcdcb.xl_net.internal/")
        self.selenium.start()

    def runTest(self):
        sel = self.selenium
        sel.open("/seltest/")
        try: self.failUnless(sel.is_text_present("BODY"))
        except AssertionError, e: self.verificationErrors.append(str(e))

    def tearDown(self):
        self.selenium.stop()
        self.assertEqual([], self.verificationErrors)

if __name__ == '__main__':
    unittest.main()
The first problem I ran into was passing the browser and port values to an instance of the case1m class. I tried using __init__ to collect them as arguments, but apparently subclassing TestCase and then overriding __init__ causes problems: the setUp(), runTest() and tearDown() methods were no longer triggered automatically as they are in the case1 class.
So instead, I added a setBrowser() method to collect the values and create the browser and port attributes on the class instance. This caused the same issue as before, so I resorted to inserting the method calls into setUp(), runTest() and tearDown() myself. When executed, it runs until it reaches the do_command() method in the selenium instance.
Here is the error:
Traceback (most recent call last):
File "C:\sel-test\unit.py", line 13, in
a.setBrowser("*chrome",4444)
File "C:\sel-test\case1m.py", line 10, in setBrowser
self.setUp()
File "C:\sel-test\case1m.py", line 16, in setUp
self.selenium.start()
File "C:\Python26\lib\selenium.py", line 190, in start
result = self.get_string("getNewBrowserSession", [self.browserStartCommand, self.browserURL, self.extensionJs])
File "C:\Python26\lib\selenium.py", line 225, in get_string
result = self.do_command(verb, args)
File "C:\Python26\lib\selenium.py", line 213, in do_command
conn.request("POST", "/selenium-server/driver/", body, headers)
File "C:\Python26\lib\httplib.py", line 910, in request
self._send_request(method, url, body, headers)
File "C:\Python26\lib\httplib.py", line 947, in _send_request
self.endheaders()
File "C:\Python26\lib\httplib.py", line 904, in endheaders
self._send_output()
File "C:\Python26\lib\httplib.py", line 776, in _send_output
self.send(msg)
File "C:\Python26\lib\httplib.py", line 735, in send
self.connect()
File "C:\Python26\lib\httplib.py", line 716, in connect
self.timeout)
File "C:\Python26\lib\socket.py", line 500, in create_connection
for res in getaddrinfo(host, port, 0, SOCK_STREAM):
socket.gaierror: [Errno 10109] getaddrinfo failed
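As an aside on the parameter-passing issue described above: unittest can carry such values if the overridden __init__ chains to the base constructor and the case is run through a suite. A rough, illustrative sketch (the names are mine, not from the files above):

import unittest

class ParamCase(unittest.TestCase):
    # Chaining to TestCase.__init__ keeps setUp/runTest/tearDown working normally.
    def __init__(self, methodName='runTest', browser='*chrome', port=4444):
        unittest.TestCase.__init__(self, methodName)
        self.browser = browser
        self.port = port

    def runTest(self):
        self.assertTrue(self.browser.startswith('*'))

if __name__ == '__main__':
    suite = unittest.TestSuite()
    suite.addTest(ParamCase('runTest', browser='*firefox', port=4444))
    unittest.TextTestRunner(verbosity=2).run(suite)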
My question is: why does the unit.py/case1m.py combination result in socket.gaierror when the case1.py file runs without error? From what I can see, the selenium class should be receiving exactly the same information by the time it reaches self.do_command(). The only difference is that case1.py is run directly from the command line, while case1m.py is run as an imported module.
Looking at the 2 code snippets side by side, I think you have inverted the browser and port arguments. This is probably the source of your error.
case1.py (runs fine):
self.selenium = selenium("localhost", 4444, "*chrome", "http://megagate-ffcdcb.xl_net.internal/")
case1m.py (socket error):
self.selenium = selenium("localhost", self.browser, self.port, "http://megagate-ffcdcb.xl_net.internal/")