In the code below, if I change one of the URLs to something invalid, the whole process stops and I can't exit the terminal with Ctrl+C. So my question is: how should I handle exceptions in my thread's run method, so that when an error happens the thread reports it and moves on to the next list element without failing the whole process?
#!/usr/bin/env python

import Queue
import threading
import urllib2
import time

hosts = ["http://yahoo.com", "http://google.com", "http://amazon.com", "http://apple.com"]

queue = Queue.Queue()

class ThreadUrl(threading.Thread):
    """Threaded Url Grab"""
    def __init__(self, queue):
        threading.Thread.__init__(self)
        self.queue = queue

    def run(self):
        while True:
            # grabs host from queue
            host = self.queue.get()
            # grabs urls of hosts and prints first 1024 bytes of page
            url = urllib2.urlopen(host)
            print "connected"
            # signals to queue job is done
            self.queue.task_done()

start = time.time()

def main():
    # spawn a pool of threads, and pass them queue instance
    for i in range(5):
        t = ThreadUrl(queue)
        t.setDaemon(True)
        t.start()
    # populate queue with data
    for host in hosts:
        queue.put(host)
    # wait on the queue until everything has been processed
    queue.join()

main()
print "Elapsed Time: %s" % (time.time() - start)
Use a try/except with a finally block, so the thread always signals the queue even when there is an error:
def run(self):
    while True:
        # grabs host from queue
        host = self.queue.get()
        # grabs urls of hosts and prints first 1024 bytes of page
        try:
            url = urllib2.urlopen(host)
            print "connected"
        except urllib2.URLError:
            print "couldn't connect to %s" % host
        finally:
            # signals to queue job is done, even on failure
            self.queue.task_done()
I am new to learning Python and got an exercise to create a multithreaded script that takes a list of 10 public FTP servers, connects to each anonymously, and does a directory listing. The following code works when I do the FTP connect inside the run function, but when I try to factor it out into an "ftp" function and call that, it keeps erroring out; then the terminal gets stuck and I can't kill the program or get out, and I can't figure out why that keeps happening either.
#!/usr/bin/python

import threading
import Queue
import time
from ftplib import FTP

sites = ["speedtest.tele2.net", "test.rebex.net", "test.talia.net", "ftp.swfwmd.state.fl.us", "ftp.heanet.ie", "ftp.rediris.es", "ftp.ch.freebsd.org", "ftp.mirror.nl", "ftp.ussg.iu.edu", "ftp.uni-bayreu$

class WorkerThread(threading.Thread):
    def __init__(self, queue):
        threading.Thread.__init__(self)
        self.queue = queue

    #def ftp(ip):
    #    server = FTP(ip)
    #    server.login()
    #    server.retrlines('LIST')

    def run(self):
        print "In WorkerThread"
        while True:
            counter = self.queue.get()
            print "Connecting to FTP Server %s" % counter
            #self.ftp(counter)
            #print "Ordered to sleep for %d seconds!" % counter
            #time.sleep(counter)
            #print "Finished sleeping for %d seconds" % counter
            server = FTP(counter)
            server.login()
            server.retrlines('LIST')
            self.queue.task_done()

queue = Queue.Queue()

for i in range(10):
    print "Creating WorkerThread : %d" % i
    worker = WorkerThread(queue)
    worker.setDaemon(True)
    worker.start()
    print "WorkerThread %d Created!" % i

for j in sites:
    queue.put(j)

queue.join()
print "All Tasks Over!"
As suggested by Is there any way to kill a Thread in Python?, you should add a stop condition and make the threads check it. Together with the join, this allows the threads to be terminated gracefully. Without going into other implications, try the code below.
#!/usr/bin/python

import threading
import Queue
import time
from ftplib import FTP

sites = ["speedtest.tele2.net"]

class WorkerThread(threading.Thread):
    def __init__(self, queue):
        threading.Thread.__init__(self)
        self.queue = queue
        self._stop = threading.Event()

    def ftp(self, ip):
        server = FTP(ip)
        server.login()
        server.retrlines('LIST')

    def run(self):
        print "In WorkerThread"
        while not self.stopped():
            try:
                # a timed get, so the stop flag is re-checked periodically
                counter = self.queue.get(True, 1)
            except Queue.Empty:
                continue
            print "Connecting to FTP Server %s" % counter
            try:
                self.ftp(counter)
            finally:
                # signal completion even if the connection fails
                self.queue.task_done()

    def stop(self):
        self._stop.set()

    def stopped(self):
        return self._stop.is_set()

if __name__ == '__main__':
    queue = Queue.Queue()
    workers = []
    for i in range(10):
        print "Creating WorkerThread : %d" % i
        worker = WorkerThread(queue)
        worker.setDaemon(True)
        worker.start()
        workers.append(worker)
        print "WorkerThread %d Created!" % i
    for j in sites:
        queue.put(j)
    queue.join()
    # ask the threads to exit only after all queued work is done
    for worker in workers:
        worker.stop()
    print "All Tasks Over!"
I want to write a web service that processes requests in the background. The service puts each request into a queue and responds to the client immediately.
My problem in the code below is that the while loop in BackgroundThread.run() doesn't behave as an infinite loop: it only goes through the loop once.
Thank you.
Code:
import Queue
import random
import sys
import threading
import time

import tornado.httpserver
import tornado.ioloop
import tornado.web

class BackgroundThread(threading.Thread):
    def __init__(self):
        threading.Thread.__init__(self)

    def run(self):
        global queue
        while True:
            item = queue.get()
            if item is not None:
                # long running process
                time.sleep(random.randint(10, 100) / 1000.0)
                print "task", item, "finished"

queue = Queue.Queue()

class MyHandler(tornado.web.RequestHandler):
    #gen.coroutine
    def get(self):
        global queue
        self.write('OK')
        self.finish()
        filePath = self.get_arguments("filePath")
        queue.put(filePath)
        print queue.qsize()

if __name__ == '__main__':
    try:
        BackgroundThread().start()
        BackgroundThread().start()
        app = tornado.web.Application([(r'/', MyHandler)])
        print("server opened on port : 8000")
        server = tornado.httpserver.HTTPServer(app)
        server.bind(8000)
        server.start(4)  # Specify number of subprocesses
        tornado.ioloop.IOLoop.current().start()
    except KeyboardInterrupt:
        print '^C received, shutting down the web server'
        sys.exit(1)
I added a try/except block because when the queue is empty the loop gets an exception and doesn't iterate. Note that queue.get() only raises Queue.Empty when called without blocking or with a timeout, so the get call below uses a timeout. I got the answer, and here is the code:
import Queue
import sys
import threading

import tornado.httpserver
import tornado.ioloop
import tornado.web

class BackgroundThread(threading.Thread):
    def __init__(self):
        threading.Thread.__init__(self)

    def run(self):
        global queue
        print("qwerqwer0")
        while True:
            print("qwerqwer1")
            print("qwerqwer2")
            try:
                # Queue.Empty is only raised for a non-blocking or timed get
                item = queue.get(True, 1)
                queue.task_done()
            except Queue.Empty:
                print("empty")
                continue
            if item is not None:
                print("qwerqwerqwer")
                # long running process
                print "task ", item, " finished"

queue = Queue.Queue()

class MyHandler(tornado.web.RequestHandler):
    #gen.coroutine
    def get(self):
        global queue
        self.write('OK')
        self.finish()
        filePath = self.get_arguments("filePath")
        queue.put(filePath)
        print queue.qsize()

if __name__ == '__main__':
    try:
        #BackgroundThread().start()
        BackgroundThread().start()
        app = tornado.web.Application([(r'/', MyHandler)])
        print("server opened on port : 8000")
        server = tornado.httpserver.HTTPServer(app)
        server.bind(8000)
        server.start(4)  # Specify number of subprocesses
        tornado.ioloop.IOLoop.current().start()
    except KeyboardInterrupt:
        print '^C received, shutting down the web server'
        sys.exit(1)
I'm writing a simple script that checks for an SSH connection, and I cannot understand why it hangs on one thread.
import Queue
import re
import socket
import threading

class myThread(threading.Thread):
    def __init__(self, hostname):
        threading.Thread.__init__(self)
        self.hostname = hostname

    def run(self):
        return self.doSSH(self.hostname)

    def doSSH(self, hostname):
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.connect((hostname, 22))
        result = s.recv(1024)
        if re.findall(r'^SSH.+?SSH.+', result):
            return "Up"
        else:
            return "Down"

def main():
    q = Queue.Queue()
    completeHostlist = ["host1", "host2", "google.com", "host3"]
    for hostname in completeHostlist:
        thread = myThread(hostname)
        thread.daemon = True
        q.put_nowait(thread.run())
    q.get_nowait()
I don't understand why this script hangs at google.com. I was expecting it to spawn a daemon thread and continue with host3, and as soon as host3 finished, to kill the Google thread and return the results. What did I do wrong?
I already figured out the difference between run() and start(). Even so, it is not working as expected: after the host[1-3] threads are started, the script gets stuck at the Google thread, waiting for it to end. Should it be killed at the end of the script?
Should I be using multiprocessing instead of multithreading, to spawn a separate process for each host?
In your code you do q.put_nowait(thread.run()). That immediately runs the SSH check on the current thread and puts its return value on the queue. To start a new thread, you need to call the thread-specific method: thread.start().
I'm not sure what you're doing with the Queue, though.
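A minimal sketch of the difference (the work function here is illustrative):

import threading

def work():
    print "running in %s" % threading.current_thread().name

t = threading.Thread(target=work)
t.run()     # runs work() synchronously in MainThread; no new thread is created

t2 = threading.Thread(target=work)
t2.start()  # spawns a new thread; prints something like Thread-2
t2.join()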
Don't call the .run() method of a thread directly; as @Sorin said, call thread.start() instead.
You don't need to define a new thread class; a function is enough in this case:
from Queue import Queue
from threading import Thread
import socket

def is_ssh_up(result_queue, hostname, port=22):
    # try to connect and read the banner; SSH servers announce themselves
    try:
        s = socket.create_connection((hostname, port), timeout=5)
        banner = s.recv(1024)
        s.close()
        # write results
        result_queue.put((hostname, banner.startswith("SSH")))  # Up if banner matches
    except (socket.error, socket.timeout):
        result_queue.put((hostname, False))  # Down

def main():
    q = Queue()
    hosts = ["host1", "host2", "google.com", "host3"]
    for hostname in hosts:  # start worker threads
        t = Thread(target=is_ssh_up, args=[q, hostname])
        t.daemon = True
        t.start()
    for _ in hosts:  # collect results
        hostname, is_up = q.get()
        print("%s is %s" % (hostname, "Up" if is_up else "Down"))
Or you could use a thread pool:
from multiprocessing.pool import ThreadPool
import socket

def is_ssh_up(hostname, port=22):
    # try to connect and read the banner
    try:
        s = socket.create_connection((hostname, port), timeout=5)
        banner = s.recv(1024)
        s.close()
        # return results
        return hostname, banner.startswith("SSH")
    except (socket.error, socket.timeout):
        return hostname, False

hosts = ["host1", "host2", "google.com", "host3"]
pool = ThreadPool(20)  # limit number of concurrent connections to 20
for hostname, is_up in pool.imap_unordered(is_ssh_up, hosts):
    status = "Up" if is_up else "Down" if is_up is not None else "Unknown"
    print("%s status is %s" % (hostname, status))
I was reading an article on Python multithreading using queues and have a basic question.
Based on the print statements, 5 threads are started as expected. So how does the queue work?
1. The thread is started initially; when the queue is populated with an item, does the thread get restarted and start processing that item?
2. If we use the queue system and the threads process the queue item by item, where is the improvement in performance? Isn't that similar to serial processing, i.e. one by one?
import Queue
import threading
import urllib2
import datetime
import time

hosts = ["http://yahoo.com", "http://google.com", "http://amazon.com",
         "http://ibm.com", "http://apple.com"]

queue = Queue.Queue()

class ThreadUrl(threading.Thread):
    def __init__(self, queue):
        threading.Thread.__init__(self)
        print 'threads are created'
        self.queue = queue

    def run(self):
        while True:
            # grabs host from queue
            print 'thread starting to run'
            now = datetime.datetime.now()
            host = self.queue.get()
            # grabs urls of hosts and prints first 1024 bytes of page
            url = urllib2.urlopen(host)
            print 'host=%s, threadname=%s' % (host, self.getName())
            print url.read(20)
            # signals to queue job is done
            self.queue.task_done()

start = time.time()

if __name__ == '__main__':
    # spawn a pool of threads, and pass them queue instance
    print 'program start'
    for i in range(5):
        t = ThreadUrl(queue)
        t.setDaemon(True)
        t.start()
    # populate queue with data
    for host in hosts:
        queue.put(host)
    # wait on the queue until everything has been processed
    queue.join()
    print "Elapsed Time: %s" % (time.time() - start)
A queue is similar to a list container, but with internal locking, so it is a thread-safe way to communicate data.
What happens when you start all of your threads is that they all block on the self.queue.get() call, waiting to pull an item from the queue. When an item is put into the queue from your main thread, one of the threads becomes unblocked and receives the item. It can then process the item until it finishes and returns to the blocking state.
All of your threads can run concurrently because they are all able to receive items from the queue. This is where you see the improvement in performance: while urlopen and read in one thread are waiting on IO, another thread can do work. The queue object's job is simply to manage the locking and to hand items off to the callers.
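A minimal sketch that makes the overlap visible, with a hypothetical one-second sleep standing in for the urlopen/read IO wait:

import Queue
import threading
import time

queue = Queue.Queue()

def worker():
    while True:
        item = queue.get()   # blocks until an item is available
        time.sleep(1)        # stand-in for the IO wait of urlopen/read
        queue.task_done()

for i in range(5):
    t = threading.Thread(target=worker)
    t.setDaemon(True)
    t.start()

start = time.time()
for item in range(5):
    queue.put(item)
queue.join()
# with 5 workers the 5 jobs overlap: roughly 1 second, not 5
print "Elapsed Time: %s" % (time.time() - start)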
I have code that performs load testing against a single URL. Now I have to load-test a web service that has different URLs. To do so, I need to make an array of URLs, and each thread should hit all the URLs in that array. How can I do this? This is my code:
import httplib2
import socket
import time
from threading import Event
from threading import Thread
from threading import current_thread
from urllib import urlencode

# Modify these values to control how the testing is done

# How many threads should be running at peak load.
NUM_THREADS = 50

# How many minutes the test should run with all threads active.
TIME_AT_PEAK_QPS = 20  # minutes

# How many seconds to wait between starting threads.
# Shouldn't be set below 30 seconds.
DELAY_BETWEEN_THREAD_START = 30  # seconds

quitevent = Event()

def threadproc():
    """This function is executed by each thread."""
    print "Thread started: %s" % current_thread().getName()
    h = httplib2.Http(timeout=30)
    while not quitevent.is_set():
        try:
            # HTTP requests to exercise the server go here
            # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
            resp, content = h.request(
                "http://www.google.com")
            if resp.status != 200:
                print "Response not OK"
            # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
        except socket.timeout:
            pass
    print "Thread finished: %s" % current_thread().getName()

if __name__ == "__main__":
    runtime = (TIME_AT_PEAK_QPS * 60 + DELAY_BETWEEN_THREAD_START * NUM_THREADS)
    print "Total runtime will be: %d seconds" % runtime
    threads = []
    try:
        for i in range(NUM_THREADS):
            t = Thread(target=threadproc)
            t.start()
            threads.append(t)
            time.sleep(DELAY_BETWEEN_THREAD_START)
        print "All threads running"
        time.sleep(TIME_AT_PEAK_QPS * 60)
        print "Completed full time at peak qps, shutting down threads"
    except:
        print "Exception raised, shutting down threads"
    quitevent.set()
    time.sleep(3)
    for t in threads:
        t.join(1.0)
    print "Finished"
Instead of passing a threadproc to Thread, extend the class:
class Worker(Thread):
    def __init__(self, urls):
        super(Worker, self).__init__()
        self.urls = urls

    def run(self):
        for url in self.urls:
            self.fetch(url)
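fetch is left undefined in the sketch above; a minimal version, assuming the same httplib2 request pattern as threadproc, plus hypothetical usage with placeholder URLs:

    def fetch(self, url):
        # assumption: reuse the httplib2 pattern from threadproc above
        resp, content = httplib2.Http(timeout=30).request(url)
        if resp.status != 200:
            print "Response not OK: %s" % url

# hypothetical usage: every thread hits all URLs in the array
urls = ["http://example.com/a", "http://example.com/b"]  # placeholder URLs
threads = [Worker(urls) for _ in range(NUM_THREADS)]
for t in threads:
    t.start()
for t in threads:
    t.join()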
That said, unless you are doing this to get a better understanding of threading and of how load testing works internally, I suggest using a mature load-testing tool like JMeter instead. Years of experience have gone into it that you would otherwise have to accumulate first.