I have a script that follows the same logic in this sample.
Basically, I insert items into a global queue and spawn threads that run a while loop which gets an item from the queue and then calls task_done.
I can get the threads to join if my while loop is checking that the queue is not empty, but I wanted to try and incorporate a flag that I could set myself to exit the loop. When I try to do this, joining the thread blocks forever.
Here is the non-working sample that doesn't join the threads:
import threading
import queue
# Worker thread that prints items taken from the module-level queue `q` until
# stop() flips its exit flag.
# NOTE(review): the flag is only re-read at the top of each loop pass, and
# q.get() is called without a timeout, so a pass cannot finish until an item
# arrives.
class Mythread(threading.Thread):
def __init__(self):
super().__init__()
# Exit flag polled by run(); set to True by stop().
self.signal = False
def run(self):
# NOTE(review): this declares the name `queue` global, but the loop uses `q`.
global queue
while not self.signal:
# Blocking get: waits here until the queue has an item to hand out.
item = q.get()
print(item)
# Mark the item as processed so that q.join() can eventually return.
q.task_done()
def stop(self):
# Request loop exit; only takes effect the next time run() checks the flag.
self.signal = True
# Shared work queue. It must be an instance: assigning the class itself
# (`queue.Queue` without parentheses) makes `q.put(i)` fail.
q = queue.Queue()
for i in range(5000):
    q.put(i)

threads = []
for i in range(2):
    t = Mythread()
    threads.append(t)
for t in threads:
    t.start()

# Returns once task_done() has been called for every queued item.
q.join()

for t in threads:
    print(t.signal)  # False
    t.stop()
    print(t.signal)  # True
    # Blocks forever: the worker is parked inside q.get() on the now-empty
    # queue, so it never loops back to re-check the flag set by stop().
    t.join()
Here is the one that works, using queue.empty():
import threading
import queue
class Mythread(threading.Thread):
    """Worker thread that prints queue items and exits once the queue is empty.

    Args:
        work_queue: Queue to drain. When omitted, the module-level ``q`` is
            looked up when the thread runs, as in the original sample.
    """

    def __init__(self, work_queue=None):
        super().__init__()
        self._work_queue = work_queue

    def run(self):
        """Drain the queue, calling task_done() for every item taken."""
        source = q if self._work_queue is None else self._work_queue
        while True:
            # An empty() check followed by a blocking get() is racy: another
            # worker can take the last item in between, leaving this thread
            # blocked forever. A non-blocking get closes that window.
            try:
                item = source.get_nowait()
            except queue.Empty:
                return
            print(item)
            source.task_done()
# Shared work queue. It must be an instance: assigning the class itself
# (`queue.Queue` without parentheses) makes `q.put(i)` fail.
q = queue.Queue()
for i in range(5000):
    q.put(i)

threads = []
for i in range(2):
    t = Mythread()
    threads.append(t)
for t in threads:
    t.start()

# Returns once task_done() has been called for every queued item.
q.join()

for t in threads:
    t.join()  # Works fine
    print(t.is_alive())  # returns False
Any ideas?
q.get() blocks when the queue is empty, so the thread never gets back to your while condition to see that the flag has changed.
Related
I need to open multiple Chrome drivers with Selenium and then run my script in them using threads.
How can I make the program wait until the first batch of threads has finished before it starts the second batch?
time.sleep(x) won't work for me, because I don't know how long the first batch will take, and I need the second batch to start as soon as the first one has finished.
import time
import threading
from selenium import webdriver
mydrivers=[]
tabs = []
class ActivePool(object):
    """Lock-protected list of the names that are currently active."""

    def __init__(self):
        super().__init__()
        # Guards every mutation of `active`.
        self.lock = threading.Lock()
        # Names registered via makeActive() and not yet removed.
        self.active = []

    def makeActive(self, name):
        """Append `name` to the active list while holding the lock."""
        with self.lock:
            self.active.append(name)

    def makeInactive(self, name):
        """Remove `name` from the active list while holding the lock.

        Raises:
            ValueError: if `name` is not in the list (from list.remove).
        """
        with self.lock:
            self.active.remove(name)
def main_worker(s):
    """Open one Chrome driver and register it together with a free-tab flag.

    Args:
        s: Value supplied by the Thread(args=...) call site; not used here.
    """
    # The module-level lists are only mutated, never rebound, so no `global`
    # declaration is required.
    driver = webdriver.Chrome()
    mydrivers.append(driver)
    # Driver state: False marks the slot as not in use.
    tabs.append(False)
def worker(s, pool):
    """Claim a free driver, load a page in it, then release the driver.

    Args:
        s: Semaphore (or other context manager) limiting concurrent workers.
        pool: ActivePool that records the names of running workers; its
            `lock` is also used to make claiming a driver slot atomic.

    Raises:
        ValueError: if no entry in `tabs` is False (from list.index).
    """
    with s:
        name = threading.current_thread().name
        pool.makeActive(name)
        try:
            # Finding a free slot and marking it busy must happen as one step;
            # otherwise two workers can both read the same index and end up
            # sharing a driver.
            with pool.lock:
                x = tabs.index(False)
                tabs[x] = True
            try:
                mydrivers[x].get("https://stackoverflow.com")
                time.sleep(15)
            finally:
                # Release the slot even if the page load fails, so the driver
                # is not lost to later workers.
                tabs[x] = False
        finally:
            pool.makeInactive(name)
# Phase 1: start five threads that each run main_worker (one driver per thread).
# The thread objects are not kept, so nothing here waits for them to finish.
for k in range(5):
t = threading.Thread(target=main_worker, args=(k,))
t.start()
# How to make it wait until above threading is finished and then start below threading
# Phase 2: start 100 worker threads; the semaphore created with value 5 is
# passed to each worker together with the shared pool.
pool = ActivePool()
s = threading.Semaphore(5)
for j in range(100):
t = threading.Thread(target=worker, name=j, args=(s, pool))
t.start()
# Keep a reference to every launcher thread so it can be joined later.
driver_threads = []
for index in range(5):
    launcher = threading.Thread(target=main_worker, args=(index,))
    driver_threads.append(launcher)

for launcher in driver_threads:
    launcher.start()

# join() returns only after the thread has finished, so execution continues
# past this loop once all five launchers are done.
for launcher in driver_threads:
    launcher.join()
Or, even:
# Build all five launcher threads in one expression.
launchers = [
    threading.Thread(target=main_worker, args=(index,))
    for index in range(5)
]

for launcher in launchers:
    launcher.start()

# Wait for every launcher to finish before moving on.
for launcher in launchers:
    launcher.join()
To wait for a thread to finish you should use the thread.join function. Eg...
from threading import Thread
import time
def wait_sec(seconds=2):
    """Sleep for `seconds` seconds.

    Args:
        seconds: How long to sleep. Defaults to 2, the delay that was
            hard-coded in the original example.
    """
    time.sleep(seconds)
# Run wait_sec on a background thread.
waiter = Thread(target=wait_sec)
waiter.start()
# join() blocks the caller until the thread's target has returned.
waiter.join()
print("You have waited 2 seconds")
Python 2.7 here.
In the Queue example, the threads run indefinitely:
def worker():
    """Process items from the shared queue `q` forever.

    Each item is passed to do_work(); the item is marked done afterwards
    whether or not do_work() succeeded.
    """
    while True:
        item = q.get()
        try:
            do_work(item)
        finally:
            # Always balance the get(): if do_work() raises and task_done()
            # is skipped, q.join() can never return.
            q.task_done()
q = Queue()

# Start the consumers. They are daemon threads, so they do not keep the
# interpreter alive once the main thread is finished.
for _ in range(num_worker_threads):
    consumer = Thread(target=worker)
    consumer.daemon = True
    consumer.start()

# Feed every item from the source into the queue.
for job in source():
    q.put(job)

# Block until task_done() has been called for every item that was put.
q.join()
Is there an "official" or recommended way to tell the threads to exit after q.join() without using a global variable or subclassing Thread?
Currently, I am doing something like:
# Sketch of a Thread subclass; the parts the author left out are shown as `...`.
class MyThread(Thread, object):
...
def run(self):
...
while True:
try:
# Non-blocking fetch: raises Queue.Empty instead of waiting when no item is ready.
item = self.q.get_nowait()
except Queue.Empty:
# Stop only once `is_queue_empty` has been set on this thread; until then
# the surrounding `while True` simply tries again.
if self.is_queue_empty:
break
else:
...
...
# One MyThread per worker, each constructed with the queue `q` and `target`.
threads = [MyThread(q, target) for _ in range(num_threads)]
...
# Wait until every queued item has been marked done.
q.join()
# Set the flag that run() checks when it finds the queue empty.
for thread in threads:
thread.is_queue_empty = True
It works, but it seems kind of hacky. I would also like to avoid making a list of threads, if possible.
I have the multi-threaded Python program below. If I press Ctrl+C within roughly the first 5 seconds, the KeyboardInterrupt handler runs as expected.
If the program has been running for longer than about 15 seconds, it no longer responds to Ctrl+C: pressing Ctrl+C has no effect and no KeyboardInterrupt exception is raised. What could be the reason? I tested this on Linux.
#!/usr/bin/python
import os, sys, threading, time
class Worker(threading.Thread):
    """Thread that repeats a unit of work until it is asked to stop."""

    def __init__(self):
        super(Worker, self).__init__()
        # Polled by run(); set to True from outside to make the thread exit.
        self.kill_received = False

    def run(self):
        """Call do_something() repeatedly until kill_received is set."""
        while True:
            if self.kill_received:
                break
            self.do_something()

    def do_something(self):
        """Do some throwaway CPU work, then sleep for one second."""
        [n * n for n in range(10000)]
        time.sleep(1)
# Start ten Worker threads, then loop joining them so that Ctrl-C can be
# caught and forwarded to the workers as a kill flag.
# `args` is accepted but not used in the body.
def main(args):
threads = []
for i in range(10):
t = Worker()
threads.append(t)
t.start()
while len(threads) > 0:
try:
# Join all threads using a timeout so it doesn't block
# Filter out threads which have been joined or are None
# NOTE(review): the list is rebuilt from the return values of t.join(1),
# not from the thread objects themselves.
threads = [t.join(1) for t in threads if t is not None and t.isAlive()]
except KeyboardInterrupt:
print "Ctrl-c received! Sending kill to threads..."
# Ask every thread still in the list to leave its run() loop.
for t in threads:
t.kill_received = True
# Script entry point: pass the command-line arguments to main().
if __name__ == '__main__':
main(sys.argv)
After the first execution of
threads = [t.join(1) for t in threads if t is not None and t.isAlive()]
your variable threads contains
[None, None, None, None, None, None, None, None, None, None]
after the second execution, the same variable threads contains:
[]
At this point, len(threads) > 0 is False and you leave the while loop. Your script is still running, because the 10 worker threads are still active, but since you are no longer inside the try / except block that catches KeyboardInterrupt, you can't stop it with Ctrl + C.
Add some prints to your script to see what I described:
#!/usr/bin/python
import os, sys, threading, time
class Worker(threading.Thread):
    """Background thread that keeps working until its kill flag is raised."""

    def __init__(self):
        threading.Thread.__init__(self)
        # Stop request flag; run() checks it before every unit of work.
        self.kill_received = False

    def run(self):
        """Run do_something() in a loop for as long as no kill was requested."""
        keep_going = not self.kill_received
        while keep_going:
            self.do_something()
            keep_going = not self.kill_received

    def do_something(self):
        """Build and discard a list of squares, then pause for one second."""
        [value * value for value in range(10000)]
        time.sleep(1)
# Instrumented variant of main(): starts ten Worker threads and prints the
# contents of `threads` after every join pass.
# `args` is accepted but not used in the body.
def main(args):
threads = []
for i in range(10):
t = Worker()
threads.append(t)
t.start()
print('thread {} started'.format(i))
while len(threads) > 0:
print('Before joining')
try:
# Join all threads using a timeout so it doesn't block
# Filter out threads which have been joined or are None
# NOTE(review): the list is rebuilt from the return values of t.join(1),
# not from the thread objects themselves; the print below shows the result.
threads = [t.join(1) for t in threads if t is not None and t.isAlive()]
print('After join() on threads: threads={}'.format(threads))
except KeyboardInterrupt:
print("Ctrl-c received! Sending kill to threads...")
# Ask every thread still in the list to leave its run() loop.
for t in threads:
t.kill_received = True
print('main() execution is now finished...')
# Script entry point: pass the command-line arguments to main().
if __name__ == '__main__':
main(sys.argv)
And the result:
$ python thread_test.py
thread 0 started
thread 1 started
thread 2 started
thread 3 started
thread 4 started
thread 5 started
thread 6 started
thread 7 started
thread 8 started
thread 9 started
Before joining
After join() on threads: threads=[None, None, None, None, None, None, None, None, None, None]
Before joining
After join() on threads: threads=[]
main() execution is now finished...
Actually, Ctrl + C doesn't stop working after 15 seconds, but after 10 or 11 seconds. This is the time needed to create and start the 10 threads (less than a second) and to execute join(1) on each thread (about 10 seconds).
Hint from the doc:
As join() always returns None, you must call isAlive() after join() to decide whether a timeout happened – if the thread is still alive, the join() call timed out.
To follow up on the poster above: isAlive() was renamed to is_alive().
I tried this on Python 3.9.6.
full code:
#!/usr/bin/python
import os, sys, threading, time
class Worker(threading.Thread):
    """Thread that loops over do_something() until kill_received is True."""

    def __init__(self):
        super().__init__()
        # Set to True by the main thread to tell this worker to finish up.
        self.kill_received = False

    def run(self):
        """Repeat do_something() until a kill has been requested."""
        while self.kill_received is False or not self.kill_received:
            self.do_something()

    def do_something(self):
        """Simulate work: compute 10000 squares, discard them, sleep 1 s."""
        squares = [number * number for number in range(10000)]
        del squares
        time.sleep(1)
def main(args):
    """Start ten Worker threads and wait for them, staying responsive to Ctrl-C.

    Args:
        args: Command-line arguments; accepted for the entry point, not used.
    """
    threads = []
    for i in range(10):
        t = Worker()
        threads.append(t)
        t.start()
        print('thread {} started'.format(i))
    while threads:
        print('Before joining')
        try:
            # Join with a timeout so the main thread regularly gets a chance
            # to receive KeyboardInterrupt.
            for t in threads:
                t.join(1)
            # join() always returns None, so the list must be rebuilt from
            # the thread objects themselves. Collecting join()'s results
            # instead empties the list after two passes and ends this loop
            # while the workers are still running.
            threads = [t for t in threads if t.is_alive()]
            print('After join() on threads: threads={}'.format(threads))
        except KeyboardInterrupt:
            print("Ctrl-c received! Sending kill to threads...")
            for t in threads:
                t.kill_received = True
    print('main() execution is now finished...')


if __name__ == '__main__':
    main(sys.argv)
I am trying out threading in Python. I have some code, and I have heard that my program waits for its threads if I use the .join method. But in the following code, 'done' is printed earlier than the prints from my threads. Why?
def getresults(seed):
    """Query every suggest engine for `seed` in parallel and wait for all of them.

    One thread per key of `suggestengines` runs getSuggestengineResult with
    the engine, the seed and a 1-based counter.

    Args:
        seed: Value handed to every getSuggestengineResult call.
    """
    print("get results now")
    # NOTE(review): bare subscript whose value is discarded; kept as written.
    # Presumably meant to initialise results[seed]; confirm with the author.
    results[seed]
    threads = []
    for i, suggestengine in enumerate(suggestengines.keys(), start=1):
        t = threading.Thread(target=getSuggestengineResult, args=(suggestengine, seed, i))
        threads.append(t)
    print('threads initialized')
    for thread in threads:
        thread.start()
    for thread in threads:
        # join must be *called*. A bare `thread.join` only looks the method
        # up and waits for nothing, which lets 'done' print before the
        # threads have produced their output.
        thread.join()
    print('done')
I am building a multi threading application.
I have setup a threadPool.
[ A Queue of size N and N Workers that get data from the queue]
When all tasks are done I use
tasks.join()
where tasks is the queue .
The application seems to run smoothly until suddenly, at some point (after 20 minutes, for example), it terminates with the error
thread.error: can't start new thread
Any ideas?
Edit: The threads are daemon Threads and the code is like:
# Batch loop: repeatedly fetch up to 100 KeywordStatus rows with status 0,
# set their status to 1, and hand them to a new SWorker until none remain.
while True:
# NOTE(review): t0 is assigned here and again below but never read in this snippet.
t0 = time.time()
keyword_statuses = DBSession.query(KeywordStatus).filter(KeywordStatus.status==0).options(joinedload(KeywordStatus.keyword)).with_lockmode("update").limit(100)
# Nothing left with status 0: commit and leave the loop.
if keyword_statuses.count() == 0:
DBSession.commit()
break
for kw_status in keyword_statuses:
kw_status.status = 1
DBSession.commit()
t0 = time.time()
# A new SWorker is constructed with threads_no=32 on every pass.
# NOTE(review): presumably each SWorker starts its own threads; confirm
# whether threads from earlier passes are ever stopped.
w = SWorker(threads_no=32, network_server='http://192.168.1.242:8180/', keywords=keyword_statuses, cities=cities, saver=MySqlRawSave(DBSession), loglevel='debug')
w.work()
print 'finished'
When are the daemon threads killed?
When the application finishes or when the work() finishes?
Look at the thread pool and the worker (it's from a recipe )
from Queue import Queue
from threading import Thread, Event, current_thread
import time
# Shared gate: workers wait on it before taking their next task.
event = Event()


class Worker(Thread):
    """Thread executing tasks from a given tasks queue"""

    def __init__(self, tasks):
        """Store the queue, mark the thread as a daemon and start it.

        Args:
            tasks: Queue of ``(func, args, callback)`` tuples to execute.
        """
        Thread.__init__(self)
        self.tasks = tasks
        self.daemon = True
        self.start()

    def run(self):
        """Process tasks from the queue until the program exits.

        For each task ``func(args)`` is called; when ``callback`` is not
        None it receives the result. An exception raised by a task is
        printed and the worker moves on to the next task.
        """
        while True:
            # Wait here while the event is cleared.
            event.wait()
            item = self.tasks.get()
            try:
                func, args, callback = item
                result = func(args)
                if callback is not None:
                    callback(result)
            except Exception as e:
                # A failing task must not take the worker down: a dead
                # worker shrinks the pool for good.
                print(str(e))
            finally:
                # Every get() needs a matching task_done(), including for
                # failed or malformed tasks, or tasks.join() never returns.
                self.tasks.task_done()
class ThreadPool:
    """Fixed-size group of Worker threads fed from one bounded task queue."""

    def __init__(self, num_threads):
        """Create the queue (capacity num_threads) and num_threads workers."""
        self.tasks = Queue(num_threads)
        created = 0
        while created < num_threads:
            # Each Worker starts itself in its constructor.
            Worker(self.tasks)
            created += 1

    def add_task(self, func, args=None, callback=None):
        """Put a (func, args, callback) task on the queue."""
        task = (func, args, callback)
        self.tasks.put(task)

    def wait_completion(self):
        """Block until every queued task has been marked done."""
        self.tasks.join()

    def broadcast_block_event(self):
        """Clear the shared event so workers pause before their next task."""
        event.clear()

    def broadcast_unblock_event(self):
        """Set the shared event so waiting workers continue."""
        event.set()

    def get_event(self):
        """Return the shared event object."""
        return event
Also, maybe the problem is that I create SWorker objects in a loop?
What happens to the old SWorker objects (are they garbage collected)?
There is still not enough code to localize the problem, but I'm sure it happens because you don't reuse your threads and start too many of them. Have you seen the canonical example in the Python Queue documentation, http://docs.python.org/library/queue.html (bottom of the page)?
I can reproduce your problem with the following code:
import threading
import Queue
q = Queue.Queue()


def worker():
    """Handle exactly one queue item, then return."""
    # Blocking get: nothing is ever put on the queue in this reproduction,
    # so the thread sleeps here forever.
    job = q.get()
    do_work(job)
    q.task_done()
# Reproduction: spawn worker threads without any limit. After some time this
# fails with "error: can't start new thread".
while True:
    spawned = threading.Thread(target=worker)
    spawned.start()

q.join()  # never reached
Instead, you must create a pool with a known, fixed number of threads and put your data into the queue, like this:
q = Queue()


def worker():
    """Process items from the shared queue `q` forever.

    Each item is passed to do_work(); the item is marked done afterwards
    whether or not do_work() succeeded.
    """
    while True:
        item = q.get()
        try:
            do_work(item)
        finally:
            # Always balance the get(): if do_work() raises and task_done()
            # is skipped, q.join() can never return.
            q.task_done()
# Start a fixed number of daemon consumers; the thread count no longer
# grows with the amount of work.
for _ in range(num_worker_threads):
    consumer = Thread(target=worker)
    consumer.daemon = True
    consumer.start()

# Hand the work to the pool through the queue.
for job in source():
    q.put(job)

# Block until task_done() has been called for every item that was put.
q.join()
UPD: In case you need to stop a thread, you can either add a flag to it or send it a special marker that means "stop", so that it breaks out of its while loop:
class Worker(Thread):
    """Queue consumer that can be stopped by a flag or by a sentinel message.

    Reads from the shared queue object named `queue` and passes every
    regular message to do_work().
    """

    break_msg = object()  # unique sentinel object meaning "stop"

    def __init__(self):
        # The base initialiser must run before the thread can be started.
        Thread.__init__(self)
        # `continue` is a reserved word and cannot be an attribute name, so
        # the stop flag is called `keep_running`.
        self.keep_running = True

    def run(self):
        """Handle messages until the flag is cleared or the sentinel arrives."""
        while self.keep_running:  # can stop and destroy thread (var 1)
            msg = queue.get(block=True)
            try:
                # The flag is re-checked after the blocking get() so that a
                # wake-up message sent after clearing it is not treated as
                # real work.
                if msg is self.break_msg or not self.keep_running:
                    return  # will stop and destroy thread (var 2)
                do_work(msg)
            finally:
                # Balance every get(), sentinel included, so that
                # queue.join() can return.
                queue.task_done()


workers = [Worker() for _ in range(num_workers)]
for w in workers:
    w.start()
for task in tasks:
    queue.put(task)

# Use ONE of the two shutdown variants below.

# Variant 1: stop each thread after all tasks are done. This needs as many
# stop messages as there are threads.
for _ in range(num_workers):
    queue.put(Worker.break_msg)

# OR

# Variant 2: wait until all tasks are done, then clear each flag and send a
# wake-up message so that a worker blocked in get() notices the change.
queue.join()
for w in workers:
    w.keep_running = False
    queue.put(Worker.break_msg)