I try threading in python. I have some code and hear that my program is waiting for threads if i use the .join method. But in the following code I get the print 'done' earlier then the prints in my thread. But why?
def getresults(seed):
print("get results now")
results[seed]
i = 0
threads = []
for suggestengine in suggestengines.keys():
i += 1
t = threading.Thread(target=getSuggestengineResult, args = (suggestengine, seed, i))
threads.append(t)
print('threads initialized')
for thread in threads:
thread.start()
for thread in threads:
thread.join
print('done')
Related
I am trying to create 3 threads within each of 2 processes and share a queue of the type multiprocessing.JoinableQueue among all threads. The worker_func function simply creates the threads while the thread_func function prints out the values it gets from the queue. The program gets stuck somewhere in the time.sleep or in the get() method of queue. What am I doing wrong? I am running on a Windows computer.
import threading
from multiprocessing import Pool, Manager, JoinableQueue
import multiprocessing
from threading import Thread
import time
def thread_func(q, disp_lock):
with disp_lock:
print('thread ', threading.current_thread().name, ' in process ', multiprocessing.current_process().name ,
' reporting for duty')
while True:
time.sleep(0.1)
try:
val = q.get_nowait()
with disp_lock:
print('thread ', threading.current_thread().name, ' in process ', multiprocessing.current_process().name , ' got value: ',val)
q.task_done()
except:
with disp_lock:
print('queue is empty: ', q.qsize())
def worker_func(num_threads, q, disp_lock):
threads = []
for i in range(num_threads):
thread = Thread(target= thread_func, args=( q, disp_lock,))
thread.daemon = True
thread.start()
if __name__ == "__main__":
manager = Manager()
lock = manager.Lock()
q1 = JoinableQueue()#manager.Queue()
q1_length = 20
for i in range(q1_length):
q1.put(i)
processes = []
num_processes = 2 # 2 processes
num_threads = 3
for _ in range(num_processes):
p = multiprocessing.Process(target=worker_func, args=( num_threads, q1, lock, )) # create a new Process
p.daemon = True
p.start()
processes.append(p)
q1.join()
You are not allowing the threads to complete their work. Either set them as non-daemon, or explicitly wait for them to join:
def worker_func(num_threads, q, disp_lock):
threads = []
for i in range(num_threads):
thread = Thread(target=thread_func, args=(q, disp_lock,))
thread.daemon = True
thread.start()
threads.append(thread)
# Wait for them to finish
for thread in threads:
thread.join()
I am having the Python Multi-threaded program as below. If I press ctrl+c within 5 seconds (approx), It is going inside the KeyboardInterrupt exception.
Running the code longer than 15 seconds failed to respond to ctrl+c. If I press ctrl+c after 15 seconds, It is not working. It is not throwing KeyboardInterrupt exception. What could be the reason ? I tested this on Linux.
#!/usr/bin/python
import os, sys, threading, time
class Worker(threading.Thread):
def __init__(self):
threading.Thread.__init__(self)
# A flag to notify the thread that it should finish up and exit
self.kill_received = False
def run(self):
while not self.kill_received:
self.do_something()
def do_something(self):
[i*i for i in range(10000)]
time.sleep(1)
def main(args):
threads = []
for i in range(10):
t = Worker()
threads.append(t)
t.start()
while len(threads) > 0:
try:
# Join all threads using a timeout so it doesn't block
# Filter out threads which have been joined or are None
threads = [t.join(1) for t in threads if t is not None and t.isAlive()]
except KeyboardInterrupt:
print "Ctrl-c received! Sending kill to threads..."
for t in threads:
t.kill_received = True
if __name__ == '__main__':
main(sys.argv)
After the first execution of
threads = [t.join(1) for t in threads if t is not None and t.isAlive()]
your variable threads contains
[None, None, None, None, None, None, None, None, None, None]
after the second execution, the same variable threads contains:
[]
At this point, len(threads) > 0 is False and you get out of the while loop. Your script is still running since you have 10 threads still active, but since you're not anymore in your try / except block (to catch KeyboardInterrupt), you can't stop using Ctrl + C
Add some prints to your script to see what I described:
#!/usr/bin/python
import os, sys, threading, time
class Worker(threading.Thread):
def __init__(self):
threading.Thread.__init__(self)
# A flag to notify the thread that it should finish up and exit
self.kill_received = False
def run(self):
while not self.kill_received:
self.do_something()
def do_something(self):
[i*i for i in range(10000)]
time.sleep(1)
def main(args):
threads = []
for i in range(10):
t = Worker()
threads.append(t)
t.start()
print('thread {} started'.format(i))
while len(threads) > 0:
print('Before joining')
try:
# Join all threads using a timeout so it doesn't block
# Filter out threads which have been joined or are None
threads = [t.join(1) for t in threads if t is not None and t.isAlive()]
print('After join() on threads: threads={}'.format(threads))
except KeyboardInterrupt:
print("Ctrl-c received! Sending kill to threads...")
for t in threads:
t.kill_received = True
print('main() execution is now finished...')
if __name__ == '__main__':
main(sys.argv)
And the result:
$ python thread_test.py
thread 0 started
thread 1 started
thread 2 started
thread 3 started
thread 4 started
thread 5 started
thread 6 started
thread 7 started
thread 8 started
thread 9 started
Before joining
After join() on threads: threads=[None, None, None, None, None, None, None, None, None, None]
Before joining
After join() on threads: threads=[]
main() execution is now finished...
Actually, Ctrl + C doesn't stop to work after 15 seconds, but after 10 or 11 seconds. This is the time needed to create and start the 10 threads (less than a second) and to execute join(1) on each thread (about 10 seconds).
Hint from the doc:
As join() always returns None, you must call isAlive() after join() to decide whether a timeout happened – if the thread is still alive, the join() call timed out.
to follow up on the poster above, isAlive() got renamed to is_alive()
tried on Python 3.9.6
full code:
#!/usr/bin/python
import os, sys, threading, time
class Worker(threading.Thread):
def __init__(self):
threading.Thread.__init__(self)
# A flag to notify the thread that it should finish up and exit
self.kill_received = False
def run(self):
while not self.kill_received:
self.do_something()
def do_something(self):
[i*i for i in range(10000)]
time.sleep(1)
def main(args):
threads = []
for i in range(10):
t = Worker()
threads.append(t)
t.start()
print('thread {} started'.format(i))
while len(threads) > 0:
print('Before joining')
try:
# Join all threads using a timeout so it doesn't block
# Filter out threads which have been joined or are None
threads = [t.join(1) for t in threads if t is not None and t.is_alive()]
print('After join() on threads: threads={}'.format(threads))
except KeyboardInterrupt:
print("Ctrl-c received! Sending kill to threads...")
for t in threads:
t.kill_received = True
print('main() execution is now finished...')
if __name__ == '__main__':
main(sys.argv)
My multi-threading script raising this error:
thread.error : can't start new thread
when it reached 460 threads:
threading.active_count() = 460
I assume the old threads keeps stack up, since the script didn't kill them. This my code:
import threading
import Queue
import time
import os
import csv
def main(worker):
#Do Work
print worker
return
def threader():
while True:
worker = q.get()
main(worker)
q.task_done()
def main_threader(workers):
global q
global city
q = Queue.Queue()
for x in range(20):
t = threading.Thread(target=threader)
t.daemon = True
print "\n\nthreading.active_count() = " + str(threading.active_count()) + "\n\n"
t.start()
for worker in workers:
q.put(worker)
q.join()
How do I kill the old threads when their job is done? (Is the function returning not enough?)
Python threading API doesn't have any function to kill a thread (nothing like threading.kill(PID)).
That said, you should code some thread-stopping algorithm yourself. For example, your thread should somehow decide that is should terminate (e.g. check some global variable or check whether some signal has been sent) and simply return.
For example:
import threading
nthreads = 7
you_should_stop = [0 for _ in range(nthreads)]
def Athread(number):
while True:
if you_should_stop[number]:
print "Thread {} stopping...".format(number)
return
print "Running..."
for x in range(nthreads):
threading.Thread(target = Athread, args = (x, )).start()
for x in range(nthreads):
you_should_stop[x] = 1
print "\nStopped all threads!"
I have a script that follows the same logic in this sample.
Basically I insert items into a global queue and spawn threads with a while loop that gets and item from the queue and the calls task_done.
I can get the threads to join if my while loop is checking that the queue is not empty, but I wanted to try and incorporate a flag that I could set myself to exit the loop. When I try to do this, joining the thread blocks forever.
Here is the non-working sample that doesnt join the threads:
import threading
import queue
class Mythread(threading.Thread):
def __init__(self):
super().__init__()
self.signal = False
def run(self):
global queue
while not self.signal:
item = q.get()
print(item)
q.task_done()
def stop(self):
self.signal = True
q = queue.Queue
for i in range(5000):
q.put(i)
threads = []
for i in range(2):
t = Mythread()
threads.append(t)
for t in threads:
t.start()
q.join()
for t in threads:
print(t.signal) <---- False
t.stop()
print(t.signal) <---- True
t.join() <---- Blocks forever
Here is the one that works using queue empty
import threading
import queue
class Mythread(threading.Thread):
def __init__(self):
super().__init__()
def run(self):
global queue
while not q.empty():
item = q.get()
print(item)
q.task_done()
q = queue.Queue
for i in range(5000):
q.put(i)
threads = []
for i in range(2):
t = Mythread()
threads.append(t)
for t in threads:
t.start()
q.join()
for t in threads:
t.join() <---- Works fine
print(t.is_alive()) <--- returns False
Any ideas?
q.get blocks so it won't reach your while condition
Why doesn't this code "act" threaded? (Please see the output.)
import time
from threading import Thread
def main():
for nums in [range(0,5), range(5,10)]:
t = Spider(nums)
t.start()
print 'started a thread'
t.join()
print "done"
class Spider(Thread):
def __init__(self, nums):
Thread.__init__(self)
self.nums = nums
def run(self): # this is an override
for num in self.nums:
time.sleep(3) # or do something that takes a while
print 'finished %s' % (num, )
if __name__ == '__main__':
main()
Output:
started a thread
finished 0
finished 1
finished 2
finished 3
finished 4
started a thread
finished 5
finished 6
finished 7
finished 8
finished 9
done
When you say t.join(), you're telling it to wait for the thread to end.
This means, you're asking it to make a thread, start it, then wait for the thread to end before making a new one.
If you want it to act multithreaded, you'll need to move the join()s outside of the loop.
def main():
# We will store the running threads in this
threads = []
# Start the threads
for nums in [range(0,5), range(5,10)]:
t = Spider(nums)
t.start()
print 'started a thread'
threads.append(t)
# All the threads have been started
# Now we wait for them to finish
for t in threads:
t.join()
print "done"
See also:
Documentation of Thread.join()
Your Thread join t.join blocks the main thread until the thread completes execution ( http://docs.python.org/library/threading.html#threading.Thread.join ). Change your code to look something like this:
def main():
threads = []
for nums in [range(0,5), range(5,10)]:
t = Spider(nums)
t.start()
print 'started a thread'
threads.append(t)
for t in threads: t.join()
print "done"
You need to start both the threads first, and then join with them once they are both running.