I would like my while loop to block for at most 5 seconds on all the threads it creates in the for loop. However, the following code blocks on each thread one by one. How can I approach my goal? Thanks.
threads = []
while True:
    for _ in range(3):
        newThread = threading.Thread(..)  # thread target elided in the question
        threads.append(newThread)
        newThread.start()
        newThread.join(5)
You need to use a condition variable (threading.Condition in Python). It lets you wait for a predicate to become true. In your case the predicate is "all threads have finished work, or the timeout was exceeded". Here is code which creates ten threads and waits until they are finished, with a 5-second timeout. Verbose logs will help you:
import threading
import time
import logging

logging.basicConfig(
    format='%(threadName)s:%(message)s',
    level=logging.DEBUG,
)

NUM_OF_THREADS = 10
TIMEOUT = 5

def sleeping_thread(delay, cond):
    logging.debug("Hi, I'm going to delay by %d sec.", delay)
    time.sleep(delay)
    logging.debug("I was sleeping for %d sec.", delay)
    cond.acquire()
    logging.debug("Calling notify().")
    cond.notify()
    cond.release()

def create_sleeping_thread(delay, cond):
    return threading.Thread(target=sleeping_thread,
                            args=(delay, cond))

if __name__ == '__main__':
    cond = threading.Condition(threading.Lock())
    cond.acquire()
    working_counter = NUM_OF_THREADS

    for i in range(NUM_OF_THREADS):
        t = create_sleeping_thread(i, cond)
        t.start()

    start_time = time.time()
    while working_counter > 0 and (time.time() - start_time < TIMEOUT):
        # Pass the remaining time so wait() cannot sleep past the deadline
        if not cond.wait(TIMEOUT - (time.time() - start_time)):
            break  # wait() timed out
        working_counter -= 1
        logging.debug('%d workers still working', working_counter)
    cond.release()

    logging.debug('Finish waiting for threads (%d workers still working)',
                  working_counter)
Further information can be found in the comp.programming.threads FAQ.
One thing to do is start all the threads, then iterate over the list and join. But I suppose this would still wait up to a total of 5 × (thread count) seconds. Alternatively, you could create one additional thread that simply waits for your threads indefinitely. Then in your main thread you can wait on that extra thread for 5 seconds.
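A minimal, self-contained sketch of that watcher-thread idea (the worker function and its delays are made up for illustration):

import threading
import time

def worker(delay):
    time.sleep(delay)

threads = [threading.Thread(target=worker, args=(d,)) for d in (1, 2, 10)]
for t in threads:
    t.start()

def watcher(ts):
    # Absorb the indefinite joins in a helper thread
    for t in ts:
        t.join()

w = threading.Thread(target=watcher, args=(threads,), daemon=True)
w.start()
w.join(5)  # the main thread blocks at most 5 seconds in total
print('all done' if not w.is_alive() else 'timed out; some workers still running')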
Are you trying to spawn a thread every 5 seconds, except that if one of the already-running threads ends, you wish to spawn a new thread sooner? If so, you could use a threading.Event to signal when a worker thread ends, and use event.wait(timeout) to block for at most 5 seconds waiting for the event:
import threading
import time
import logging

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s: %(message)s',
                    datefmt='%H:%M:%S')

def foo_event(n, e):
    time.sleep(n)
    name = threading.current_thread().name
    logger.info('{n}: setting event'.format(n=name))
    e.set()

def main():
    e = threading.Event()
    threads = []
    N = 5
    for i in range(3):
        t = threading.Thread(target=foo_event, args=(N + 1, e),
                             name='worker-{i}'.format(i=i))
        threads.append(t)
        t.daemon = True
        t.start()
        logger.info('entering wait')
        e.wait(N)
        logger.info('exit wait')
        e.clear()

main()
yields
05:06:34: entering wait
05:06:39: exit wait <-- Wait 5 seconds
05:06:39: entering wait
05:06:40: worker-0: setting event
05:06:40: exit wait <-- Wait <5 seconds
05:06:40: entering wait
05:06:45: worker-1: setting event
05:06:45: exit wait <-- Wait 5 seconds
Related
Let's say I have two types of threads:

a single thread that runs every x minutes; let's call it thread A
multiple threads that run all the time; threads B

When thread A does do_something(), I want all B threads to wait till A finishes, then resume them. I can't figure out what to use.

I tried threading.Condition with wait()/notifyAll(), but it did not work as I want. Once I put the Condition in, the threads process one by one, like synchronized threads or something. I want them to run freely.

This is the sample code where I try to make them wait() and then notify them, but they run one by one, like with join(). No idea what to use.
import threading
import time

class Foo:  # the class name is elided in the question; Foo is a stand-in
    check = True

    def xxx(self, g, con):
        for i in range(3):
            with con:
                if self.check:
                    con.wait()
                    self.check = False
                time.sleep(3)  # sleeping while holding the lock serializes the threads
                print(g)

foo = Foo()
con = threading.Condition()
threading.Thread(target=foo.xxx, args=('a', con)).start()
threading.Thread(target=foo.xxx, args=('b', con)).start()
threading.Thread(target=foo.xxx, args=('c', con)).start()
time.sleep(2)
with con:  # notify_all() requires the lock to be held
    con.notify_all()
Question: Blocking other Threads while one Thread is running
Instead of using threading.Condition(), this example uses threading.Barrier(...).
Used modules from docs.python.org:
module-threading
event-objects
barrier-objects
import time, threading
from threading import BrokenBarrierError
def worker_A(g, terminate, barrier):
# Counter to simulate conditional workload
do_something = 3
while not terminate.is_set():
if do_something == 0:
# Reset the barrier and wait until n_waiting == 2
barrier.reset()
while not terminate.is_set() and barrier.n_waiting < 2:
time.sleep(0.5)
            # Now the other threads are waiting at the barrier
            # Simulate workload ...
print('worker_A barrier.broken={} n_waiting={}'
.format(barrier.broken, barrier.n_waiting))
time.sleep(3)
# Call the third barrier.wait to release the barrier
try:
barrier.wait()
except BrokenBarrierError:
pass
# Reset counter to restart simulate conditional workload
do_something = 3
else:
# Count down and give the other threads a timeslice
do_something -= 1
time.sleep(0.5)
def worker_B(g, terminate, barrier):
while not terminate.is_set():
# Simulate workload ...
print('worker_B({})'.format(g))
time.sleep(1)
# Block at barrier.wait() if the barrier is NOT in the broken state
try:
barrier.wait()
except BrokenBarrierError:
pass
if __name__ == "__main__":
    # Event to terminate all threads safely
terminate = threading.Event()
# Barrier to block worker_B Threads
# We use 3 Threads, therefore init with parties=3
barrier = threading.Barrier(3)
barrier.abort()
# Create and start the Threads
threads = []
for t in [(worker_A, 'a'), (worker_B, 'b'), (worker_B, 'c'), ]:
threads.append(threading.Thread(target=t[0], args=(t[1], terminate, barrier,)))
threads[-1].start()
time.sleep(0.2)
# Simulating MAIN Thread
time.sleep(20)
# Set the `terminate` Event to True,
# and abort the barrier to force all Threads to terminate
print('Terminate...')
terminate.set()
barrier.abort()
# Wait until all Threads terminated
for t in threads:
t.join()
print('EXIT MAIN')
Tested with Python: 3.5
I run some jobs in parallel, which can sometimes take a long time, so I want the main thread to report on the progress, for example, each hour.
Below is the simplified version of what I came up with. The code will run test_function in 2 threads with arguments from input_arguments. Every 5 seconds it will print % of the jobs finished.
import threading
import queue
import time
def test_function(x):
time.sleep(4)
print("Finished ", x)
num_processes = 2
input_arguments = range(10)
# Define a worker which will continuously execute function taking input parameters from the queue
def worker():
while True:
x = q.get()
if x is None:
break
test_function(x)
q.task_done()
# Initialize queue and the threads
q = queue.Queue()
threads = []
for i in range(num_processes):
t = threading.Thread(target=worker)
t.start()
threads.append(t)
# Create a queue of input parameters for function
for item in input_arguments:
q.put(item)
# Report progress every 5 seconds
report_progress(q)
# stop workers
for i in range(num_processes):
q.put(None)
for t in threads:
t.join()
Where report_progress is defined as follows:
def report_progress(q):
qsize_init = q.qsize()
while not q.empty():
time.sleep(5)
portion_finished = 1 - q.qsize() / qsize_init
print("run_parallel: {:.1%} jobs are finished".format(portion_finished))
However, I want to report the progress every hour instead of every 5 seconds, and with an hour-long sleep the program might sit idle for many minutes after all jobs have finished.
Another possibility is to define report_progress differently:
def report_progress(q):
qsize_init = q.qsize()
time_start = time.time()
while not q.empty():
current_time = time.time()
if current_time - time_start > 5:
portion_finished = 1 - q.qsize() / qsize_init
print("run_parallel: {:.1%} jobs are finished".format(portion_finished))
time_start = time.time()
I am worried that constantly checking this condition will drain CPU resources: a small portion, but on a scale of hours it could add up to a lot.
Is there a standard way of handling this?
Python: 3.6
For now I will use a simple solution, suggested in the comments by #Andriy Maletsky.
The main thread will check every few seconds whether the queue is empty yet, and it will print a progress message if more than 1 hour has passed since the last report.
time_between_reports = 3600
time_between_checks = 5
def report_progress_until_finished(q):
qsize_init = q.qsize()
last_report_time = time.time()
while not q.empty():
time_elapsed = time.time() - last_report_time
if time_elapsed > time_between_reports:
portion_finished = 1 - q.qsize() / qsize_init
print("run_parallel: {:.1%} jobs are finished".format(portion_finished))
last_report_time = time.time()
time.sleep(time_between_checks)
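If the polling still bothers you, a possible alternative (my own sketch, not from the original answer) is to sleep on a threading.Event instead of time.sleep(): done.wait(timeout) wakes either when the timeout elapses or as soon as the event is set, so the reporter never busy-waits and can be woken immediately once the work is finished. The name report_progress_event and the done event are hypothetical:

import threading

def report_progress_event(q, done, interval=3600):
    qsize_init = q.qsize()
    # wait() returns False on timeout and True as soon as done.set() is called
    while not done.wait(interval):
        portion_finished = 1 - q.qsize() / qsize_init
        print("run_parallel: {:.1%} jobs are finished".format(portion_finished))

# usage sketch:
# done = threading.Event()
# reporter = threading.Thread(target=report_progress_event, args=(q, done))
# reporter.start()
# ... put items, add sentinels, join the worker threads ...
# done.set()       # wakes the reporter immediately
# reporter.join()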
The best solution I've found so far is to just use the sleep() function. I'd like to run my own callback function when a timer expires. Is there any event-driven way to go about it?
import time

# Sleep for a minute
time.sleep(60)
There's a built-in simple solution, using the threading module:
import threading
timer = threading.Timer(60.0, callback)
timer.start() # after 60 seconds, 'callback' will be called
## (in the meanwhile you can do other stuff...)
You can also pass args and kwargs to your callback. See here.
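For instance (a small sketch; callback and its parameters are stand-ins for your own function):

import threading

def callback(msg, count=1):
    print(msg * count)

# Pass positional args via `args` and keyword args via `kwargs`
timer = threading.Timer(60.0, callback, args=('tick ',), kwargs={'count': 3})
timer.start()

# A pending timer can also be stopped before it fires:
# timer.cancel()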
I think it could be really simple. Take a look at this example. It works even in a Python console!
from threading import Thread
from time import sleep

# Function to be called when the timer expires
def myFunction():
    print('Did anyone call me?')

# Function with the timer
def myTimer(seconds):
    sleep(seconds)
    myFunction()

# Thread that will sleep in the background and call your function
# when the timer expires.
myThread = Thread(target=myTimer, args=(4,))
myThread.start()
Put in whatever number of seconds you want, and keep working with the console or running the main thread/program. You will notice that the function is called when the timer comes to an end.
Edit
Another good example, considering the comment from #tarabyte, is one where the function is called only depending on the value of some variable or flag. I hope this is then the answer #tarabyte is looking for.
from threading import Thread
from time import sleep

myFlag = False

# Function to be called when the flag turns on
def myFunction():
    print('Did anyone call me?')

def myTimer():
    global myFlag
    while True:
        if myFlag:
            myFunction()
            myFlag = False
        else:
            sleep(1)

# Thread that will sleep in the background and call your function
# when myFlag turns True
myThread = Thread(target=myTimer)
myThread.start()

# Then, you can do whatever you want and later change the value of myFlag.
# Take a look at the output inside ipython when the value of myFlag is changed.
In [35]: myFlag
Out[35]: False
In [36]: myFlag = True
In [37]: Did anyone call me?
Sometimes a simple solution is best, even if it polls the time. I have used this to great success before; it doesn't block if your thread doesn't stop on it.
I think I would manage this most simply by checking times, since this is so much more simple and resource economical than working out a separate threaded solution:
import time

def event_minute_later(event):
    print(time.time())  # use for testing, comment out or delete for production
    return event + 60 < time.time()
And usage:
>>> event = time.time()
>>> print(event)
1393962502.62
>>> event_minute_later(event)
1393962526.73
False
>>> event_minute_later(event)
1393962562.9
True
Since Python 3.7 (and older versions have reached end of life by now), the built-in asyncio module lets you add a Python sleep() call asynchronously:
import asyncio

async def test():
    print("Hello ... but wait, there is more!")
    await asyncio.sleep(3)
    print("... in the async world!")

asyncio.run(test())
Here's some proof that it is non-blocking (courtesy of RealPython):
import asyncio
# Jupyter Notebook users need to allow
# nesting of the asyncio event loop
import nest_asyncio
nest_asyncio.apply()
import time
async def workload(text, duration):
while duration > 0:
# run sleep and yield control
# back to the event loop (for one cycle)
await asyncio.sleep(1)
print(f'{text} counter: sleeping {duration} seconds')
duration -= 1
async def main():
# send the workload() coroutine to the background,
# to let it run concurrently with other tasks,
# switching between them at await points
task_1 = asyncio.create_task(workload('First', 2))
task_2 = asyncio.create_task(workload('Second', 4))
task_3 = asyncio.create_task(workload('Third', 8))
print(f"Started: {time.strftime('%X')}")
# create await points for each
# of the concurrent tasks
await task_1
await task_2
await task_3
print(f"Ended: {time.strftime('%X')}")
if __name__ == '__main__':
asyncio.run(main())
Output:
Started: 09:07:21
First counter: sleeping 2 seconds
Second counter: sleeping 4 seconds
Third counter: sleeping 8 seconds
First counter: sleeping 1 seconds
Second counter: sleeping 3 seconds
Third counter: sleeping 7 seconds
Second counter: sleeping 2 seconds
Third counter: sleeping 6 seconds
Second counter: sleeping 1 seconds
Third counter: sleeping 5 seconds
Third counter: sleeping 4 seconds
Third counter: sleeping 3 seconds
Third counter: sleeping 2 seconds
Third counter: sleeping 1 seconds
Ended: 09:07:29
This may have been asked in a similar context but I was unable to find an answer after about 20 minutes of searching, so I will ask.
I have written a Python script (let's say scriptA.py) and another script (let's say scriptB.py).
In scriptB I want to call scriptA multiple times with different arguments; each run takes about an hour (it's a huge script, does lots of stuff, don't worry about it), and I want to be able to run scriptA with all the different arguments simultaneously, but I need to wait till ALL of them are done before continuing. My code:
import subprocess
#setup
do_setup()
#run scriptA
subprocess.call(scriptA + argumentsA)
subprocess.call(scriptA + argumentsB)
subprocess.call(scriptA + argumentsC)
#finish
do_finish()
I want to do run all the subprocess.call() at the same time, and then wait till they are all done, how should I do this?
I tried to use threading like the example here:
from threading import Thread
import subprocess
def call_script(args):
    subprocess.call(args)
#run scriptA
t1 = Thread(target=call_script, args=(scriptA + argumentsA,))
t2 = Thread(target=call_script, args=(scriptA + argumentsB,))
t3 = Thread(target=call_script, args=(scriptA + argumentsC,))
t1.start()
t2.start()
t3.start()
But I do not think this is right.
How do I know they have all finished running before going to my do_finish()?
Put the threads in a list and then use the join method:
threads = []
t = Thread(...)
threads.append(t)
...repeat as often as necessary...
# Start all threads
for x in threads:
x.start()
# Wait for all of them to finish
for x in threads:
x.join()
You need to use the join method of the Thread object at the end of the script.
t1 = Thread(target=call_script, args=(scriptA + argumentsA,))
t2 = Thread(target=call_script, args=(scriptA + argumentsB,))
t3 = Thread(target=call_script, args=(scriptA + argumentsC,))
t1.start()
t2.start()
t3.start()
t1.join()
t2.join()
t3.join()
Thus the main thread will wait till t1, t2 and t3 finish execution.
Since Python 3.2 there is a new approach to reach the same result, which I personally prefer to the traditional thread creation/start/join: the concurrent.futures package: https://docs.python.org/3/library/concurrent.futures.html
Using a ThreadPoolExecutor the code would be:
from concurrent.futures import ThreadPoolExecutor
import time
def call_script(ordinal, arg):
print('Thread', ordinal, 'argument:', arg)
time.sleep(2)
print('Thread', ordinal, 'Finished')
args = ['argumentsA', 'argumentsB', 'argumentsC']
with ThreadPoolExecutor(max_workers=2) as executor:
ordinal = 1
for arg in args:
executor.submit(call_script, ordinal, arg)
ordinal += 1
print('All tasks have been finished')
The output of the previous code is something like:
Thread 1 argument: argumentsA
Thread 2 argument: argumentsB
Thread 1 Finished
Thread 2 Finished
Thread 3 argument: argumentsC
Thread 3 Finished
All tasks have been finished
One of the advantages is that you can control the throughput by setting the maximum number of concurrent workers.
To use multiprocessing instead, you can use ProcessPoolExecutor.
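For instance, here is a minimal sketch of the same pattern with processes (the submission is guarded by if __name__ == '__main__' so child processes can import the module safely):

from concurrent.futures import ProcessPoolExecutor
import time

def call_script(ordinal, arg):
    print('Process', ordinal, 'argument:', arg)
    time.sleep(2)
    print('Process', ordinal, 'Finished')

if __name__ == '__main__':
    args = ['argumentsA', 'argumentsB', 'argumentsC']
    with ProcessPoolExecutor(max_workers=2) as executor:
        for ordinal, arg in enumerate(args, start=1):
            executor.submit(call_script, ordinal, arg)
    # the with-block only exits after every submitted job has finished
    print('All tasks have been finished')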
I prefer using a list comprehension based on an input list:

inputs = [scriptA + argumentsA, scriptA + argumentsB, ...]
threads = [Thread(target=call_script, args=(i,)) for i in inputs]
[t.start() for t in threads]
[t.join() for t in threads]
You can have a class like the one below, to which you can add any number of functions or console scripts that you want to execute in parallel fashion, then start the execution and wait for all the jobs to complete.
from multiprocessing import Process

class ProcessParallel(object):
    """
    To process the functions in parallel
    """
    def __init__(self, *jobs):
        """
        """
        self.jobs = jobs
        self.processes = []

    def fork_processes(self):
        """
        Creates the process objects for the given function delegates
        """
        for job in self.jobs:
            proc = Process(target=job)
            self.processes.append(proc)

    def start_all(self):
        """
        Starts all the processes together.
        """
        for proc in self.processes:
            proc.start()

    def join_all(self):
        """
        Waits until all the functions have executed.
        """
        for proc in self.processes:
            proc.join()

def two_sum(a=2, b=2):
    return a + b

def multiply(a=2, b=2):
    return a * b

# How to run:
if __name__ == '__main__':
    # note: two_sum, multiply can be replaced with any Python console scripts
    # which you want to run in parallel..
    procs = ProcessParallel(two_sum, multiply)
    # Add all the processes to the list
    procs.fork_processes()
    # Start process execution
    procs.start_all()
    # Wait until all the processes have finished
    procs.join_all()
I just came across the same problem, where I needed to wait for all the threads created in a for loop. I tried out the following piece of code. It may not be the perfect solution, but I thought it would be a simple one to test:
for t in threading.enumerate():
    try:
        t.join()
    except RuntimeError as err:
        if 'cannot join current thread' in str(err):
            continue
        else:
            raise
From the threading module documentation:
There is a “main thread” object; this corresponds to the initial
thread of control in the Python program. It is not a daemon thread.
There is the possibility that “dummy thread objects” are created.
These are thread objects corresponding to “alien threads”, which are
threads of control started outside the threading module, such as
directly from C code. Dummy thread objects have limited functionality;
they are always considered alive and daemonic, and cannot be join()ed.
They are never deleted, since it is impossible to detect the
termination of alien threads.
So, to catch those two cases when you are not interested in keeping a list of the threads you create:
import threading as thrd

def alter_data(data, index):
    data[index] *= 2

data = [0, 2, 6, 20]

for i, value in enumerate(data):
    thrd.Thread(target=alter_data, args=[data, i]).start()

for thread in thrd.enumerate():
    if thread.daemon:
        continue
    try:
        thread.join()
    except RuntimeError as err:
        if 'cannot join current thread' in err.args[0]:
            # catches the main thread
            continue
        else:
            raise
Whereupon:
>>> print(data)
[0, 4, 12, 40]
Maybe, something like:
for t in threading.enumerate():
if t.daemon:
t.join()
Using only join() can result in a false positive. As the docs say:
When the timeout argument is present and not None, it should be a
floating point number specifying a timeout for the operation in
seconds (or fractions thereof). As join() always returns None, you
must call isAlive() after join() to decide whether a timeout happened
– if the thread is still alive, the join() call timed out.
And an illustrative piece of code:
threads = []
for name in some_data:
    new = threading.Thread(
        target=self.some_func,
        args=(name,)
    )
    threads.append(new)
    new.start()

over_threads = iter(threads)
curr_th = next(over_threads)
while True:
    curr_th.join(timeout=5)  # without a timeout, the is_alive() check is pointless
    if curr_th.is_alive():
        continue             # join() timed out; keep waiting on this thread
    try:
        curr_th = next(over_threads)
    except StopIteration:
        break
Why doesn't this code "act" threaded? (Please see the output.)
import time
from threading import Thread

def main():
    for nums in [range(0, 5), range(5, 10)]:
        t = Spider(nums)
        t.start()
        print('started a thread')
        t.join()
    print("done")

class Spider(Thread):
    def __init__(self, nums):
        Thread.__init__(self)
        self.nums = nums

    def run(self):  # this is an override
        for num in self.nums:
            time.sleep(3)  # or do something that takes a while
            print('finished %s' % (num,))

if __name__ == '__main__':
    main()
Output:
started a thread
finished 0
finished 1
finished 2
finished 3
finished 4
started a thread
finished 5
finished 6
finished 7
finished 8
finished 9
done
When you say t.join(), you're telling it to wait for the thread to end.
This means you're asking it to make a thread, start it, then wait for the thread to end before making a new one.
If you want it to act multithreaded, you'll need to move the join()s outside of the loop.
def main():
    # We will store the running threads in this
    threads = []

    # Start the threads
    for nums in [range(0, 5), range(5, 10)]:
        t = Spider(nums)
        t.start()
        print('started a thread')
        threads.append(t)

    # All the threads have been started
    # Now we wait for them to finish
    for t in threads:
        t.join()

    print("done")
See also:
Documentation of Thread.join()
Your call to t.join() blocks the main thread until the thread completes execution (http://docs.python.org/library/threading.html#threading.Thread.join). Change your code to look something like this:
def main():
    threads = []
    for nums in [range(0, 5), range(5, 10)]:
        t = Spider(nums)
        t.start()
        print('started a thread')
        threads.append(t)
    for t in threads:
        t.join()
    print("done")
You need to start both threads first, and then join them once they are both running.