Python threads handling - join() - Multithreading - python

i m writing a script in which threads that do simple tasks start every 3 minutes . I m using the threading and schedule modules .
Because of the nature of multi-threading threads are using the same resources .
What i need to achieve ?
When creating a new thread i would like to check if there is any runningthread; and if there is , then wait until the running thread terminates , then start the new thread .
What i have tried ?
import threading
def run_threaded(job_fn):
job_thread = threading.Thread(target=job_fn)
bot.logger.info(" --------------No of active threads : "+threading.activeCount())
job_thread.start()
job_thread.join()
bot.logger.info(" --------------No of active threads : " + threading.activeCount())
schedule.every(3).minutes.do(run_threaded, job)
while True:
schedule.run_pending()
Note: On the example above every job_thread needs 5 minutes to complete . Thus it creates 1 thread every 6 minutes .
From what i understand , the job_thread.join() line is joining the main thread ( with any other active threads) . Although the schedule , is blocked thus no any other thread can be instantiated before the previous thread is finished . Is that correct ? And if yes , is this a good practice of doing this .?
And for the record .. Can the script execute other blocks of code while running a thread ? Or can it instantiate other threads, before the previous thread is finished , if they are going to execute an other job let's say job2 ?

Here's a simple example showing several threads being started, each of which will terminate itself at a different time and how the main thread can determine when each has terminated.
#!/usr/bin/env python3
import threading
import time
import queue
class Worker(threading.Thread):
def __init__(self, duration, tqueue):
self.duration = duration
self.tqueue = tqueue
super().__init__()
def run(self):
# Do real work here instead of just sleeping
time.sleep(self.duration)
# Tell parent we are gone by putting our instance to the queue
self.tqueue.put(self)
def main():
thr_duration = [2.0, 1.5, 0.5, 2.7, 1.25]
workers = []
thr_queue = queue.Queue()
# Start all threads
for dur in thr_duration:
worker = Worker(dur, thr_queue)
worker.start()
workers.append(worker)
print("Started thread {}, duration {}".format(worker.name, dur))
# Wait for all threads to terminate
while workers:
worker = thr_queue.get()
worker.join()
print("Reaped thread {}".format(worker.name))
workers.remove(worker)
if __name__ == '__main__':
main()

Related

How to manage the exit of a process without blocking its thread in Python?

I'm trying to code a kind of task manager in Python. It's based on a job queue, the main thread is in charge of adding jobs to this queue. I have made this class to handle the jobs queued, able to limit the number of concurrent processes and handle the output of the finished processes.
Here comes the problem, the _check_jobs function I don't get updated the returncode value of each process, independently of its status (running, finished...) job.returncode is always None, therefore I can't run if statement and remove jobs from the processing job list.
I know it can be done with process.communicate() or process.wait() but I don't want to block the thread that launches the processes. Is there any other way to do it, maybe using a ProcessPoolExecutor? The queue can be hit by processes at any time and I need to be able to handle them.
Thank you all for your time and support :)
from queue import Queue
import subprocess
from threading import Thread
from time import sleep
class JobQueueManager(Queue):
def __init__(self, maxsize: int):
super().__init__(maxsize)
self.processing_jobs = []
self.process = None
self.jobs_launcher=Thread(target=self._worker_job)
self.processing_jobs_checker=Thread(target=self._check_jobs_status)
self.jobs_launcher.start()
self.processing_jobs_checker.start()
def _worker_job(self):
while True:
# Run at max 3 jobs concurrently
if self.not_empty and len(self.processing_jobs) < 3:
# Get job from queue
job = self.get()
# Execute a task without blocking the thread
self.process = subprocess.Popen(job)
self.processing_jobs.append(self.process)
# util if queue.join() is used to block the queue
self.task_done()
else:
print("Waiting 4s for jobs")
sleep(4)
def _check_jobs_status(self):
while True:
# Check if jobs are finished
for job in self.processing_jobs:
# Sucessfully completed
if job.returncode == 0:
self.processing_jobs.remove(job)
# Wait 4 seconds and repeat
sleep(4)
def main():
q = JobQueueManager(100)
task = ["stress", "--cpu", "1", "--timeout", "20"]
for i in range(10): #put 10 tasks in the queue
q.put(task)
q.join() #block until all tasks are done
if __name__ == "__main__":
main()
I answer myself, I have come up with a working solution. The JobExecutor class handles in a custom way the Pool of processes. The watch_completed_tasks function tries to watch and handle the output of the tasks when they are done. This way everything is done with only two threads and the main thread is not blocked when submitting processes.
import subprocess
from threading import Timer
from concurrent.futures import ProcessPoolExecutor, as_completed
import logging
def launch_job(job):
process = subprocess.Popen(job, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
print(f"launching {process.pid}")
return [process.pid, process.stdout.read(), process.stderr.read()]
class JobExecutor(ProcessPoolExecutor):
def __init__(self, max_workers: int):
super().__init__(max_workers)
self.futures = []
self.watch_completed_tasks()
def submit(self, command):
future = super().submit(launch_job, command)
self.futures.append(future)
return future
def watch_completed_tasks(self):
# Manage tasks completion
for completed_task in as_completed(self.futures):
print(f"FINISHED task with PID {completed_task.result()[0]}")
self.futures.remove(completed_task)
# call this function evevery 5 seconds
timer_thread = Timer(5.0, self.watch_completed_tasks)
timer_thread.setName("TasksWatcher")
timer_thread.start()
def main():
executor = JobExecutor(max_workers=5)
for i in range(10):
task = ["stress",
"--cpu", "1",
"--timeout", str(i+5)]
executor.submit(task)

How to use Queue with threading properly

I am new to queue & threads kindly help with the below code , here I am trying to execute the function hd , I need to run the function multiple times but only after a single run has been completed
import queue
import threading
import time
fifo_queue = queue.Queue()
def hd():
print("hi")
time.sleep(1)
print("done")
for i in range(3):
cc = threading.Thread(target=hd)
fifo_queue.put(cc)
cc.start()
Current Output
hi
hi
hi
donedonedone
Expected Output
hi
done
hi
done
hi
done​
You can use a Semaphore for your purposes
A semaphore manages an internal counter which is decremented by each acquire() call and incremented by each release() call. The counter can never go below zero; when acquire() finds that it is zero, it blocks, waiting until some other thread calls release().
A default value of Semaphore is 1,
class threading.Semaphore(value=1)
so only one thread would be active at once:
import queue
import threading
import time
fifo_queue = queue.Queue()
semaphore = threading.Semaphore()
def hd():
with semaphore:
print("hi")
time.sleep(1)
print("done")
for i in range(3):
cc = threading.Thread(target=hd)
fifo_queue.put(cc)
cc.start()
hi
done
hi
done
hi
done
As #user2357112supportsMonica mentioned in comments RLock would be more safe option
class threading.RLock
This class implements reentrant lock objects. A reentrant lock must be released by the thread that acquired it. Once a thread has acquired a reentrant lock, the same thread may acquire it again without blocking; the thread must release it once for each time it has acquired it.
import queue
import threading
import time
fifo_queue = queue.Queue()
lock = threading.RLock()
def hd():
with lock:
print("hi")
time.sleep(1)
print("done")
for i in range(3):
cc = threading.Thread(target=hd)
fifo_queue.put(cc)
cc.start()
please put the print("down") before sleep.
it will work fine.
Reason:
your program will do this:
thread1:
print
sleep
print
but while the thread is sleeping, other threads will be working and printing their first command.
in my way the thread will write the first, write the second and then go to sleep and wait for other threads to show up.

Python: continues method call hold until configured delay time

From different thread/interface my class getting work,my class has to process the work with configured delay time.
def getJob(job):
work = self._getNextWorkToRun(job)
if work is None:
return {}
#proceed to do work
job sends by different package to this class. I wanted to call _getNextWorkToRun() method every five minutes once only. but the job comes every seconds/less than seconds. So I have to wait until 5 minutes to call _getNextWorkToRun() once again with new job. Every job has reference (JOB1,JOB2...etc.,) and all the jobs have to complete with the delay of 5 mins.
What is the best way to achieve this.
below is an example of using threads, jobs will be added anytime to job queue from any other function and a get_job() function will run continuously to monitor jobs and process them on fixed interval until get a stop flag
from threading import Thread
from queue import Queue
import time
from random import random
jobs = Queue() # queue safely used between threads to pass jobs
run_flag = True
def job_feeder():
for i in range(10):
# adding a job to jobs queue, job could be anything, here we just add a string for simplicity
jobs.put(f'job-{i}')
print(f'adding job-{i}')
time.sleep(random()) # simulate adding jobs randomly
print('job_feeder() finished')
def get_job():
while run_flag:
if jobs.qsize(): # check if there is any jobs in queue first
job = jobs.get() # getting the job
print(f'executing {job}')
time.sleep(3)
print('get_job finished')
t1 = Thread(target=job_feeder)
t2 = Thread(target=get_job)
t1.start()
t2.start()
# we can make get_job() thread quit anytime by setting run_flag
time.sleep(20)
run_flag = False
# waiting for threads to quit
t1.join()
t2.join()
print('all clear')
output:
adding job-0
executing job-0
adding job-1
adding job-2
adding job-3
adding job-4
adding job-5
adding job-6
adding job-7
executing job-1
adding job-8
adding job-9
job_feeder() finished
executing job-2
executing job-3
executing job-4
executing job-5
executing job-6
get_job finished
all clear
note get_job() processed only 6 jobs because we send quit signal after 20 seconds

Mutual exclusion thread locking, with dropping of queued functions upon mutex/lock release, in Python?

This is the problem I have: I'm using Python 2.7, and I have a code which runs in a thread, which has a critical region that only one thread should execute at the time. That code currently has no mutex mechanisms, so I wanted to inquire what I could use for my specific use case, which involves "dropping" of "queued" functions. I've tried to simulate that behavior with the following minimal working example:
useThreading=False # True
if useThreading: from threading import Thread, Lock
else: from multiprocessing import Process, Lock
mymutex = Lock()
import time
tstart = None
def processData(data):
#~ mymutex.acquire()
try:
print('thread {0} [{1:.5f}] Do some stuff'.format(data, time.time()-tstart))
time.sleep(0.5)
print('thread {0} [{1:.5f}] 1000'.format(data, time.time()-tstart))
time.sleep(0.5)
print('thread {0} [{1:.5f}] done'.format(data, time.time()-tstart))
finally:
#~ mymutex.release()
pass
# main:
tstart = time.time()
for ix in xrange(0,3):
if useThreading: t = Thread(target = processData, args = (ix,))
else: t = Process(target = processData, args = (ix,))
t.start()
time.sleep(0.001)
Now, if you run this code, you get a printout like this:
thread 0 [0.00173] Do some stuff
thread 1 [0.00403] Do some stuff
thread 2 [0.00642] Do some stuff
thread 0 [0.50261] 1000
thread 1 [0.50487] 1000
thread 2 [0.50728] 1000
thread 0 [1.00330] done
thread 1 [1.00556] done
thread 2 [1.00793] done
That is to say, the three threads quickly get "queued" one after another (something like 2-3 ms after each other). Actually, they don't get queued, they simply start executing in parallel after 2-3 ms after each other.
Now, if I enable the mymutex.acquire()/.release() commands, I get what would be expected:
thread 0 [0.00174] Do some stuff
thread 0 [0.50263] 1000
thread 0 [1.00327] done
thread 1 [1.00350] Do some stuff
thread 1 [1.50462] 1000
thread 1 [2.00531] done
thread 2 [2.00547] Do some stuff
thread 2 [2.50638] 1000
thread 2 [3.00706] done
Basically, now with locking, the threads don't run in parallel, but they run one after another thanks to the lock - as long as one thread is working, the others will block at the .acquire(). But this is not exactly what I want to achieve, either.
What I want to achieve is this: let's assume that when .acquire() is first triggered by a thread function, it registers an id of a function (say a pointer to it) in a queue. After that, the behavior is basically the same as with the Lock - while the one thread works, the others block at .acquire(). When the first thread is done, it goes in the finally: block - and here, I'd like to check to see how many threads are waiting in the queue; then I'd like to delete/drop all waiting threads except for the very last one - and finally, I'd .release() the lock; meaning that after this, what was the last thread in the queue would execute next. I'd imagine, I would want to write something like the following pseudocode:
...
finally:
if (len(mymutex.queue) > 2): # more than this instance plus one other waiting:
while (len(mymutex.queue) > 2):
mymutex.queue.pop(1) # leave alone [0]=this instance, remove next element
# at this point, there should be only queue[0]=this instance, and queue[1]= what was the last thread queued previously
mymutex.release() # once we releace, queue[0] should be gone, and the next in the queue should acquire the mutex/lock..
pass
...
With that, I'd expect a printout like this:
thread 0 [0.00174] Do some stuff
thread 0 [0.50263] 1000
thread 0 [1.00327] done
# here upon lock release, thread 1 would be deleted - and the last one in the queue, thread 2, would acquire the lock next:
thread 2 [1.00350] Do some stuff
thread 2 [1.50462] 1000
thread 2 [2.00531] done
What would be the most straightforward way to achieve this in Python?
Seems like you want a queue-like behaviour, so why not use Queue?
import threading
from Queue import Queue
import time
# threads advertise to this queue when they're waiting
wait_queue = Queue()
# threads get their task from this queue
task_queue = Queue()
def do_stuff():
print "%s doing stuff" % str(threading.current_thread())
time.sleep(5)
def queue_thread(sleep_time):
# advertise current thread waiting
time.sleep(sleep_time)
wait_queue.put("waiting")
# wait for permission to pass
message = task_queue.get()
print "%s got task: %s" % (threading.current_thread(), message)
# unregister current thread waiting
wait_queue.get()
if message == "proceed":
do_stuff()
# kill size-1 threads waiting
for _ in range(wait_queue.qsize() - 1):
task_queue.put("die")
# release last
task_queue.put("proceed")
if message == "die":
print "%s died without doing stuff" % threading.current_thread()
pass
t1 = threading.Thread(target=queue_thread, args=(1, ))
t2 = threading.Thread(target=queue_thread, args=(2, ))
t3 = threading.Thread(target=queue_thread, args=(3, ))
t4 = threading.Thread(target=queue_thread, args=(4, ))
# allow first thread to pass
task_queue.put("proceed")
t1.start()
t2.start()
t3.start()
t4.start()
thread-1 arrives first and "acquires" the section, other threads come later to wait at the queue (and advertise they're waiting). Then, when thread-1 leaves it gives permission to the last thread at the queue by telling all other thread to die, and the last thread to proceed.
You can have finer control using different messages, a typical one would be a thread-id in the wait_queue (so you know who is waiting, and the order in which it arrived).
You can probably utilize non-blocking operations (queue.put(block=False) and queue.get(block=False)) in your favour when you're set on what you need.

Python - Is it possible to "stop" or "pause" a thread

I have two threads, and, I want one thread to run for 10 seconds, and then have this thread stop, whilst another thread executes and then the first thread starts up again; this process is repeated. So e.g.
from threading import Thread
import sys
import time
class Worker(Thread):
Listened = False;
def __init__(self):
while 1:
if(self.Listened == False):
time.sleep(0)
else:
time.sleep(20)
for x in range(0, 10):
print "I'm working"
self.Listened = True
class Processor(Thread):
Listened = False;
def __init__(self):
# this is where I'm confused!!
Worker().start()
Processer().start()
(P.S. I have indented correctly, however, SO seems to have messed it up a bit)
Basically, what I want is:
The worker thread works for 10 seconds (or so) and then stops, the "processor" starts up and, once the processor has processed the data from the last run of the "Worker" thread, it then re-starts the "worker" thread up. I don't specifically have to re-start the "worker" thread from that current position, it can start from the beginning.
Does anyone have any ideas?
You can use a counting semaphore to block a thread, and then wake-it-up later.
A counting semaphore is an object that has a non-negative integer count. If a thread calls acquire() on the semaphore when the count is 0, the thead will block until the semaphore's count becomes greater than zero. To unblock the thread, another thread must increase the count of the semaphore by calling release() on the semaphore.
Create two semaphores, one to block the worker, and one to block the processor. Start the worker semaphore's count a 1 since we want it to run right away. Start the processor's semaphore's count to 0 since we want it to block until the worker is done.
Pass the semaphores to the worker and processor classes. After the worker has run for 10 seconds, it should wake-up the processor by calling processorSemaphore.release(), then it should sleep on its semaphore by calling workerSemaphore.acquire(). The processor does the same.
#!/usr/bin/env python
from threading import Thread, Semaphore
import sys
import time
INTERVAL = 10
class Worker(Thread):
def __init__(self, workerSemaphore, processorSemaphore):
super(Worker, self).__init__()
self.workerSemaphore = workerSemaphore
self.processorSemaphore = processorSemaphore
def run(self):
while True:
# wait for the processor to finish
self.workerSemaphore.acquire()
start = time.time()
while True:
if time.time() - start > INTERVAL:
# wake-up the processor
self.processorSemaphore.release()
break
# do work here
print "I'm working"
class Processor(Thread):
def __init__(self, workerSemaphore, processorSemaphore):
super(Processor, self).__init__()
print "init P"
self.workerSemaphore = workerSemaphore
self.processorSemaphore = processorSemaphore
def run(self):
print "running P"
while True:
# wait for the worker to finish
self.processorSemaphore.acquire()
start = time.time()
while True:
if time.time() - start > INTERVAL:
# wake-up the worker
self.workerSemaphore.release()
break
# do processing here
print "I'm processing"
workerSemaphore = Semaphore(1)
processorSemaphore = Semaphore(0)
worker = Worker(workerSemaphore, processorSemaphore)
processor = Processor(workerSemaphore, processorSemaphore)
worker.start()
processor.start()
worker.join()
processor.join()
See Alvaro's answer. But if you must really use threads then you can do something like below. However you can call start() on a Thread object only once. So either your data should preserve state as to where the next Worker thread should start from and you create a new worker thread in Processor every time or try to use a critical section so that the Worker and Processor threads can take turns to access it.
#!/usr/bin/env python
from threading import Thread
import time
class Worker(Thread):
def __init__(self):
Thread.__init__(self)
pass
def run(self):
for x in range(0, 10):
print "I'm working"
time.sleep(1)
class Processor(Thread):
def __init__(self, w):
Thread.__init__(self)
self.worker = w
def run(self):
# process data from worker thread, add your logic here
self.worker.start()
w = Worker()
p = Processor(w)
p.start()

Categories

Resources