The following example program uses QThread instances to run jobs from a queue.
from queue import Queue
from sys import argv
from threading import Lock
from time import sleep

from PyQt5.QtCore import QObject, QThread, pyqtSignal, pyqtSlot
from PyQt5.QtWidgets import QApplication

class Worker(QObject):
    finished = pyqtSignal()

    def __init__(self, number):
        super().__init__()
        self.number = number

    @pyqtSlot()
    def work(self):
        while True:
            job = queue.get()
            if job is None:
                self.finished.emit()
                return
            with lock:
                print('worker={} job={}'.format(self.number, job))
            sleep(1)
app = QApplication(argv)
lock = Lock()
queue = Queue()
threads = []
nthreads = 4

for ithread in range(nthreads):
    thread = QThread()
    worker = Worker(ithread + 1)
    worker.moveToThread(thread)
    thread.started.connect(worker.work)
    worker.finished.connect(thread.quit)
    thread.start()
    threads += [thread]

#-----------
sleep(1e-10)
#-----------

for ijob in range(10):
    queue.put(ijob + 1)
for _ in range(nthreads):
    queue.put(None)
for thread in threads:
    thread.wait()
With the sleep call (see marked line), all threads are running as expected. Output:
worker=1 job=1
worker=2 job=2
worker=3 job=3
worker=4 job=4
[…]
Without the call, an arbitrary number of threads is running. Output:
worker=4 job=1
worker=4 job=2
worker=4 job=3
worker=4 job=4
[…]
I have tested this with PyQt5 on Python 3.6.2, with other versions, with varying sleep durations, with different statement orderings, and with a running event loop.
Why does the sleep call change the number of running threads?
The first three worker objects are prone to garbage-collection, whilst the fourth one is left as a global variable. The sleep allows just enough time for each thread to call the work method of its associated worker, and this will keep them all alive (since the methods run blocking while-loops). Without the sleep, the first three workers will be immediately garbage-collected, leaving only the fourth to process the queue. If the workers are kept in a list (in the same way as the threads), you should see all of them being utilised (with or without the sleep).
To prove that this is what is going on, you can add this to the Worker class:
class Worker(QObject):
    ...
    def __del__(self):
        print('deleted:', self.number)
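And to apply the fix described above, keep the workers in a list, exactly as is already done for the threads. A minimal sketch of the changed setup loop (only the workers list is new):

workers = []

for ithread in range(nthreads):
    thread = QThread()
    worker = Worker(ithread + 1)
    worker.moveToThread(thread)
    thread.started.connect(worker.work)
    worker.finished.connect(thread.quit)
    thread.start()
    threads += [thread]
    workers += [worker]  # keep a reference so the worker is not garbage-collected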
Related
I'm trying to code a kind of task manager in Python. It's based on a job queue; the main thread is in charge of adding jobs to this queue. I have made this class to handle the queued jobs, limit the number of concurrent processes, and handle the output of the finished processes.
Here comes the problem: in the _check_jobs_status function, the returncode value of each process never gets updated, regardless of its status (running, finished, ...). job.returncode is always None, so the if statement never matches and jobs are never removed from the processing job list.
I know it can be done with process.communicate() or process.wait(), but I don't want to block the thread that launches the processes. Is there any other way to do it, maybe using a ProcessPoolExecutor? Jobs can be added to the queue at any time and I need to be able to handle them.
Thank you all for your time and support :)
from queue import Queue
import subprocess
from threading import Thread
from time import sleep

class JobQueueManager(Queue):
    def __init__(self, maxsize: int):
        super().__init__(maxsize)
        self.processing_jobs = []
        self.process = None
        self.jobs_launcher = Thread(target=self._worker_job)
        self.processing_jobs_checker = Thread(target=self._check_jobs_status)
        self.jobs_launcher.start()
        self.processing_jobs_checker.start()

    def _worker_job(self):
        while True:
            # Run at most 3 jobs concurrently
            if self.not_empty and len(self.processing_jobs) < 3:
                # Get job from queue
                job = self.get()
                # Execute a task without blocking the thread
                self.process = subprocess.Popen(job)
                self.processing_jobs.append(self.process)
                # useful if queue.join() is used to block the queue
                self.task_done()
            else:
                print("Waiting 4s for jobs")
                sleep(4)

    def _check_jobs_status(self):
        while True:
            # Check if jobs are finished
            for job in self.processing_jobs:
                # Successfully completed
                if job.returncode == 0:
                    self.processing_jobs.remove(job)
            # Wait 4 seconds and repeat
            sleep(4)

def main():
    q = JobQueueManager(100)
    task = ["stress", "--cpu", "1", "--timeout", "20"]
    for i in range(10):  # put 10 tasks in the queue
        q.put(task)
    q.join()  # block until all tasks are done

if __name__ == "__main__":
    main()
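For context on the symptom itself: subprocess only updates Popen.returncode when poll(), wait(), or communicate() is called; nothing sets it in the background. Unlike the other two, poll() is non-blocking, so the checker loop could use it without holding anything up. A minimal sketch of that variant of _check_jobs_status (an illustration, not part of the original post):

def _check_jobs_status(self):
    while True:
        # Iterate over a copy, since we remove items while looping.
        for job in list(self.processing_jobs):
            if job.poll() is not None:  # non-blocking; sets and returns returncode
                self.processing_jobs.remove(job)
        # Wait 4 seconds and repeat.
        sleep(4)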
I'll answer myself, as I have come up with a working solution. The JobExecutor class handles the pool of processes in a custom way. The watch_completed_tasks function watches for completed tasks and handles their output when they are done. This way, everything is managed by only two threads, and the main thread is not blocked when submitting processes.
import subprocess
from threading import Timer
from concurrent.futures import ProcessPoolExecutor, as_completed
import logging

def launch_job(job):
    process = subprocess.Popen(job, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    print(f"launching {process.pid}")
    return [process.pid, process.stdout.read(), process.stderr.read()]

class JobExecutor(ProcessPoolExecutor):
    def __init__(self, max_workers: int):
        super().__init__(max_workers)
        self.futures = []
        self.watch_completed_tasks()

    def submit(self, command):
        future = super().submit(launch_job, command)
        self.futures.append(future)
        return future

    def watch_completed_tasks(self):
        # Manage task completion
        for completed_task in as_completed(self.futures):
            print(f"FINISHED task with PID {completed_task.result()[0]}")
            self.futures.remove(completed_task)
        # call this function again every 5 seconds
        timer_thread = Timer(5.0, self.watch_completed_tasks)
        timer_thread.setName("TasksWatcher")
        timer_thread.start()

def main():
    executor = JobExecutor(max_workers=5)
    for i in range(10):
        task = ["stress",
                "--cpu", "1",
                "--timeout", str(i + 5)]
        executor.submit(task)

# The __main__ guard is required for ProcessPoolExecutor on platforms
# that spawn worker processes.
if __name__ == "__main__":
    main()
My main script does heavy calculations and also a lot of logging with many IO operations.
I don't care much about either the speed or the order of the logging.
What I want is a log collector that takes the context I want to log and handles it in a new thread, so that my main script can keep running without being blocked.
The code I tried is as below:
import threading
from loguru import logger
from collections import deque
import time

class ThreadLogger:
    def __init__(self):
        self.thread = threading.Thread(target=self.run, daemon=True)
        self.log_queue = deque()
        self.thread.start()
        self.run()

    def run(self):
        # I have also tried: while True:
        while self.log_queue:
            log_func, context = self.log_queue.popleft()
            log_func(*context)

    def addLog(self, log_func, context):
        self.log_queue.append([log_func, context])

thlogger = ThreadLogger()

for i in range(20):
    # add log here in a new thread so that it won't affect the main jobs
    thlogger.addLog(logger.debug, (f'hi {i}',))
    # main jobs here (the heavy calculations happen at this point)
The code above doesn't really work as I expect:
It cannot detect by itself when to drain the queue.
Also, if I use while True:, it just blocks on the queue, which then never gets any longer.
All the other techniques I have come up with don't really run the logging on a single separate thread.
Any suggestions would be very much appreciated!
Remove the call self.run(); you have already started a thread to run that method. It is that extra call that blocks your program: it leaves the main thread sitting in the logging loop on the queue instead of going on to your main jobs.
    def __init__(self):
        self.thread = threading.Thread(target=self.run, daemon=True)
        self.log_queue = deque()
        self.thread.start()
        # self.run()  # remove this call
Once you do that, you can change while self.log_queue: to while True: (guarding the popleft with a non-empty check, as in the answer below, so it doesn't raise IndexError on an empty deque).
Building on Dan D.'s answer, here is the full working code:
import threading
from loguru import logger
from collections import deque
import time

class ThreadLogger:
    def __init__(self):
        self.thread = threading.Thread(target=self.run, daemon=True)
        self.log_queue = deque()
        self.thread.start()

    def run(self):
        while True:
            if self.log_queue:
                log_func, context = self.log_queue.popleft()
                log_func(*context)

    def addLog(self, log_func, context):
        self.log_queue.append([log_func, context])

thlogger = ThreadLogger()

for i in range(20):
    thlogger.addLog(logger.debug, (f'hi {i}',))

time.sleep(1)  # wait for the logging to happen before the program exits
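A further refinement (a suggestion of mine, not from either answer): the while True loop above busy-spins whenever the deque is empty. The standard library's queue.Queue blocks in get() until an item arrives, which avoids burning CPU. A minimal sketch:

import threading
import queue

from loguru import logger

class ThreadLogger:
    def __init__(self):
        self.log_queue = queue.Queue()
        self.thread = threading.Thread(target=self.run, daemon=True)
        self.thread.start()

    def run(self):
        while True:
            log_func, context = self.log_queue.get()  # blocks until an item is available
            log_func(*context)

    def addLog(self, log_func, context):
        self.log_queue.put((log_func, context))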
I’m implementing multithreading in PyQt5 and Python 3.5 by running a Worker inside a QThread. In the following sample, thread2_worker (which runs inside a secondary thread) creates thread3_worker, connects thread3_worker.finished to thread3_worker_finished(), and runs it.
When thread3_worker is done, it emits finished from within its thread, but the connection doesn’t work. My guess is that it has to do with thread3_worker being created, or connected, outside the main thread, but I'd welcome any clarification.
import time

from PyQt5.QtCore import QObject, QThread, pyqtSignal, pyqtSlot, QCoreApplication

class Worker(QObject):
    # Generic worker.
    finished = pyqtSignal()

    def __init__(self, func):
        super().__init__()
        self.func = func

    def run(self):
        self.func()
        self.finished.emit()

def thread_factory(func):
    # Creates a Worker and a QThread, and moves the Worker inside the QThread.
    worker = Worker(func)
    thread = QThread()
    worker.moveToThread(thread)
    thread.started.connect(worker.run)
    # Provision graceful termination.
    worker.finished.connect(thread.quit)
    worker.finished.connect(worker.deleteLater)
    thread.finished.connect(thread.deleteLater)
    return worker, thread

def wait():
    print("thread3:\t{}".format(QThread.currentThread()))
    time.sleep(3)

# finished signal of thread3_worker is never received.
@pyqtSlot()
def thread3_worker_finished():
    QCoreApplication.exit()

def create_thread3():
    print("thread2:\t{}".format(QThread.currentThread()))
    global thread3_worker, thread3
    # thread3_worker runs inside thread3, and all it does is call wait().
    thread3_worker, thread3 = thread_factory(wait)
    thread3_worker.finished.connect(thread3_worker_finished)  # FIXME Doesn't work.
    thread3.start()

app = QCoreApplication([])
print("Main thread:\t{}".format(QThread.currentThread()))

thread3_worker, thread3 = None, None
# thread2_worker runs inside thread2, and creates and runs thread3_worker.
thread2_worker, thread2 = thread_factory(create_thread3)
thread2.start()

app.exec_()
Cross-thread signals require an event loop. Your thread_factory function connects the finished signal of the worker to the quit slot of its thread, and quit asks the thread to exit its event loop.
So after thread3 starts, worker2 finishes and thread2 quits. Then, when the finished signal of worker3 is emitted, there is no longer an event loop running that can process it. If you comment out the line worker.finished.connect(thread.quit), your example should work.
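A sketch of a variant that keeps thread2's event loop alive (quit_on_finish is a parameter of my own invention, not from the question):

def thread_factory(func, quit_on_finish=True):
    worker = Worker(func)
    thread = QThread()
    worker.moveToThread(thread)
    thread.started.connect(worker.run)
    if quit_on_finish:
        worker.finished.connect(thread.quit)
    return worker, thread

# thread2 must keep running so that its event loop can
# dispatch the queued connection to thread3_worker_finished.
thread2_worker, thread2 = thread_factory(create_thread3, quit_on_finish=False)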
I'm trying to figure out how to implement the concept of having my main thread spawn a new thread that processes data concurrently as messages are passed to it.
From what I figured so far the simplest way of doing this would be something like:
from PySide.QtCore import QCoreApplication, QObject, Signal, QThread, QTimer

class Foo(QObject):
    do_foo = Signal(str)

    def __init__(self, parent=None):
        super(Foo, self).__init__(parent)
        self.do_foo.connect(self._do_foo)

    def foo(self, string):
        self.do_foo.emit(string)

    def _do_foo(self, string):
        # Process string
        print "Process thread:", self.thread().currentThreadId()

class App(QCoreApplication):
    def run(self):
        print "Main thread:", self.thread().currentThreadId()
        thread = QThread()
        foo = Foo()
        foo.moveToThread(thread)
        thread.start()
        # Obviously the following should be done with the event loop
        # or by processing events periodically.
        running = True
        while running:
            try:
                string = raw_input()
                foo.foo(string)
            except EOFError:
                running = False
        thread.exit()
        thread.wait()
        self.exit()

if __name__ == '__main__':
    import sys
    app = App(sys.argv)
    QTimer.singleShot(0, app.run)
    sys.exit(app.exec_())
But if this is the way to do it, I cannot see what the use of Slots would be.
Or you can use the "Provider-Consumer" design pattern (better known as producer-consumer). How does it work? You implement a queue: the spawned thread gets data from this queue, while your main thread feeds the queue with new data.
Your spawned thread blocks while the queue is empty. This way you can even process data in more than one thread, and you don't have to worry about two threads trying to read the same data.
Here is some pseudo-code for a consumer thread.
from threading import Thread, Event

class MyThread(Thread):
    def __init__(self, queue):
        Thread.__init__(self)
        self.queue = queue
        self.event = Event()  # I generally use threading.Event for stopping threads. You don't need it here.

    def run(self):
        while not self.event.isSet():
            data = self.queue.get()  # This blocks the thread until new data is available.
            do_something_with_data(data)
Then in your main thread:
import Queue
queue = Queue.Queue()
mthread = MyThread(queue)
mthread.start()
# And now you can send data to threads by:
queue.put(data)
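To stop the consumer cleanly, a common variation (my sketch, reusing the names above) is a sentinel value that the consumer recognizes:

# Producer side: ask the consumer to stop, then wait for it.
queue.put(None)
mthread.join()

# Consumer side: run() would then check for the sentinel:
#     data = self.queue.get()
#     if data is None:
#         break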
I have two threads. I want one thread to run for 10 seconds, then stop while the other thread executes, and then have the first thread start up again; this process is repeated. So e.g.
from threading import Thread
import sys
import time

class Worker(Thread):
    Listened = False

    def __init__(self):
        while 1:
            if self.Listened == False:
                time.sleep(0)
            else:
                time.sleep(20)
            for x in range(0, 10):
                print "I'm working"
            self.Listened = True

class Processor(Thread):
    Listened = False

    def __init__(self):
        # this is where I'm confused!!
        Worker().start()

Processor().start()
(P.S. I have indented correctly, however, SO seems to have messed it up a bit)
Basically, what I want is:
The worker thread works for 10 seconds (or so) and then stops; the "processor" starts up and, once it has processed the data from the last run of the "worker" thread, it restarts the "worker" thread. I don't specifically need the "worker" thread to resume from its current position; it can start from the beginning.
Does anyone have any ideas?
You can use a counting semaphore to block a thread, and then wake it up later.
A counting semaphore is an object that has a non-negative integer count. If a thread calls acquire() on the semaphore when the count is 0, the thread blocks until the semaphore's count becomes greater than zero. To unblock the thread, another thread must increase the count by calling release() on the semaphore.
Create two semaphores: one to block the worker, and one to block the processor. Start the worker semaphore's count at 1, since we want it to run right away. Start the processor semaphore's count at 0, since we want it to block until the worker is done.
Pass the semaphores to the worker and processor classes. After the worker has run for 10 seconds, it should wake up the processor by calling processorSemaphore.release(), then sleep on its own semaphore by calling workerSemaphore.acquire(). The processor does the same.
#!/usr/bin/env python

from threading import Thread, Semaphore
import sys
import time

INTERVAL = 10

class Worker(Thread):
    def __init__(self, workerSemaphore, processorSemaphore):
        super(Worker, self).__init__()
        self.workerSemaphore = workerSemaphore
        self.processorSemaphore = processorSemaphore

    def run(self):
        while True:
            # wait for the processor to finish
            self.workerSemaphore.acquire()

            start = time.time()
            while True:
                if time.time() - start > INTERVAL:
                    # wake-up the processor
                    self.processorSemaphore.release()
                    break

                # do work here
                print "I'm working"

class Processor(Thread):
    def __init__(self, workerSemaphore, processorSemaphore):
        super(Processor, self).__init__()
        print "init P"
        self.workerSemaphore = workerSemaphore
        self.processorSemaphore = processorSemaphore

    def run(self):
        print "running P"
        while True:
            # wait for the worker to finish
            self.processorSemaphore.acquire()

            start = time.time()
            while True:
                if time.time() - start > INTERVAL:
                    # wake-up the worker
                    self.workerSemaphore.release()
                    break

                # do processing here
                print "I'm processing"

workerSemaphore = Semaphore(1)
processorSemaphore = Semaphore(0)

worker = Worker(workerSemaphore, processorSemaphore)
processor = Processor(workerSemaphore, processorSemaphore)

worker.start()
processor.start()

worker.join()
processor.join()
See Alvaro's answer. But if you really must use threads, you can do something like the code below. However, you can call start() on a Thread object only once, so either your data should preserve state as to where the next Worker thread should start from, with Processor creating a new Worker thread every time, or you should use a critical section so that the Worker and Processor threads take turns accessing the data.
#!/usr/bin/env python

from threading import Thread
import time

class Worker(Thread):
    def __init__(self):
        Thread.__init__(self)

    def run(self):
        for x in range(0, 10):
            print "I'm working"
            time.sleep(1)

class Processor(Thread):
    def __init__(self, w):
        Thread.__init__(self)
        self.worker = w

    def run(self):
        # process data from worker thread, add your logic here
        self.worker.start()

w = Worker()
p = Processor(w)
p.start()
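A sketch of the first option, creating a new Worker each cycle (my illustration; process_data is a hypothetical stand-in for the actual processing):

class Processor(Thread):
    def run(self):
        while True:
            w = Worker()
            w.start()
            w.join()        # wait for the worker's ~10-second run to finish
            process_data()  # hypothetical: handle what the worker produced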