Python multiprocessing : Killing a process gracefully - python

Does
import multiprocessing
import schedule
def worker():
#do some stuff
def sched(argv):
schedule.every(0.01).minutes.do(worker)
while True:
schedule.run_pending()
processs = []
..
..
p = multiprocessing.Process(target=sched,args)
..
..
processs.append(p)
for p in processs:
p.terminate()
kills gracefully a list of processes ?
If not what is the simplest way to do it ?
The goal is to reload the configuration file into memory, so I would like to kill all children processes and create others instead, those latter will read the new config file.
Edit : Added more code to explain that I am running a while True loop
Edit : This is the new code after #dano suggestion
def get_config(self):
from ConfigParser import SafeConfigParser
..
return argv
def sched(self, args, event):
#schedule instruction:
schedule.every(0.01).minutes.do(self.worker,args)
while not event.is_set():
schedule.run_pending()
def dispatch_processs(self, conf):
processs = []
event = multiprocessing.Event()
for conf in self.get_config():
process = multiprocessing.Process(target=self.sched,args=( i for i in conf), kwargs={'event' : event})
processs.append((process, event)
return processs
def start_process(self, process):
process.start()
def gracefull_process(self, process):
process.join()
def main(self):
while True:
processs = self.dispatch_processs(self.get_config())
print ("%s processes running " % len(processs) )
for process, event in processs:
self.start_process(process)
time.sleep(1)
event.set()
self.gracefull_process(process)
The good thing about the code, is that I can edit config file and the process will reload its config also.
The problem is that only the first process runs and the others are ignored.
Edit : This saved my life , working with while True in schedule() is not a good idea, so I set up refresh_time instead
def sched(self, args, event):
schedule.every(0.01).minutes.do(self.worker,args)
for i in range(refresh_time):
schedule.run_pending()
time.sleep(1)
def start_processs(self, processs):
for p,event in processs:
if not p.is_alive():
p.start()
time.sleep(1)
event.set()
self.gracefull_processs(processs)
def gracefull_processs(self, processs):
for p,event in processs:
p.join()
processs = self.dispatch_processs(self.get_config())
self.start_processs(processs)
def main(self):
while True:
processs = self.dispatch_processs(self.get_config())
self.start_processs(processs)
break
print ("Reloading function main")
self.main()

If you don't mind only aborting after worker has completed all of its work, its very simple to add a multiprocessing.Event to handle exiting gracefully:
import multiprocessing
import schedule
def worker():
#do some stuff
def sched(argv, event=None):
schedule.every(0.01).minutes.do(worker)
while not event.is_set(): # Run until we're told to shut down.
schedule.run_pending()
processes = []
..
..
event = multiprocessing.Event()
p = multiprocessing.Process(target=sched,args, kwargs={'event' : event})
..
..
processes.append((p, event))
# Tell all processes to shut down
for _, event in processes:
event.set()
# Now actually wait for them to shut down
for p, _ in processes:
p.join()

A: No, both .terminate() & SIG_* methods are rather brutal
In a need to arrange a gracefull end of any process, as described in your post, there rather shall be some "soft-signalling" layer, that allows, on both ends, to send/receive smart-signalls without being dependent on the O/S interpretations ( O/S knows nothing about your application-level context and state of the respective work-unit, that is currently being processed ).
You may want to read about such soft-signalling approach in links referred from >>> https://stackoverflow.com/a/25373416/3666197

No, it doesn't kill a process according to your own definition of gracefully - unless you take some additional steps.
Assuming you're using a unix system (since you mentioned scp), terminate sends a SIGTERM signal to the child process. You can catch this signal in the child process, and act accordingly (wait for scp to finish):
import signal
def on_terminate(signum, stack):
wait_for_current_scp_operation()
signal.signal(signal.SIGTERM, on_terminate)
Here's a tutorial about handling and sending signals

Related

How to manage the exit of a process without blocking its thread in Python?

I'm trying to code a kind of task manager in Python. It's based on a job queue, the main thread is in charge of adding jobs to this queue. I have made this class to handle the jobs queued, able to limit the number of concurrent processes and handle the output of the finished processes.
Here comes the problem, the _check_jobs function I don't get updated the returncode value of each process, independently of its status (running, finished...) job.returncode is always None, therefore I can't run if statement and remove jobs from the processing job list.
I know it can be done with process.communicate() or process.wait() but I don't want to block the thread that launches the processes. Is there any other way to do it, maybe using a ProcessPoolExecutor? The queue can be hit by processes at any time and I need to be able to handle them.
Thank you all for your time and support :)
from queue import Queue
import subprocess
from threading import Thread
from time import sleep
class JobQueueManager(Queue):
def __init__(self, maxsize: int):
super().__init__(maxsize)
self.processing_jobs = []
self.process = None
self.jobs_launcher=Thread(target=self._worker_job)
self.processing_jobs_checker=Thread(target=self._check_jobs_status)
self.jobs_launcher.start()
self.processing_jobs_checker.start()
def _worker_job(self):
while True:
# Run at max 3 jobs concurrently
if self.not_empty and len(self.processing_jobs) < 3:
# Get job from queue
job = self.get()
# Execute a task without blocking the thread
self.process = subprocess.Popen(job)
self.processing_jobs.append(self.process)
# util if queue.join() is used to block the queue
self.task_done()
else:
print("Waiting 4s for jobs")
sleep(4)
def _check_jobs_status(self):
while True:
# Check if jobs are finished
for job in self.processing_jobs:
# Sucessfully completed
if job.returncode == 0:
self.processing_jobs.remove(job)
# Wait 4 seconds and repeat
sleep(4)
def main():
q = JobQueueManager(100)
task = ["stress", "--cpu", "1", "--timeout", "20"]
for i in range(10): #put 10 tasks in the queue
q.put(task)
q.join() #block until all tasks are done
if __name__ == "__main__":
main()
I answer myself, I have come up with a working solution. The JobExecutor class handles in a custom way the Pool of processes. The watch_completed_tasks function tries to watch and handle the output of the tasks when they are done. This way everything is done with only two threads and the main thread is not blocked when submitting processes.
import subprocess
from threading import Timer
from concurrent.futures import ProcessPoolExecutor, as_completed
import logging
def launch_job(job):
process = subprocess.Popen(job, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
print(f"launching {process.pid}")
return [process.pid, process.stdout.read(), process.stderr.read()]
class JobExecutor(ProcessPoolExecutor):
def __init__(self, max_workers: int):
super().__init__(max_workers)
self.futures = []
self.watch_completed_tasks()
def submit(self, command):
future = super().submit(launch_job, command)
self.futures.append(future)
return future
def watch_completed_tasks(self):
# Manage tasks completion
for completed_task in as_completed(self.futures):
print(f"FINISHED task with PID {completed_task.result()[0]}")
self.futures.remove(completed_task)
# call this function evevery 5 seconds
timer_thread = Timer(5.0, self.watch_completed_tasks)
timer_thread.setName("TasksWatcher")
timer_thread.start()
def main():
executor = JobExecutor(max_workers=5)
for i in range(10):
task = ["stress",
"--cpu", "1",
"--timeout", str(i+5)]
executor.submit(task)

killing a thread without waiting for join

I want to kill a thread in python. This thread can run in a blocking operation and join can't terminate it.
Simular to this:
from threading import Thread
import time
def block():
while True:
print("running")
time.sleep(1)
if __name__ == "__main__":
thread = Thread(target = block)
thread.start()
#kill thread
#do other stuff
My problem is that the real blocking operation is in another module that is not from me so there is no place where I can break with a running variable.
The thread will be killed when exiting the main process if you set it up as a daemon:
from threading import Thread
import time
def block():
while True:
print("running")
time.sleep(1)
if __name__ == "__main__":
thread = Thread(target = block, daemon = True)
thread.start()
sys.exit(0)
Otherwise just set a flag, I'm using a bad example (you should use some synchronization not just a plain variable):
from threading import Thread
import time
RUNNING = True
def block():
global RUNNING
while RUNNING:
print("running")
time.sleep(1)
if __name__ == "__main__":
thread = Thread(target = block, daemon = True)
thread.start()
RUNNING = False # thread will stop, not killed until next loop iteration
.... continue your stuff here
Use a running variable:
from threading import Thread
import time
running = True
def block():
global running
while running:
print("running")
time.sleep(1)
if __name__ == "__main__":
thread = Thread(target = block)
thread.start()
running = False
# do other stuff
I would prefer to wrap it all in a class, but this should work (untested though).
EDIT
There is a way to asynchronously raise an exception in a separate thread which could be caught by a try: except: block, but it's a dirty dirty hack: https://gist.github.com/liuw/2407154
Original post
"I want to kill a thread in python." you can't. Threads are only killed when they're daemons when there are no more non-daemonic threads running from the parent process. Any thread can be asked nicely to terminate itself using standard inter-thread communication methods, but you state that you don't have any chance to interrupt the function you want to kill. This leaves processes.
Processes have more overhead, and are more difficult to pass data to and from, but they do support being killed by sending SIGTERM or SIGKILL.
from multiprocessing import Process, Queue
from time import sleep
def workfunction(*args, **kwargs): #any arguments you send to a child process must be picklable by python's pickle module
sleep(args[0]) #really long computation you might want to kill
return 'results' #anything you want to get back from a child process must be picklable by python's pickle module
class daemon_worker(Process):
def __init__(self, target_func, *args, **kwargs):
self.return_queue = Queue()
self.target_func = target_func
self.args = args
self.kwargs = kwargs
super().__init__(daemon=True)
self.start()
def run(self): #called by self.start()
self.return_queue.put(self.target_func(*self.args, **self.kwargs))
def get_result(self): #raises queue.Empty if no result is ready
return self.return_queue.get()
if __name__=='__main__':
#start some work that takes 1 sec:
worker1 = daemon_worker(workfunction, 1)
worker1.join(3) #wait up to 3 sec for the worker to complete
if not worker1.is_alive(): #if we didn't hit 3 sec timeout
print('worker1 got: {}'.format(worker1.get_result()))
else:
print('worker1 still running')
worker1.terminate()
print('killing worker1')
sleep(.1) #calling worker.is_alive() immediately might incur a race condition where it may or may not have shut down yet.
print('worker1 is alive: {}'.format(worker1.is_alive()))
#start some work that takes 100 sec:
worker2 = daemon_worker(workfunction, 100)
worker2.join(3) #wait up to 3 sec for the worker to complete
if not worker2.is_alive(): #if we didn't hit 3 sec timeout
print('worker2 got: {}'.format(worker2.get_result()))
else:
print('worker2 still running')
worker2.terminate()
print('killing worker2')
sleep(.1) #calling worker.is_alive() immediately might incur a race condition where it may or may not have shut down yet.
print('worker2 is alive: {}'.format(worker2.is_alive())

multiprocessing produces defunct process

I use Tornado as a web server, user can submit a task through the front end page, after auditing they can start the submitted task. In this situation, i want to start an asynchronous sub process to handle the task, so i write the following code in an request handler:
def task_handler():
// handle task here
def start_a_process_for_task():
p = multiprocessing.Process(target=task_handler,args=())
p.start()
return 0
I don't care about the sub process and just start a process for it and return to the front end page and tell user the task is started. The task itself will run in the background and will record it's status or results to database so user
can view on the web page later. So here i don't want to use p.join() which is blocking, but without p.join() after the task finished,the sub process becomes a defunct process and as Tornado runs as a daemon and never exits, the defunct process will never disappear.
Anyone knows how to fix this problem, thanks.
The proper way to avoid defunct children is for the parent to gracefully clean up and close all resources of the exited child. This is normally done by join(), but if you want to avoid that, another approach could be to set up a global handler for the SIGCHLD signal on the parent.
SIGCHLD will be emitted whenever a child exits, and in the handler function you should either call Process.join() if you still have access to the process object, or even use os.wait() to "wait" for any child process to terminate and properly reap it. The wait time here should be 0 as you know for sure a child process has just exited. You will also be able to get the process' exit code / termination signal so it can also be a useful method to handle / log child process crashes.
Here's a quick example of doing this:
from __future__ import print_function
import os
import signal
import time
from multiprocessing import Process
def child_exited(sig, frame):
pid, exitcode = os.wait()
print("Child process {pid} exited with code {exitcode}".format(
pid=pid, exitcode=exitcode
))
def worker():
time.sleep(5)
print("Process {pid} has completed it's work".format(pid=os.getpid()))
def parent():
children = []
# Comment out the following line to see zombie children
signal.signal(signal.SIGCHLD, child_exited)
for i in range(5):
c = Process(target=worker)
c.start()
print("Parent forked out worker process {pid}".format(pid=c.pid))
children.append(c)
time.sleep(1)
print("Forked out {c} workers, hit Ctrl+C to end...".format(c=len(children)))
while True:
time.sleep(5)
if __name__ == '__main__':
parent()
One caveat is that I am not sure if this process works on non-Unix operating systems. It should work on Linux, Mac and other Unixes.
You need to join your subprocesses if you do not want to create zombies. You can do it in threads.
This is a dummy example. After 10 seconds, all your subprocesses are gone instead of being zombies. This launches a thread for every subprocess. Threads do not need to be joined or waited. A thread executes subprocess, joins it and then exits the thread as soon as the subprocess is completed.
import multiprocessing
import threading
from time import sleep
def task_processor():
sleep(10)
class TaskProxy(threading.Thread):
def __init__(self):
super(TaskProxy, self).__init__()
def run(self):
p = multiprocessing.Process(target=task_processor,args=())
p.start()
p.join()
def task_handler():
t = TaskProxy()
t.daemon = True
t.start()
return
for _ in xrange(0,20):
task_handler()
sleep(60)

python multiprocessing.Process.terminate - How to kill child processes

This code:
import multiprocessing as mp
from threading import Thread
import subprocess
import time
class WorkerProcess(mp.Process):
def run(self):
# Simulate long running task
self.subprocess = subprocess.Popen(['python', '-c', 'import time; time.sleep(1000)'])
self.code = self.subprocess.wait()
class ControlThread(Thread):
def run():
jobs = []
for _ in range(2):
job = WorkerProcess()
jobs.append(job)
job.start()
# wait for a while and then kill jobs
time.sleep(2)
for job in jobs:
job.terminate()
if __name__ == "__main__":
controller = ControlThread()
controller.start()
When I terminate the spawned WorkerProcess instances. They die just fine, however the subprocesses python -c 'import time; time.sleep(1000) runs until completition. This is well documented in the official docs, but how do I kill the child processes of a killed process?
A possbile soultion might be:
Wrap WorkerProcess.run() method inside try/except block catching SIGTERM, and terminating the subprocess.call call. But I am not sure how to catch the SIGTERM in the WorkerProcess
I also tried setting signal.signal(signal.SIGINT, handler) in the WorkerProcess, but I am getting ValuError, because it is allowed to be set only in the main thread.
What do I do now?
EDIT: As #svalorzen pointed out in comments this doesn't really work since the reference to self.subprocess is lost.
Finally came to a clean, acceptable solution. Since mp.Process.terminate is a method, we can override it.
class WorkerProcess(mp.Process):
def run(self):
# Simulate long running task
self.subprocess = subprocess.Popen(['python', '-c', 'import time; time.sleep(1000)'])
self.code = self.subprocess.wait()
# HERE
def terminate(self):
self.subprocess.terminate()
super(WorkerProcess, self).terminate()
You can use queues to message to your subprocesses and ask them nicely to terminate their children before exiting themselves. You can't use signals in anywhere else but your main thread, so signals are not suitable for this.
Curiously, when I modify the code like this, even if I interrupt it with control+C, subprocesses will die as well. This may be OS related thing, though.
import multiprocessing as mp
from threading import Thread
import subprocess
import time
from Queue import Empty
class WorkerProcess(mp.Process):
def __init__(self,que):
super(WorkerProcess,self).__init__()
self.queue = que
def run(self):
# Simulate long running task
self.subprocess = subprocess.Popen(['python', '-c', 'import time; time.sleep(1000)'])
while True:
a = self.subprocess.poll()
if a is None:
time.sleep(1)
try:
if self.queue.get(0) == "exit":
print "kill"
self.subprocess.kill()
self.subprocess.wait()
break
else:
pass
except Empty:
pass
print "run"
else:
print "exiting"
class ControlThread(Thread):
def run(self):
jobs = []
queues = []
for _ in range(2):
q = mp.Queue()
job = WorkerProcess(q)
queues.append(q)
jobs.append(job)
job.start()
# wait for a while and then kill jobs
time.sleep(5)
for q in queues:
q.put("exit")
time.sleep(30)
if __name__ == "__main__":
controller = ControlThread()
controller.start()
Hope this helps.
Hannu

Python Daemon Thread Clean Up Logic on Abrupt sys.exit()

Using Linux and Python 2.7.6, I have a script that uploads lots of files at one time. I am using multi-threading with the Queue and Threading modules.
I implemented a handler for SIGINT to stop the script if the user hits ctrl-C. I prefer to use daemon threads so I don't have to clear the queue, which would require alot of re-writing code to make the SIGINT handler have access to the Queue object since the handlers don't take parameters.
To make sure the daemon threads finish and clean up before sys.exit(), I am using threading.Event() and threading.clear() to make threads wait. This code seems to work as print threading.enumerate() only shows the main thread before the script terminates when I did debugging. Just to make sure, I was wondering if there is any kind of insight to this clean up implementation that I might be missing even though it seems to be working for me:
def signal_handler(signal, frame):
global kill_received
kill_received = True
msg = (
"\n\nYou pressed Ctrl+C!"
"\nYour logs and their locations are:"
"\n{}\n{}\n{}\n\n".format(debug, error, info))
logger.info(msg)
threads = threading.Event()
threads.clear()
while True:
time.sleep(3)
threads_remaining = len(threading.enumerate())
print threads_remaining
if threads_remaining == 1:
sys.exit()
def do_the_uploads(file_list, file_quantity,
retry_list, authenticate):
"""The uploading engine"""
value = raw_input(
"\nPlease enter how many concurent "
"uploads you want at one time(example: 200)> ")
value = int(value)
logger.info('{} concurent uploads will be used.'.format(value))
confirm = raw_input(
"\nProceed to upload files? Enter [Y/y] for yes: ").upper()
if confirm == "Y":
kill_received = False
sys.stdout.write("\x1b[2J\x1b[H")
q = CustomQueue()
def worker():
global kill_received
while not kill_received:
item = q.get()
upload_file(item, file_quantity, retry_list, authenticate, q)
q.task_done()
for i in range(value):
t = Thread(target=worker)
t.setDaemon(True)
t.start()
for item in file_list:
q.put(item)
q.join()
print "Finished. Cleaning up processes...",
#Allowing the threads to cleanup
time.sleep(4)
def upload_file(file_obj, file_quantity, retry_list, authenticate, q):
"""Uploads a file. One file per it's own thread. No batch style. This way if one upload
fails no others are effected."""
absolute_path_filename, filename, dir_name, token, url = file_obj
url = url + dir_name + '/' + filename
try:
with open(absolute_path_filename) as f:
r = requests.put(url, data=f, headers=header_collection, timeout=20)
except requests.exceptions.ConnectionError as e:
pass
if src_md5 == r.headers['etag']:
file_quantity.deduct()
If you want to handle Ctrl+C; it is enough to handle KeyboardInterrupt exception in the main thread. Don't use global X in a function unless you do X = some_value in it. Using time.sleep(4) to allow the threads to cleanup is a code smell. You don't need it.
I am using threading.Event() and threading.clear() to make threads wait.
This code has no effect on your threads:
# create local variable
threads = threading.Event()
# clear internal flag in it (that is returned by .is_set/.wait methods)
threads.clear()
Don't call logger.info() from a signal handler in a multithreaded program. It might deadlock your program. Only a limited set of functions can be called from a signal handler. The safe option is to set a global flag in it and exit:
def signal_handler(signal, frame):
global kill_received
kill_received = True
# return (no more code)
The signal might be delayed until q.join() returns. Even if the signal were delivered immediately; q.get() blocks your child threads. They hang until the main thread exits. To fix both issues, you could use a sentinel to signal child processes that there are no more work, drop the signal handler completely in this case:
def worker(stopped, queue, *args):
for item in iter(queue.get, None): # iterate until queue.get() returns None
if not stopped.is_set(): # a simple global flag would also work here
upload_file(item, *args)
else:
break # exit prematurely
# do child specific clean up here
# start threads
q = Queue.Queue()
stopped = threading.Event() # set when threads should exit prematurely
threads = set()
for _ in range(number_of_threads):
t = Thread(target=worker, args=(stopped, q)+other_args)
threads.add(t)
t.daemon = True
t.start()
# provide work
for item in file_list:
q.put(item)
for _ in threads:
q.put(None) # put sentinel to signal the end
while threads: # until there are alive child threads
try:
for t in threads:
t.join(.3) # use a timeout to get KeyboardInterrupt sooner
if not t.is_alive():
threads.remove(t) # remove dead
break
except (KeyboardInterrupt, SystemExit):
print("got Ctrl+C (SIGINT) or exit() is called")
stopped.set() # signal threads to exit gracefully
I've renamed value to number_of_threads. I've used explicit threads set
If an individual upload_file() blocks then the program won't exit on Ctrl-C.
Your case seems to be simple enough for multiprocessing.Pool interface:
from multiprocessing.pool import ThreadPool
from functools import partial
def do_uploads(number_of_threads, file_list, **kwargs_for_upload_file):
process_file = partial(upload_file, **kwargs_for_upload_file)
pool = ThreadPool(number_of_threads) # number of concurrent uploads
try:
for _ in pool.imap_unordered(process_file, file_list):
pass # you could report progress here
finally:
pool.close() # no more additional work
pool.join() # wait until current work is done
It should gracefully exit on Ctrl-C i.e., uploads that are in progress are allowed to finish but new uploads are not started.

Categories

Resources