One python multiprocess errors - python

I have a multiprocessing demo here, and I ran into some problems with it. After researching for a night, I still cannot work out the cause.
Can anyone help me?
I want one parent process to act as the producer: when tasks arrive, the parent forks some children to consume them. The parent also monitors the children; if any of them exits with an exception, the parent restarts it.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from multiprocessing import Process, Queue from Queue import Empty import sys, signal, os, random, time import traceback
# Handles of the worker processes the parent currently manages.
child_process = []
# Number of worker processes started when the script launches.
child_process_num = 4
# Queue handed to every worker (0 is passed as the first constructor
# argument; presumably maxsize, i.e. no explicit limit -- TODO confirm).
queue = Queue(0)
def work(queue):
    """Worker entry point executed inside each child process.

    Resets SIGINT, SIGTERM and SIGCHLD to their default dispositions, so that
    any handlers inherited from the parent do not run in the child, and then
    sleeps for ten seconds as a stand-in for real work.

    Args:
        queue: Queue object handed over by the parent; not read in this demo.
    """
    for signum in (signal.SIGINT, signal.SIGTERM, signal.SIGCHLD):
        signal.signal(signum, signal.SIG_DFL)
    time.sleep(10)  # demo sleep
def kill_child_processes(signum, frame):
    """Signal handler the parent registers for SIGINT and SIGTERM.

    Placeholder only: the demo does not terminate the children yet.

    Args:
        signum: Number of the delivered signal (unused).
        frame: Stack frame that was interrupted (unused).
    """
    # terminate all children
    pass
def restart_child_process(signum, frame):
    """SIGCHLD handler: replace every worker that is no longer alive.

    Walks ``child_process``, skips the entries that still report alive, joins
    each remaining one and starts a fresh ``work`` process in the same slot.

    Args:
        signum: Number of the delivered signal (unused).
        frame: Stack frame that was interrupted (unused).
    """
    # The list is only mutated in place, so no ``global`` statement is needed.
    for index, child in enumerate(child_process):
        try:
            if child.is_alive():
                continue
        except OSError:
            # The liveness check itself failed; fall through and replace
            # this child as if it were dead.
            pass
        child.join()  # join this process to make sure there is no zombie process
        replacement = Process(target=work, args=(queue,))
        replacement.start()
        child_process[index] = replacement  # restart one new process
if __name__ == '__main__':
    # Python 2 idiom: reload(sys) makes sys.setdefaultencoding() callable
    # again so the default string encoding can be switched to UTF-8.
    reload(sys)
    sys.setdefaultencoding("utf-8")

    # Start the initial set of workers and remember their handles.
    for _ in range(child_process_num):
        child = Process(target=work, args=(queue,))
        child.start()
        child_process.append(child)

    # The parent's handlers are installed only after the children are started.
    signal.signal(signal.SIGINT, kill_child_processes)
    signal.signal(signal.SIGTERM, kill_child_processes)  # hook the SIGTERM
    signal.signal(signal.SIGCHLD, restart_child_process)
    signal.signal(signal.SIGPIPE, signal.SIG_DFL)
When this program runs, there will be errors as below:
Error in atexit._run_exitfuncs:
Error in sys.exitfunc:
Traceback (most recent call last):
File "/usr/local/python/lib/python2.6/atexit.py", line 30, in _run_exitfuncs
traceback.print_exc()
File "/usr/local/python/lib/python2.6/traceback.py", line 227, in print_exc
print_exception(etype, value, tb, limit, file)
File "/usr/local/python/lib/python2.6/traceback.py", line 124, in print_exception
_print(file, 'Traceback (most recent call last):')
File "/usr/local/python/lib/python2.6/traceback.py", line 12, in _print
def _print(file, str='', terminator='\n'):
File "test.py", line 42, in restart_child_process
new_child.start()
File "/usr/local/python/lib/python2.6/multiprocessing/process.py", line 99, in start
_cleanup()
File "/usr/local/python/lib/python2.6/multiprocessing/process.py", line 53, in _cleanup
if p._popen.poll() is not None:
File "/usr/local/python/lib/python2.6/multiprocessing/forking.py", line 106, in poll
pid, sts = os.waitpid(self.pid, flag)
OSError: [Errno 10] No child processes
If I send a signal to one child with kill -SIGINT {child_pid}, I get:
[root#mail1 mail]# kill -SIGINT 32545
[root#mail1 mail]# Error in atexit._run_exitfuncs:
Traceback (most recent call last):
File "/usr/local/python/lib/python2.6/atexit.py", line 24, in _run_exitfuncs
func(*targs, **kargs)
File "/usr/local/python/lib/python2.6/multiprocessing/util.py", line 269, in _exit_function
p.join()
File "/usr/local/python/lib/python2.6/multiprocessing/process.py", line 119, in join
res = self._popen.wait(timeout)
File "/usr/local/python/lib/python2.6/multiprocessing/forking.py", line 117, in wait
return self.poll(0)
File "/usr/local/python/lib/python2.6/multiprocessing/forking.py", line 106, in poll
pid, sts = os.waitpid(self.pid, flag)
OSError: [Errno 4] Interrupted system call
Error in sys.exitfunc:
Traceback (most recent call last):
File "/usr/local/python/lib/python2.6/atexit.py", line 24, in _run_exitfuncs
func(*targs, **kargs)
File "/usr/local/python/lib/python2.6/multiprocessing/util.py", line 269, in _exit_function
p.join()
File "/usr/local/python/lib/python2.6/multiprocessing/process.py", line 119, in join
res = self._popen.wait(timeout)
File "/usr/local/python/lib/python2.6/multiprocessing/forking.py", line 117, in wait
return self.poll(0)
File "/usr/local/python/lib/python2.6/multiprocessing/forking.py", line 106, in poll
pid, sts = os.waitpid(self.pid, flag)
OSError: [Errno 4] Interrupted system call

The main process waits for all child processes to terminate before it exits, so there is a blocking call (i.e. wait4, reached through os.waitpid) made from a handler registered with atexit. The signal you sent interrupts that blocking call, hence the stack trace.
What I'm not clear about is whether the signal sent to the child is also delivered to the parent process, which would then interrupt that wait4 call. This is related to Unix process group behavior.

Related

How to complete a childprocess's job when the parent is interrupted?

I run the following code, and I want the child process to continue to complete the job when the parent process is terminated.
I need the child process to complete each submitted job.
The parent process doesn't matter.
import signal
from concurrent.futures import ProcessPoolExecutor
import time
def sub_task(bulks):
    """Process one batch, ignoring Ctrl-C from the moment it starts.

    Installs a do-nothing SIGINT handler in the calling process (it is not
    restored afterwards), spends 0.1 seconds per item of ``bulks``, prints
    ``Success`` and returns.

    Args:
        bulks: Iterable of items forming the batch; the items themselves are
            not inspected.

    Returns:
        Always ``True``.
    """
    def ignore_sigint(signum, frame):
        # Swallow SIGINT so an interrupt does not abort a batch in progress.
        pass

    signal.signal(signal.SIGINT, ignore_sigint)
    for _ in bulks:
        time.sleep(0.1)
    print("Success")
    return True
def main():
    """Produce numbers, batch them in hundreds and hand each batch to a pool.

    Any exception raised while producing -- including KeyboardInterrupt -- is
    printed and producing stops; the pool is then shut down with
    ``wait=True`` so already submitted batches can finish.

    Returns:
        Always ``True``.
    """
    # it is 40 cores (original note; the pool itself is created with 4 workers)
    pool = ProcessPoolExecutor(4)
    bulks = []
    try:
        for i in range(10000):
            time.sleep(0.05)
            bulks.append(i)
            if len(bulks) >= 100:
                print("Send")
                pool.submit(sub_task, bulks)
                bulks = []
        # Flush the final, partially filled batch.
        if bulks:
            pool.submit(sub_task, bulks)
    except BaseException:
        # Deliberately as broad as the original bare ``except``: Ctrl-C must
        # also land here so that the shutdown below still runs.
        import traceback
        traceback.print_exc()
    pool.shutdown(wait=True)
    return True
if __name__ == "__main__":
    # Guarded so that a worker process which re-imports this module (as the
    # "spawn" start method does) does not run main() again.
    main()
But got this exception:
Send
Send
Success
Send
^CProcess ForkProcess-4:
Traceback (most recent call last):
File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
self.run()
File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/usr/lib/python3.8/concurrent/futures/process.py", line 233, in _process_worker
call_item = call_queue.get(block=True)
File "/usr/lib/python3.8/multiprocessing/queues.py", line 97, in get
res = self._recv_bytes()
File "/usr/lib/python3.8/multiprocessing/connection.py", line 216, in recv_bytes
buf = self._recv_bytes(maxlength)
File "/usr/lib/python3.8/multiprocessing/connection.py", line 414, in _recv_bytes
buf = self._recv(4)
File "/usr/lib/python3.8/multiprocessing/connection.py", line 379, in _recv
chunk = read(handle, remaining)
KeyboardInterrupt
Traceback (most recent call last):
File "test1.py", line 37, in main
time.sleep(0.05)
KeyboardInterrupt
Can anyone help me out?
Thanks in advance

How to properly setup sys.excepthook

I have written the following code to understand the sys.excepthook on a multiprocess environment. I am using python 3. I have created 2 processes which would print and wait for getting ctrl+c.
from multiprocessing import Process
import multiprocessing
import sys
from time import sleep
class foo:
    """Demo object whose method keeps a child process busy until interrupted."""

    def f(self, name):
        """Print a handled error and a greeting, then spin forever.

        Args:
            name: Value printed after ``hello``.
        """
        try:
            raise ValueError("test value error")
        except ValueError as e:
            print(e)
        print('hello', name)
        # Busy-wait; this loop never exits on its own.
        while True:
            pass
def myexcepthook(exctype, value, traceback):
    """Exception hook: print the exception value and terminate the children.

    Args:
        exctype: Class of the uncaught exception (unused here).
        value: The exception instance; its string form is printed.
        traceback: Traceback object of the exception (unused).
    """
    print("Value: {}".format(value))
    for p in multiprocessing.active_children():
        p.terminate()
def main(name):
    """Process target: create a ``foo`` and run its ``f`` method.

    Args:
        name: Passed through to ``foo.f``.
    """
    foo().f(name)
# Route uncaught exceptions of this process through myexcepthook.
sys.excepthook = myexcepthook

if __name__ == '__main__':
    # Start two children; the parent does not wait for them here.
    for i in range(2):
        p = Process(target=main, args=('bob', ))
        p.start()
I was expecting the following result when I press ctrl+C
python /home/test/test.py
test value error
hello bob
test value error
hello bob
Value: <KeyboardInterrupt>
But unfortunately, I got the following result.
/home/test/venvPython3/bin/python /home/test/test.py
test value error
hello bob
test value error
hello bob
Error in atexit._run_exitfuncs:
Traceback (most recent call last):
Process Process-1:
File "/usr/lib/python3.6/multiprocessing/popen_fork.py", line 28, in poll
pid, sts = os.waitpid(self.pid, flag)
KeyboardInterrupt
Traceback (most recent call last):
File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "/home/test/test.py", line 26, in main
a.f(name)
File "/home/test/test.py", line 15, in f
pass
KeyboardInterrupt
Process Process-2:
Traceback (most recent call last):
File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "/home/test/test.py", line 26, in main
a.f(name)
File "/home/test/test.py", line 15, in f
pass
KeyboardInterrupt
Process finished with exit code 0
It will be a great help if somebody could point out what am I doing wrong. Also, please let me know how to get the expected output.
You almost did it.
First, print exctype instead of value:
def myexcepthook(exctype, value, traceback):
    """Exception hook: print the exception class and terminate the children.

    Args:
        exctype: Class of the uncaught exception; this is what gets printed.
        value: The exception instance (unused).
        traceback: Traceback object of the exception (unused).
    """
    print("Value: {}".format(exctype))
    for p in multiprocessing.active_children():
        p.terminate()
Second, join() the created processes so that the parent does not exit prematurely:
if __name__ == '__main__':
    pr = []
    for i in range(2):
        p = Process(target=main, args=('bob', ))
        p.start()
        pr.append(p)
    # Wait for every child so the parent stays alive while they run.
    for p in pr:
        p.join()

How does a python process exit gracefully after receiving SIGTERM while waiting on a semaphore?

I have a Python process which spawns 5 other Python processes using the multiprocessing module. Let's call the parent process P0 and the others P1-P5. The requirement is, if we send a SIGTERM to P0, it should shut down P1 to P5 first and then exit itself.
The catch is P1 and P5 are waiting on semaphores. So when I send SIGTERM to these processes, they invoke the signal handler and exit. But since they are waiting on semaphore, they throw an exception. Is there any way to catch that exception before exit, so that P0 to P5 can make a graceful exit?
Traceback:
Traceback (most recent call last):
File "/usr/lib64/python2.7/multiprocessing/process.py", line 258, in _bootstrap
Traceback (most recent call last):
Process Process-2:
File "/usr/lib64/python2.7/multiprocessing/process.py", line 258, in _bootstrap
Traceback (most recent call last):
self.run()
File "/usr/lib64/python2.7/multiprocessing/process.py", line 114, in run
self._target(*self._args, **self._kwargs)
Process Process-5:
Traceback (most recent call last):
File "/usr/lib64/python2.7/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/usr/lib64/python2.7/multiprocessing/process.py", line 114, in run
self._target(*self._args, **self._kwargs)
File "/opt/fireeye/scripts/mip/StaticAnalysisRunner.py", line 45, in run
qsem.acquire()
You can install a signal handler which throws an exception which is then caught in the subprocess to handle exits gracefully.
Here is an example of a script which waits in a semaphore in a subprocess and terminates gracefully when sent a SIGTERM.
#!/usr/bin/env python
import signal
import time
import multiprocessing
class GracefulExit(Exception):
    """Raised from the signal handler to unwind the process in an orderly way."""
def signal_handler(signum, frame):
    """Convert a delivered signal into a ``GracefulExit`` exception.

    Args:
        signum: Number of the delivered signal (unused).
        frame: Stack frame that was interrupted (unused).

    Raises:
        GracefulExit: Always.
    """
    raise GracefulExit()
def subprocess_function():
    """Block on a semaphore until ``GracefulExit`` is raised, then report it.

    The semaphore is acquired twice; per the sample run, the first acquire
    succeeds and the second one blocks until the signal handler raises
    ``GracefulExit`` inside this process.
    """
    try:
        sem = multiprocessing.Semaphore()
        print("Acquiring semaphore")
        sem.acquire()
        print("Semaphore acquired")
        print("Blocking on semaphore - waiting for SIGTERM")
        sem.acquire()
    except GracefulExit:
        print("Subprocess exiting gracefully")
if __name__ == "__main__":
    # Use signal handler to throw exception which can be caught to allow
    # graceful exit.
    signal.signal(signal.SIGTERM, signal_handler)
    # Start a subprocess and wait for it to terminate.
    p = multiprocessing.Process(target=subprocess_function)
    p.start()
    print("Subprocess pid: %d" % p.pid)
    p.join()
An example run of this script is as follows:
$ ./test.py
Subprocess pid: 7546
Acquiring semaphore
Semaphore acquired
Blocking on semaphore - waiting for SIGTERM
----> Use another shell to kill -TERM 7546
Subprocess exiting gracefully
There is no traceback from the subprocess and the flow shows that the subprocess exits in a graceful manner. This is because the SIGTERM is caught by the subprocess signal handler which throws a normal Python exception which can be handled inside the process.

Python multiprocessing.Manager and os.fork producing strange behavior

My coworker asked for my help with a problem he was having with a daemon script he is working on. He was having a strange error involving a multiprocessing.Manager, which I managed to reproduce with the following five lines:
import multiprocessing, os, sys
# Create the Manager first, then fork with os.fork().
mgr = multiprocessing.Manager()
pid = os.fork()
if pid > 0:
    # Parent side of the fork: exit right away, leaving the child running.
    sys.exit(0)
When run on CentOS 6 Linux and Python 2.6, I get the following error:
Traceback (most recent call last):
File "/usr/lib64/python2.6/multiprocessing/util.py", line 235, in _run_finalizers
finalizer()
File "/usr/lib64/python2.6/multiprocessing/util.py", line 174, in __call__
res = self._callback(*self._args, **self._kwargs)
File "/usr/lib64/python2.6/multiprocessing/managers.py", line 576, in _finalize_manager
if process.is_alive():
File "/usr/lib64/python2.6/multiprocessing/process.py", line 129, in is_alive
assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Error in atexit._run_exitfuncs:
Traceback (most recent call last):
File "/usr/lib64/python2.6/atexit.py", line 24, in _run_exitfuncs
func(*targs, **kargs)
File "/usr/lib64/python2.6/multiprocessing/util.py", line 269, in _exit_function
p.join()
File "/usr/lib64/python2.6/multiprocessing/process.py", line 117, in join
assert self._parent_pid == os.getpid(), 'can only join a child process'
AssertionError: can only join a child process
Error in sys.exitfunc:
Traceback (most recent call last):
File "/usr/lib64/python2.6/atexit.py", line 24, in _run_exitfuncs
func(*targs, **kargs)
File "/usr/lib64/python2.6/multiprocessing/util.py", line 269, in _exit_function
p.join()
File "/usr/lib64/python2.6/multiprocessing/process.py", line 117, in join
assert self._parent_pid == os.getpid(), 'can only join a child process'
AssertionError: can only join a child process
I suspect the error is due to some interaction between os.fork and the multiprocessing.Manager, and that he should use the multiprocessing module to create new processes instead of os.fork. Can anyone confirm this and/or explain what is going on? If my hunch is correct, why is this the wrong place to use os.fork?
The issue is that Manager creates a process and tries to stop it at sys.exit. Since the process's memory is copied (lazily) during the fork, both the parent and the child try to stop that process and wait for it to stop. However, as the exception mentions, only the parent process can do that. If, instead of using os.fork, you use multiprocessing.Process, it will spawn a new process which won't try to close the Manager at sys.exit.

Race condition using multiprocessing and threading together

I wrote the sample program.
It creates 8 threads and spawns process in each one
import threading
from multiprocessing import Process
def fast_function():
    """Do nothing; serves as a trivial process target that returns at once."""
def thread_function():
    """Thread body: start ``process_number`` processes, joining each in turn."""
    process_number = 1
    print('start %s processes' % process_number)
    for _ in range(process_number):
        p = Process(target=fast_function, args=())
        p.start()
        p.join()
def main():
    """Start eight threads running ``thread_function`` and wait for them all."""
    threads_number = 8
    print('start %s threads' % threads_number)
    threads = [
        threading.Thread(target=thread_function, args=())
        for _ in range(threads_number)
    ]
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()
It crashes with several exceptions like this
Exception in thread Thread-3:
Traceback (most recent call last):
File "/usr/lib/python2.6/threading.py", line 532, in __bootstrap_inner
self.run()
File "/usr/lib/python2.6/threading.py", line 484, in run
self.__target(*self.__args, **self.__kwargs)
File "./repeat_multiprocessing_bug.py", line 15, in thread_function
p.start()
File "/usr/lib/python2.6/multiprocessing/process.py", line 99, in start
_cleanup()
File "/usr/lib/python2.6/multiprocessing/process.py", line 53, in _cleanup
if p._popen.poll() is not None:
File "/usr/lib/python2.6/multiprocessing/forking.py", line 106, in poll
pid, sts = os.waitpid(self.pid, flag)
OSError: [Errno 10] No child processes
Python version 2.6.5. Can somebody explain what I do wrong?
You're probably trying to run it from the interactive interpreter. Try writing your code to a file and run it as a python script, it works on my machine...
See the explanation and examples at the Python multiprocessing docs.
The multiprocessing module has a thread-safety issue in 2.6.5. Your best bet is updating to a newer Python, or add this patch to 2.6.5: http://hg.python.org/cpython/rev/41aef062d529/
The bug is described in more detail in the following links:
http://bugs.python.org/issue11891
http://bugs.python.org/issue1731717

Categories

Resources