From b21f1eaeae274589945c4e2e3e77a7b39305ac0d Mon Sep 17 00:00:00 2001 From: Brad King Date: Mon, 30 Nov 2009 13:14:02 -0500 Subject: [PATCH] KWSys: Restore SIGSTOP/SIGKILL to end process tree On UNIX systems we kill a tree of processes by performing a DFS walk of the tree. We send SIGSTOP to each process encountered, recursively handle its children, and then send SIGKILL. We used the above approach in the past, but it was removed by the commit "Do not send both SIGSTOP and SIGKILL when killing a process". The commit was meant to work around an OS X 10.3 bug in which the child would not always honor SIGKILL after SIGSTOP. At the time we wrongly assumed that the process tree remains intact after SIGKILL and before the child is reaped. In fact the grandchildren may be re-parented to ppid=1 even before the child is reaped, which causes the DFS walk to miss them. --- Source/kwsys/ProcessUNIX.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/Source/kwsys/ProcessUNIX.c b/Source/kwsys/ProcessUNIX.c index e2935eb1e..2deb2f4a9 100644 --- a/Source/kwsys/ProcessUNIX.c +++ b/Source/kwsys/ProcessUNIX.c @@ -2392,13 +2392,8 @@ static void kwsysProcessKill(pid_t process_id) DIR* procdir; #endif - /* Kill the process now to make sure it does not create more - children. Do not reap it yet so we can identify its existing - children. There is a small race condition here. If the child - forks after we begin looking for children below but before it - receives this kill signal we might miss a child. Also we might - not be able to catch up to a fork bomb. */ - kill(process_id, SIGKILL); + /* Suspend the process to be sure it will not create more children. */ + kill(process_id, SIGSTOP); /* Kill all children if we can find them. */ #if defined(__linux__) || defined(__CYGWIN__) @@ -2486,6 +2481,19 @@ static void kwsysProcessKill(pid_t process_id) } #endif } + + /* Kill the process. 
*/ + kill(process_id, SIGKILL); + +#if defined(__APPLE__) + /* On OS X 10.3 the above SIGSTOP occasionally prevents the SIGKILL + from working. Just in case, we resume the child and kill it + again. There is a small race condition in this obscure case. If + the child manages to fork again between these two signals, we + will not catch its children. */ + kill(process_id, SIGCONT); + kill(process_id, SIGKILL); +#endif } /*--------------------------------------------------------------------------*/