File pacemaker-libservices-prevent-killing-foreign-process.patch of Package pacemaker.14737

commit 9acc32260562df262a768bd2259372d16d90283b
Author: Jan Pokorný <jpokorny@redhat.com>
Date:   Tue Jan 31 20:49:04 2017 +0100

    Low: libservices(sync): partially prevent killing foreign process
    
    Do not attempt sending SIGKILL to process for which previous
    waitpid(pid, &st, WNOHANG) indicated ECHILD issue.  This might
    be a result of clashing with another thread or an orthogonal
    signal handler getting to reap the child first.
    
    For good measure, repeat non-hanging waitpid test right before
    killing -- it won't prevent illustrated race, but will limit it
    a bit more.  That's unlikely, sad path, hence without regularly
    imposed performance penalty.  And when we can pay more attention
    to prevent killing innocent processes (the code in question is
    commonly run as root so nothing will prevent that), we should.

diff --git a/lib/services/services_linux.c b/lib/services/services_linux.c
index ffb74ef42..16f25f351 100644
--- a/lib/services/services_linux.c
+++ b/lib/services/services_linux.c
@@ -521,11 +521,19 @@ action_synced_wait(svc_action_t * op, sigset_t *mask)
 #endif
                     wait_rc = waitpid(op->pid, &status, WNOHANG);
 
-                    if (wait_rc < 0){
-                        crm_perror(LOG_ERR, "waitpid() for %d failed", op->pid);
-
-                    } else if (wait_rc > 0) {
+                    if (wait_rc > 0) {
                         break;
+
+                    } else if (wait_rc < 0){
+                        if (errno == ECHILD) {
+                                /* Here, don't dare to kill and bail out... */
+                                break;
+
+                        } else {
+                                /* ...otherwise pretend process still runs. */
+                                wait_rc = 0;
+                        }
+                        crm_perror(LOG_ERR, "waitpid() for %d failed", op->pid);
                     }
                 }
             }
@@ -547,9 +555,8 @@ action_synced_wait(svc_action_t * op, sigset_t *mask)
 
     crm_trace("Child done: %d", op->pid);
     if (wait_rc <= 0) {
-        int killrc = kill(op->pid, SIGKILL);
-
         op->rc = PCMK_OCF_UNKNOWN_ERROR;
+
         if (op->timeout > 0 && timeout <= 0) {
             op->status = PCMK_LRM_OP_TIMEOUT;
             crm_warn("%s:%d - timed out after %dms", op->id, op->pid, op->timeout);
@@ -558,16 +565,15 @@ action_synced_wait(svc_action_t * op, sigset_t *mask)
             op->status = PCMK_LRM_OP_ERROR;
         }
 
-        if (killrc && errno != ESRCH) {
-            crm_err("kill(%d, KILL) failed: %d", op->pid, errno);
+        /* If only child hasn't been successfully waited for, yet.
+           This is to limit killing wrong target a bit more. */
+        if (wait_rc == 0 && waitpid(op->pid, &status, WNOHANG) == 0) {
+            if (kill(op->pid, SIGKILL)) {
+                crm_err("kill(%d, KILL) failed: %d", op->pid, errno);
+            }
+            /* Safe to skip WNOHANG here as we sent non-ignorable signal. */
+            while (waitpid(op->pid, &status, 0) == (pid_t) -1 && errno == EINTR) /*omit*/;
         }
-        /*
-         * From sigprocmask(2):
-         * It is not possible to block SIGKILL or SIGSTOP.  Attempts to do so are silently ignored.
-         *
-         * This makes it safe to skip WNOHANG here
-         */
-        while (waitpid(op->pid, &status, 0) == (pid_t) -1 && errno == EINTR) /*omit*/;
 
     } else if (WIFEXITED(status)) {
         op->status = PCMK_LRM_OP_DONE;
openSUSE Build Service is sponsored by