File fix_pmie_pmlogger_startup_races.patch of Package pcp

From 26129ab3fc91a77aefa3c2807c2b49dec4631036 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@redhat.com>
Date: Wed, 28 Nov 2012 14:30:31 +1100
Subject: [PATCH] Fix race conditions in pmie and pmlogger startup scripts

Recent changes to tempfile handling has regressed the pmie and
pmlogger startup scripts.  Errors of the form:
/etc/rc.d/init.d/pmlogger: line 100: /var/tmp/pcp.5vfQsSHKo/pmcheck: No such file or directory
are now produced.

Because sections of these two scripts are run in parallel with
the original script, we are open to race conditions where the
main script exits and removes the temporary directory before the
co-process has an opportunity to create its temporary file.  We
can resolve this using separate temporary dirs and trap handling
which thus no longer race.

Worse still, QA failed to detect the problem.  At least one test
that should have found the problem trivially (575) failed to, as
a result of aggressively discarding stderr and stdout instead of
using filtering.  This has been rectified and common pmie filter
routines abstracted (from tests 115, 504 and 575) to simplify the
task for all current and future tests.
---
 qa/115                   |   18 ------------------
 qa/115.out               |    2 ++
 qa/504                   |   14 --------------
 qa/575                   |   18 ++++++++++++------
 qa/575.out               |    1 +
 qa/common.filter         |   26 ++++++++++++++++++++++++++
 src/pmie/rc_pmie         |   30 +++++++++++++++++++++---------
 src/pmlogger/rc_pmlogger |   26 +++++++++++++++++---------
 8 files changed, 79 insertions(+), 56 deletions(-)

diff --git a/qa/115 b/qa/115
index 8a3f675..4be3323 100755
--- a/qa/115
+++ b/qa/115
@@ -54,24 +54,6 @@ _filter()
 	-e "s;/private/tmp;/tmp;g"
 }
 
-_filter_pmie_start()
-{
-    sed \
-	-e '/^Waiting for pmie process(es) to terminate/d' \
-	-e "s;$PCP_RC_DIR/pmie:;RC_SCRIPT;" \
-	-e '/RC_SCRIPT/d' \
-	-e '/(pmie) is disabled/d' \
-	-e '/To enable/d' \
-	-e '/\/sbin\/chkconfig pmie on/d' \
-	-e '/\/usr\/sbin\/sysv-rc-conf pmie on/d' \
-        -e '/update-rc.d -f pmie defaults/d' \
-	-e '/ln -sf \.\.\/init.d\/pmie \/etc\/rc\.d\//d' \
-	-e "s;$PCP_PMIECONTROL_PATH;\$PCP_PMIECONTROL_PATH;" \
-        -e '/^\.\.*done$/d' \
-        -e '/^\.\.*failed$/d' \
-	-e "s;/private/tmp;/tmp;g"
-}
-
 _count_pmies()
 {
     count=0
diff --git a/qa/115.out b/qa/115.out
index 0b8cfe6..7229a9e 100644
--- a/qa/115.out
+++ b/qa/115.out
@@ -5,6 +5,7 @@ pmie count at start of QA testing: 0
 pmie count after chkconfig pmie off: 0
 
 === check for missing control file ===
+$PCP_RC_DIR/pmie:
 Error: PCP inference engine control file $PCP_PMIECONTROL_PATH
        is missing!  Cannot start any Performance Co-Pilot inference engine(s).
 pmie count after attempt without control file: 0
@@ -34,3 +35,4 @@ No current pmie process exists for:
 Restarting pmie for host "LOCALHOST" ...
 + pmie -b -h LOCALHOST -l /tmp/PID.log0 /tmp/PID.conf
 
+$PCP_RC_DIR/pmie: PMIE not running
diff --git a/qa/504 b/qa/504
index c65df90..047b859 100755
--- a/qa/504
+++ b/qa/504
@@ -62,20 +62,6 @@ _filter()
 	-e "s/$lhost/LOCALHOST/g"
 }
 
-
-_filter_pmie_start()
-{
-    $PCP_AWK_PROG '
-/^Waiting for pmie process\(es\) to terminate/ { next }
-/^Waiting for PMIE process\(es\) to terminate/ { next }
-/^\/etc.*\/init\.d\/pmie:/ { next }
-/\(pmie\) is disabled/ { next }
-/To enable/ { next }
-/\/sbin\/chkconfig pmie on/ { next }
-
-{ print }'
-}
-
 _count_pmies()
 {
     count=0
diff --git a/qa/575 b/qa/575
index 785c034..41a6266 100755
--- a/qa/575
+++ b/qa/575
@@ -1,7 +1,7 @@
 #! /bin/sh
 # PCP QA Test No. 575
-# exercise fix for bug #692244
 #
+# Copyright (c) 2012 Red Hat.
 # Copyright (c) 1995-2002 Silicon Graphics, Inc.  All Rights Reserved.
 #
 
@@ -13,18 +13,24 @@ echo "QA output created by $seq"
 . ./common.filter
 . ./common.check
 
+_cleanup()
+{
+    _change_config pmie off
+    rm -f $tmp.*
+}
+
 signal=$PCP_BINADM_DIR/pmsignal
 status=1	# failure is the default!
-trap "rm -f $tmp.*; exit \$status" 0 1 2 3 15
+trap "_cleanup; exit \$status" 0 1 2 3 15
 
 # real QA test starts here
 $sudo $signal -a pmie >/dev/null 2>&1
 $sudo rm -fr $PCP_TMP_DIR/pmie
-$sudo $PCP_RC_DIR/pmie stop \
-| _filter_pcp_stop \
-| sed -e "s;$PCP_RC_DIR;\$PCP_RC_DIR;g"
 
-$sudo $PCP_RC_DIR/pmie start >/dev/null 2>&1
+_change_config pmie on
+$sudo $PCP_RC_DIR/pmie stop | _filter_pmie_stop
+$sudo $PCP_RC_DIR/pmie start | _filter_pmie_start
+
 # success, all done
 status=0
 exit
diff --git a/qa/575.out b/qa/575.out
index 2842d31..3077d2a 100644
--- a/qa/575.out
+++ b/qa/575.out
@@ -1,2 +1,3 @@
 QA output created by 575
 $PCP_RC_DIR/pmie: PMIE not running
+Performance Co-Pilot starting inference engine(s) ... 
diff --git a/qa/common.filter b/qa/common.filter
index fbbdbfd..648e115 100644
--- a/qa/common.filter
+++ b/qa/common.filter
@@ -412,6 +412,32 @@ s/PMCD/pmcd/
     | _filter_init_distro
 }
 
+_filter_pmie_start()
+{
+    sed \
+	-e '/^Waiting for pmie process(es) to terminate/d' \
+	-e "s;$PCP_RC_DIR/pmie;\$PCP_RC_DIR/pmie;g" \
+	-e '/(pmie) is disabled/d' \
+	-e '/To enable/d' \
+	-e '/\/sbin\/chkconfig pmie on/d' \
+	-e '/\/usr\/sbin\/sysv-rc-conf pmie on/d' \
+        -e '/update-rc.d -f pmie defaults/d' \
+	-e '/ln -sf \.\.\/init.d\/pmie \/etc\/rc\.d\//d' \
+	-e "s;$PCP_PMIECONTROL_PATH;\$PCP_PMIECONTROL_PATH;" \
+        -e '/^\.\.*done$/d' \
+	-e "s;/private/tmp;/tmp;g" \
+    | _filter_init_distro
+}
+
+_filter_pmie_stop()
+{
+    sed \
+	-e "s;$PCP_RC_DIR/pmie;\$PCP_RC_DIR/pmie;g" \
+	-e '/^Waiting for pmie/s/\.\.\.[. ]*done/.../' \
+	-e '/^Waiting for pmie/s/\.\.\. *$/.../' \
+    | _filter_init_distro
+}
+
 _filterall_pcp_start()
 {
     _filter_pcp_start \
diff --git a/src/pmie/rc_pmie b/src/pmie/rc_pmie
index bcb0ba7..698c45a 100644
--- a/src/pmie/rc_pmie
+++ b/src/pmie/rc_pmie
@@ -1,5 +1,6 @@
 #!/bin/sh
 #
+# Copyright (c) 2012 Red Hat.
 # Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
 # 
 # This program is free software; you can redistribute it and/or modify it
@@ -109,37 +110,48 @@ _reboot_setup()
     [ ! -d "$LOGDIR" ] && mkdir -p "$LOGDIR" && chown pcp:pcp "$LOGDIR"
 }
 
+# Note: _start_pmie is running in the background, in parallel with
+# the rest of the script.  It might complete well after the caller
+# so tmpfile handling is especially problematic.  Goal is to speed
+# bootup by starting potentially slow (remote monitoring) processes
+# in the background.
+#
 _start_pmie()
 {
+    bgstatus=0
+    bgtmp=`mktemp -d /var/tmp/pcp.XXXXXXXXX` || exit 1
+    trap "rm -rf $bgtmp; exit \$bgstatus" 0 1 2 3 15
+
     wait_option=''
     [ ! -z "$PMCD_WAIT_TIMEOUT" ] && wait_option="-t $PMCD_WAIT_TIMEOUT" 
 
     if pmcd_wait $wait_option
     then
-	pmie_check >$tmp/pmie 2>&1
-	if [ -s $tmp/pmie ]
+	pmie_check >$bgtmp/pmie 2>&1
+	bgstatus=$?
+	if [ -s $bgtmp/pmie ]
 	then
 	    pmpost "pmie_check start failed in $prog, mailing output to root"
 	    if [ ! -z "$MAIL" ]
 	    then
-		$MAIL -s "pmie_check start failed in $prog" root <$tmp/pmie >/dev/null 2>&1
+		$MAIL -s "pmie_check start failed in $prog" root <$bgtmp/pmie >/dev/null 2>&1
 	    else
 		echo "$prog: pmie_check start failed ..."
-		cat $tmp/pmie
+		cat $bgtmp/pmie
 	    fi
 	fi
-	rm -f $tmp/pmie
     else
-	status=$?
-	pmpost "pmcd_wait failed in $prog: exit status: $status" 
+	bgstatus=$?
+	pmpost "pmcd_wait failed in $prog: exit status: $bgstatus" 
 	if [ ! -z "$MAIL" ]
 	then
-	    echo "pmcd_wait: exit status: $status" | $MAIL -s "pmcd_wait failed in $prog" root
+	    echo "pmcd_wait: exit status: $bgstatus" | $MAIL -s "pmcd_wait failed in $prog" root
 	else
 	    echo "$prog: pmcd_wait failed ..."
-	    echo "pmcd_wait: exit status: $status"
+	    echo "pmcd_wait: exit status: $bgstatus"
 	fi
     fi
+    exit $bgstatus	# co-process is now complete
 }
 
 _shutdown()
diff --git a/src/pmlogger/rc_pmlogger b/src/pmlogger/rc_pmlogger
index 6f9949f..84f3382 100644
--- a/src/pmlogger/rc_pmlogger
+++ b/src/pmlogger/rc_pmlogger
@@ -1,5 +1,6 @@
 #!/bin/sh
 #
+# Copyright (c) 2012 Red Hat.
 # Copyright (c) 2000-2008 Silicon Graphics, Inc.  All Rights Reserved.
 # 
 # This program is free software; you can redistribute it and/or modify it
@@ -12,10 +13,6 @@
 # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 # for more details.
 # 
-# You should have received a copy of the GNU General Public License along
-# with this program; if not, write to the Free Software Foundation, Inc.,
-# 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
-# 
 # Start or Stop the Performance Co-Pilot pmlogger processes.
 #
 # The following is for chkconfig on RedHat based systems
@@ -95,21 +92,32 @@ in
 	;;
 esac
 
+# Note: _start_pmcheck() runs in the background, in parallel with
+# the rest of the script.  It might complete well after the caller
+# so tmpfile handling is especially problematic.  Goal is to speed
+# bootup by starting potentially slow (remote monitoring) pmlogger
+# processes in the background.
+#
 _start_pmcheck()
 {
-    pmlogger_check $VFLAG >$tmp/pmcheck 2>&1
-    if [ -s $tmp/pmcheck ]
+    bgstatus=0
+    bgtmp=`mktemp -d /var/tmp/pcp.XXXXXXXXX` || exit 1
+    trap "rm -rf $bgtmp; exit \$bgstatus" 0 1 2 3 15
+
+    pmlogger_check $VFLAG >$bgtmp/pmcheck 2>&1
+    bgstatus=$?
+    if [ -s $bgtmp/pmcheck ]
     then
 	pmpost "pmlogger_check failed in $prog, mailing output to root"
 	if [ ! -z "$MAIL" ]
 	then
-	    $MAIL -s "pmlogger_check failed in $prog" root <$tmp/pmcheck
+	    $MAIL -s "pmlogger_check failed in $prog" root <$bgtmp/pmcheck
 	else
 	    echo "$prog: pmlogger_check failed ..."
-	    cat $tmp/pmcheck
+	    cat $bgtmp/pmcheck
 	fi
     fi
-    rm -f $tmp/pmcheck
+    exit $bgstatus	# co-process is now complete
 }
 
 _start_pmlogger()
-- 
1.7.10.4