File pacemaker-crmd-dont-restart-transition-if-no-fence-devices.patch of Package pacemaker.14737
commit 9e9a271fd666ff371487f22c28ba9e420a22434c
Author: Ken Gaillot <kgaillot@redhat.com>
Date: Mon Apr 17 18:18:42 2017 -0500
Fix: crmd: don't restart transition if no fence devices
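This restores the behavior removed by commit ff881376, but only where it is
needed: when a fencing operation fails with -ENODEV (no fence devices found),
the transition is stopped rather than restarted.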
Index: pacemaker-1.1.16+20170320.77ea74d/crmd/crmd_utils.h
===================================================================
--- pacemaker-1.1.16+20170320.77ea74d.orig/crmd/crmd_utils.h
+++ pacemaker-1.1.16+20170320.77ea74d/crmd/crmd_utils.h
@@ -19,6 +19,7 @@
# define CRMD_UTILS__H
# include <crm/crm.h>
+# include <crm/transition.h>
# include <crm/common/xml.h>
# include <crm/cib/internal.h> /* For CIB_OP_MODIFY */
# include "notify.h"
@@ -102,7 +103,8 @@ void crmd_join_phase_log(int level);
const char *get_timer_desc(fsa_timer_t * timer);
void st_fail_count_reset(const char * target);
void st_fail_count_increment(const char *target);
-void abort_for_stonith_failure(const char *target, xmlNode *reason);
+void abort_for_stonith_failure(enum transition_action abort_action,
+ const char *target, xmlNode *reason);
void crmd_peer_down(crm_node_t *peer, bool full);
/* Convenience macro for registering a CIB callback
Index: pacemaker-1.1.16+20170320.77ea74d/crmd/te_callbacks.c
===================================================================
--- pacemaker-1.1.16+20170320.77ea74d.orig/crmd/te_callbacks.c
+++ pacemaker-1.1.16+20170320.77ea74d/crmd/te_callbacks.c
@@ -729,18 +729,18 @@ st_fail_count_increment(const char *targ
* \internal
* \brief Abort transition due to stonith failure
*
+ * \param[in] abort_action Whether to restart or stop transition
* \param[in] target Don't restart if this (NULL for any) has too many failures
* \param[in] reason Log this stonith action XML as abort reason (or NULL)
*/
void
-abort_for_stonith_failure(const char *target, xmlNode *reason)
+abort_for_stonith_failure(enum transition_action abort_action,
+ const char *target, xmlNode *reason)
{
- enum transition_action abort_action = tg_restart;
-
/* If stonith repeatedly fails, we eventually give up on starting a new
* transition for that reason.
*/
- if (too_many_st_failures(target)) {
+ if ((abort_action != tg_stop) && too_many_st_failures(target)) {
abort_action = tg_stop;
}
abort_transition(INFINITY, abort_action, "Stonith failed", reason);
@@ -807,11 +807,22 @@ tengine_stonith_callback(stonith_t * sto
} else {
const char *target = crm_element_value_const(action->xml, XML_LRM_ATTR_TARGET);
+ enum transition_action abort_action = tg_restart;
action->failed = TRUE;
crm_notice("Stonith operation %d for %s failed (%s): aborting transition.",
call_id, target, pcmk_strerror(rc));
- abort_for_stonith_failure(target, NULL);
+
+ /* If no fence devices were available, there's no use in immediately
+ * checking again, so don't start a new transition in that case.
+ */
+ if (rc == -ENODEV) {
+ crm_warn("No devices found in cluster to fence %s, giving up",
+ target);
+ abort_action = tg_stop;
+ }
+
+ abort_for_stonith_failure(abort_action, target, NULL);
}
update_graph(transition_graph, action);
Index: pacemaker-1.1.16+20170320.77ea74d/crmd/te_utils.c
===================================================================
--- pacemaker-1.1.16+20170320.77ea74d.orig/crmd/te_utils.c
+++ pacemaker-1.1.16+20170320.77ea74d/crmd/te_utils.c
@@ -162,7 +162,7 @@ fail_incompletable_stonith(crm_graph_t *
if (last_action != NULL) {
crm_warn("STONITHd failure resulted in un-runnable actions");
- abort_for_stonith_failure(NULL, last_action);
+ abort_for_stonith_failure(tg_restart, NULL, last_action);
return TRUE;
}
Index: pacemaker-1.1.16+20170320.77ea74d/include/crm/transition.h
===================================================================
--- pacemaker-1.1.16+20170320.77ea74d.orig/include/crm/transition.h
+++ pacemaker-1.1.16+20170320.77ea74d/include/crm/transition.h
@@ -15,6 +15,8 @@
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
+#ifndef CRM_TRANSITION__H
+# define CRM_TRANSITION__H
#include <crm/crm.h>
#include <crm/msg_xml.h>
@@ -147,3 +149,5 @@ bool update_abort_priority(crm_graph_t *
const char *actiontype2text(action_type_e type);
lrmd_event_data_t *convert_graph_action(xmlNode * resource, crm_action_t * action, int status,
int rc);
+
+#endif