File bsc#1181744-0004-Refactor-fencer-optimize-merging-of-fencing-history-.patch of Package pacemaker.19127
From dbc27b2ac63ab6b7d2385da384c70d1c13e93369 Mon Sep 17 00:00:00 2001
From: "Gao,Yan" <ygao@suse.com>
Date: Wed, 17 Feb 2021 11:36:50 +0100
Subject: [PATCH 4/4] Refactor: fencer: optimize merging of fencing history by
removing unneeded entries on creation of history diff
---
daemons/fenced/fenced_history.c | 161 ++++++++++++++++----------------
1 file changed, 82 insertions(+), 79 deletions(-)
Index: pacemaker-2.0.4+20200616.2deceaa3a/daemons/fenced/fenced_history.c
===================================================================
--- pacemaker-2.0.4+20200616.2deceaa3a.orig/daemons/fenced/fenced_history.c
+++ pacemaker-2.0.4+20200616.2deceaa3a/daemons/fenced/fenced_history.c
@@ -260,7 +260,7 @@ stonith_xml_history_to_list(xmlNode *his
/*!
* \internal
* \brief Craft xml difference between local fence-history and a history
- * coming from remote
+ * coming from remote, and merge the remote history into the local
*
* \param[in] remote_history Fence-history as hash-table (may be NULL)
* \param[in] add_id If crafting the answer for an API
@@ -270,21 +270,23 @@ stonith_xml_history_to_list(xmlNode *his
* \return The fence-history as xml
*/
static xmlNode *
-stonith_local_history_diff(GHashTable *remote_history,
+stonith_local_history_diff_and_merge(GHashTable *remote_history,
gboolean add_id,
const char *target)
{
xmlNode *history = NULL;
+ GHashTableIter iter;
+ remote_fencing_op_t *op = NULL;
+ gboolean updated = FALSE;
int cnt = 0;
if (stonith_remote_op_list) {
- GHashTableIter iter;
- remote_fencing_op_t *op = NULL;
+ char *id = NULL;
history = create_xml_node(NULL, F_STONITH_HISTORY_LIST);
g_hash_table_iter_init(&iter, stonith_remote_op_list);
- while (g_hash_table_iter_next(&iter, NULL, (void **)&op)) {
+ while (g_hash_table_iter_next(&iter, (void **)&id, (void **)&op)) {
xmlNode *entry = NULL;
if (remote_history) {
@@ -292,10 +294,25 @@ stonith_local_history_diff(GHashTable *r
g_hash_table_lookup(remote_history, op->id);
if (remote_op) {
- if (remote_op->state != st_failed
- && remote_op->state != st_done
- && (op->state == st_failed
- || op->state == st_done)) {
+ if (stonith__op_state_pending(op->state)
+ && !stonith__op_state_pending(remote_op->state)) {
+
+ crm_debug("Updating outdated pending operation %.8s "
+ "(state=%d) according to the one (state=%d) from "
+ "remote peer history",
+ op->id, op->state,
+ remote_op->state);
+
+ g_hash_table_steal(remote_history, op->id);
+ op->id = remote_op->id;
+ remote_op->id = id;
+ g_hash_table_iter_replace(&iter, remote_op);
+
+ updated = TRUE;
+ continue; /* skip outdated entries */
+
+ } else if (!stonith__op_state_pending(op->state)
+ && stonith__op_state_pending(remote_op->state)) {
crm_debug("Broadcasting operation %.8s (state=%d) to "
"update the outdated pending one "
@@ -303,7 +320,10 @@ stonith_local_history_diff(GHashTable *r
op->id, op->state,
remote_op->state);
+ g_hash_table_remove(remote_history, op->id);
+
} else {
+ g_hash_table_remove(remote_history, op->id);
continue; /* skip entries broadcasted already */
}
}
@@ -329,6 +349,47 @@ stonith_local_history_diff(GHashTable *r
}
}
+ if (remote_history) {
+ init_stonith_remote_op_hash_table(&stonith_remote_op_list);
+
+ updated |= g_hash_table_size(remote_history);
+
+ g_hash_table_iter_init(&iter, remote_history);
+ while (g_hash_table_iter_next(&iter, NULL, (void **)&op)) {
+
+ if (stonith__op_state_pending(op->state) &&
+ safe_str_eq(op->originator, stonith_our_uname)) {
+ crm_warn("Failing pending operation %.8s originated by us but "
+ "known only from peer history", op->id);
+ op->state = st_failed;
+ op->completed = time(NULL);
+ /* use -EHOSTUNREACH to not introduce a new return-code that might
+ trigger unexpected results at other places and to prevent
+ remote_op_done from setting the delegate if not present
+ */
+ stonith_bcast_result_to_peers(op, -EHOSTUNREACH, FALSE);
+ }
+
+ g_hash_table_iter_steal(&iter);
+ g_hash_table_replace(stonith_remote_op_list, op->id, op);
+ /* we could trim the history here but if we bail
+ * out after trim we might miss more recent entries
+ * of those that might still be in the list
+ * if we don't bail out trimming once is more
+ * efficient and memory overhead is minimal as
+ * we are just moving pointers from one hash to
+ * another
+ */
+ }
+
+ g_hash_table_destroy(remote_history); /* remove what is left */
+ }
+
+ if (updated) {
+ stonith_fence_history_trim();
+ do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL);
+ }
+
if (cnt == 0) {
free_xml(history);
return NULL;
@@ -339,75 +400,18 @@ stonith_local_history_diff(GHashTable *r
/*!
* \internal
- * \brief Merge fence-history coming from remote into local history
+ * \brief Craft xml from the local fence-history
+ *
+ * \param[in] add_id If crafting the answer for an API
+ * history-request there is no need for the id
+ * \param[in] target Optionally limit to certain fence-target
*
- * \param[in] history Hash-table holding remote history to be merged in
+ * \return The fence-history as xml
*/
-static void
-stonith_merge_in_history_list(GHashTable *history)
+static xmlNode *
+stonith_local_history(gboolean add_id, const char *target)
{
- GHashTableIter iter;
- remote_fencing_op_t *op = NULL;
- gboolean updated = FALSE;
-
- if (!history) {
- return;
- }
-
- init_stonith_remote_op_hash_table(&stonith_remote_op_list);
-
- g_hash_table_iter_init(&iter, history);
- while (g_hash_table_iter_next(&iter, NULL, (void **)&op)) {
- remote_fencing_op_t *stored_op =
- g_hash_table_lookup(stonith_remote_op_list, op->id);
-
- if (stored_op) {
- if (stored_op->state != st_failed
- && stored_op->state != st_done
- && (op->state == st_failed || op->state == st_done)) {
- crm_debug("Updating outdated pending operation %.8s "
- "(state=%d) according to the one (state=%d) from "
- "remote peer history",
- op->id, stored_op->state,
- op->state);
-
- } else {
- continue; // Skip existent
- }
- }
-
- updated = TRUE;
- g_hash_table_iter_steal(&iter);
-
- if ((op->state != st_failed) &&
- (op->state != st_done) &&
- safe_str_eq(op->originator, stonith_our_uname)) {
- crm_warn("received pending action we are supposed to be the "
- "owner but it's not in our records -> fail it");
- op->state = st_failed;
- op->completed = time(NULL);
- /* use -EHOSTUNREACH to not introduce a new return-code that might
- trigger unexpected results at other places and to prevent
- remote_op_done from setting the delegate if not present
- */
- stonith_bcast_result_to_peers(op, -EHOSTUNREACH, FALSE);
- }
-
- g_hash_table_insert(stonith_remote_op_list, op->id, op);
- /* we could trim the history here but if we bail
- * out after trim we might miss more recent entries
- * of those that might still be in the list
- * if we don't bail out trimming once is more
- * efficient and memory overhead is minimal as
- * we are just moving pointers from one hash to
- * another
- */
- }
- stonith_fence_history_trim();
- if (updated) {
- do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL);
- }
- g_hash_table_destroy(history); /* remove what is left */
+ return stonith_local_history_diff_and_merge(NULL, add_id, target);
}
/*!
@@ -463,7 +467,7 @@ stonith_fence_history(xmlNode *msg, xmlN
* so that every node can merge and broadcast
* what it has on top
*/
- out_history = stonith_local_history_diff(NULL, TRUE, NULL);
+ out_history = stonith_local_history(TRUE, NULL);
crm_trace("Broadcasting history to peers");
stonith_send_broadcast_history(out_history,
st_opt_broadcast | st_opt_discard_reply,
@@ -488,7 +492,7 @@ stonith_fence_history(xmlNode *msg, xmlN
!crm_is_true(crm_element_value(history,
F_STONITH_DIFFERENTIAL))) {
out_history =
- stonith_local_history_diff(received_history, TRUE, NULL);
+ stonith_local_history_diff_and_merge(received_history, TRUE, NULL);
if (out_history) {
crm_trace("Broadcasting history-diff to peers");
crm_xml_add(out_history, F_STONITH_DIFFERENTIAL,
@@ -500,7 +504,6 @@ stonith_fence_history(xmlNode *msg, xmlN
crm_trace("History-diff is empty - skip broadcast");
}
}
- stonith_merge_in_history_list(received_history);
} else {
crm_trace("Skipping history-query-broadcast (%s%s)"
" we sent ourselves",
@@ -511,7 +514,7 @@ stonith_fence_history(xmlNode *msg, xmlN
/* plain history request */
crm_trace("Looking for operations on %s in %p", target,
stonith_remote_op_list);
- *output = stonith_local_history_diff(NULL, FALSE, target);
+ *output = stonith_local_history(FALSE, target);
}
free_xml(out_history);
return rc;