File cluster_support_mirrord_log.diff of Package lvm2.openSUSE_Leap_42.1_Update

Subject: Parallelize dispatching
References: FATE#314367
---
 daemons/cmirrord/Makefile.in |    2 
 daemons/cmirrord/cluster.c   |  110 +++++++++++++++++++++++++++----------------
 daemons/cmirrord/functions.c |   37 ++++++++++++--
 daemons/cmirrord/local.c     |    3 -
 lib/metadata/mirror.c        |   24 ---------
 lib/mirror/mirrored.c        |    5 +
 tools/lvconvert.c            |    9 ---
 7 files changed, 109 insertions(+), 81 deletions(-)

--- a/daemons/cmirrord/Makefile.in
+++ b/daemons/cmirrord/Makefile.in
@@ -26,7 +26,7 @@ TARGETS = cmirrord
 
 include $(top_builddir)/make.tmpl
 
-LIBS += -ldevmapper
+LIBS += -ldevmapper -lpthread
 LMLIBS += $(CPG_LIBS) $(SACKPT_LIBS)
 CFLAGS += $(CPG_CFLAGS) $(SACKPT_CFLAGS) $(EXTRA_EXEC_CFLAGS)
 LDFLAGS += $(EXTRA_EXEC_LDFLAGS)
--- a/daemons/cmirrord/cluster.c
+++ b/daemons/cmirrord/cluster.c
@@ -22,6 +22,7 @@
 #include <errno.h>
 #include <signal.h>
 #include <unistd.h>
+#include <pthread.h>
 #if CMIRROR_HAS_CHECKPOINT
 #include <openais/saAis.h>
 #include <openais/saCkpt.h>
@@ -151,9 +152,11 @@ struct clog_cpg {
 	struct checkpoint_data *checkpoint_list;
 	int idx;
 	char debugging[DEBUGGING_HISTORY][128];
+	pthread_t thread_pid;
 };
 
 static struct dm_list clog_cpg_list;
+static pthread_rwlock_t clog_cpg_lock = PTHREAD_RWLOCK_INITIALIZER;
 
 /*
  * cluster_send
@@ -168,12 +171,14 @@ int cluster_send(struct clog_request *rq
 	struct iovec iov;
 	struct clog_cpg *entry;
 
+	pthread_rwlock_rdlock(&clog_cpg_lock);
 	dm_list_iterate_items(entry, &clog_cpg_list)
 		if (!strncmp(entry->name.value, rq->u_rq.uuid,
 			     CPG_MAX_NAME_LENGTH)) {
 			found = 1;
 			break;
 		}
+	pthread_rwlock_unlock(&clog_cpg_lock);
 
 	if (!found) {
 		rq->u_rq.error = -ENOENT;
@@ -254,11 +259,11 @@ static struct clog_request *get_matching
 	return NULL;
 }
 
-static char rq_buffer[DM_ULOG_REQUEST_SIZE];
 static int handle_cluster_request(struct clog_cpg *entry __attribute__((unused)),
 				  struct clog_request *rq, int server)
 {
 	int r = 0;
+	char rq_buffer[DM_ULOG_REQUEST_SIZE];
 	struct clog_request *tmp = (struct clog_request *)rq_buffer;
 
 	/*
@@ -369,9 +374,13 @@ static struct clog_cpg *find_clog_cpg(cp
 {
 	struct clog_cpg *match;
 
+	pthread_rwlock_rdlock(&clog_cpg_lock);
 	dm_list_iterate_items(match, &clog_cpg_list)
-		if (match->handle == handle)
+		if (match->handle == handle) {
+			pthread_rwlock_unlock(&clog_cpg_lock);
 			return match;
+		}
+	pthread_rwlock_unlock(&clog_cpg_lock);
 
 	return NULL;
 }
@@ -981,34 +990,21 @@ static int resend_requests(struct clog_c
 	return r;
 }
 
-static int do_cluster_work(void *data __attribute__((unused)))
+static void cluster_thread_fn(void *data)
 {
 	int r = CS_OK;
-	struct clog_cpg *entry, *tmp;
-
-	dm_list_iterate_items_safe(entry, tmp, &clog_cpg_list) {
-		r = cpg_dispatch(entry->handle, CS_DISPATCH_ALL);
-		if (r != CS_OK) {
-			if ((r == CS_ERR_BAD_HANDLE) &&
-			    ((entry->state == INVALID) ||
-			     (entry->state == LEAVING)))
-				/* It's ok if we've left the cluster */
-				r = CS_OK;
-			else
-				LOG_ERROR("cpg_dispatch failed: %s",
-					  str_ais_error(r));
-		}
-
-		if (entry->free_me) {
-			free(entry);
-			continue;
-		}
-		do_checkpoints(entry, 0);
-
-		resend_requests(entry);
+	struct clog_cpg *match = data;
+	r = cpg_dispatch(match->handle, CS_DISPATCH_BLOCKING);
+	if (r != CS_OK) {
+		if ((r == CS_ERR_BAD_HANDLE) &&
+		    ((match->state == INVALID) ||
+		     (match->state == LEAVING)))
+			/* It's ok if we've left the cluster */
+			r = CS_OK;
+		else
+			LOG_ERROR("cpg_dispatch failed: %s",
+				  str_ais_error(r));
 	}
-
-	return (r == CS_OK) ? 0 : -1;  /* FIXME: good error number? */
 }
 
 static int flush_startup_list(struct clog_cpg *entry)
@@ -1061,23 +1057,37 @@ static int flush_startup_list(struct clo
 	return 0;
 }
 
+static void do_cpg_message_callback(struct clog_cpg *match, uint32_t nodeid,
+				    void *msg, size_t msg_len);
+
 static void cpg_message_callback(cpg_handle_t handle, const struct cpg_name *gname __attribute__((unused)),
 				 uint32_t nodeid, uint32_t pid __attribute__((unused)),
 				 void *msg, size_t msg_len)
 {
+	struct clog_cpg *entry;
+
+	entry = find_clog_cpg(handle);
+	if (!entry) {
+		LOG_ERROR("Unable to find clog_cpg for cluster message");
+		return;
+	}
+	do_cpg_message_callback(entry, nodeid, msg, msg_len);
+
+	do_checkpoints(entry, 0);
+	resend_requests(entry);
+
+}
+
+static void do_cpg_message_callback(struct clog_cpg *match, uint32_t nodeid,
+				    void *msg, size_t msg_len)
+{
 	int i;
 	int r = 0;
 	int i_am_server;
 	int response = 0;
 	struct clog_request *rq = msg;
 	struct clog_request *tmp_rq, *tmp_rq2;
-	struct clog_cpg *match;
 
-	match = find_clog_cpg(handle);
-	if (!match) {
-		LOG_ERROR("Unable to find clog_cpg for cluster message");
-		return;
-	}
 
 	/*
 	 * Perform necessary endian and version compatibility conversions
@@ -1373,7 +1383,7 @@ static void cpg_leave_callback(struct cl
 			       size_t member_list_entries)
 {
 	unsigned i;
-	int j, fd;
+	int j;
 	uint32_t lowest = match->lowest_id;
 	struct clog_request *rq, *n;
 	struct checkpoint_data *p_cp, *c_cp;
@@ -1384,10 +1394,9 @@ static void cpg_leave_callback(struct cl
 	/* Am I leaving? */
 	if (my_cluster_id == left->nodeid) {
 		LOG_DBG("Finalizing leave...");
+		pthread_rwlock_wrlock(&clog_cpg_lock);
 		dm_list_del(&match->list);
-
-		cpg_fd_get(match->handle, &fd);
-		links_unregister(fd);
+		pthread_rwlock_unlock(&clog_cpg_lock);
 
 		cluster_postsuspend(match->name.value, match->luid);
 
@@ -1515,11 +1524,13 @@ static void cpg_config_callback(cpg_hand
 	struct clog_cpg *match;
 	int found = 0;
 
+	pthread_rwlock_rdlock(&clog_cpg_lock);
 	dm_list_iterate_items(match, &clog_cpg_list)
 		if (match->handle == handle) {
 			found = 1;
 			break;
 		}
+	pthread_rwlock_unlock(&clog_cpg_lock);
 
 	if (!found) {
 		LOG_ERROR("Unable to find match for CPG config callback");
@@ -1536,6 +1547,16 @@ static void cpg_config_callback(cpg_hand
 	else
 		cpg_leave_callback(match, left_list,
 				   member_list, member_list_entries);
+
+
+	if (match->free_me) {
+		LOG_DBG("closing thread %x", (unsigned int)match->thread_pid);
+		free(match);
+		return;
+	}
+
+	do_checkpoints(match, 0);
+	resend_requests(match);
 }
 
 cpg_callbacks_t cpg_callbacks = {
@@ -1603,12 +1624,16 @@ int create_cluster_cpg(char *uuid, uint6
 	size_t size;
 	struct clog_cpg *new = NULL;
 	struct clog_cpg *tmp;
+	pthread_t new_pid;
 
+	pthread_rwlock_rdlock(&clog_cpg_lock);
 	dm_list_iterate_items(tmp, &clog_cpg_list)
 		if (!strncmp(tmp->name.value, uuid, CPG_MAX_NAME_LENGTH)) {
 			LOG_ERROR("Log entry already exists: %s", uuid);
+			pthread_rwlock_unlock(&clog_cpg_lock);
 			return -EEXIST;
 		}
+	pthread_rwlock_unlock(&clog_cpg_lock);
 
 	new = malloc(sizeof(*new));
 	if (!new) {
@@ -1650,13 +1675,16 @@ int create_cluster_cpg(char *uuid, uint6
 	}
 
 	new->cpg_state = VALID;
+	pthread_rwlock_wrlock(&clog_cpg_lock);
 	dm_list_add(&clog_cpg_list, &new->list);
+	pthread_rwlock_unlock(&clog_cpg_lock);
+
 	LOG_DBG("New   handle: %llu", (unsigned long long)new->handle);
 	LOG_DBG("New   name: %s", new->name.value);
 
-	/* FIXME: better variable */
-	cpg_fd_get(new->handle, &r);
-	links_register(r, "cluster", do_cluster_work, NULL);
+	pthread_create(&new_pid, NULL, (void *)cluster_thread_fn, (void*)new);
+	new->thread_pid = new_pid;
+	pthread_detach(new_pid);
 
 	return 0;
 }
@@ -1725,9 +1753,11 @@ int destroy_cluster_cpg(char *uuid)
 {
 	struct clog_cpg *del, *tmp;
 
+	pthread_rwlock_rdlock(&clog_cpg_lock);
 	dm_list_iterate_items_safe(del, tmp, &clog_cpg_list)
 		if (!strncmp(del->name.value, uuid, CPG_MAX_NAME_LENGTH))
 			_destroy_cluster_cpg(del);
+	pthread_rwlock_unlock(&clog_cpg_lock);
 
 	return 0;
 }
--- a/daemons/cmirrord/functions.c
+++ b/daemons/cmirrord/functions.c
@@ -19,6 +19,7 @@
 #include <sys/stat.h>
 #include <time.h>
 #include <unistd.h>
+#include <pthread.h>
 
 #define BYTE_SHIFT 3
 
@@ -105,6 +106,9 @@ struct recovery_request {
 static DM_LIST_INIT(log_list);
 static DM_LIST_INIT(log_pending_list);
 
+static pthread_rwlock_t log_list_lock = PTHREAD_RWLOCK_INITIALIZER;
+static pthread_rwlock_t log_pending_lock = PTHREAD_RWLOCK_INITIALIZER;
+
 static int log_test_bit(dm_bitset_t bs, int bit)
 {
 	return dm_bit(bs, bit) ? 1 : 0;
@@ -151,11 +155,15 @@ static struct log_c *get_log(const char
 {
 	struct log_c *lc;
 
+	pthread_rwlock_rdlock(&log_list_lock);
 	dm_list_iterate_items(lc, &log_list)
 		if (!strcmp(lc->uuid, uuid) &&
-		    (!luid || (luid == lc->luid)))
+		    (!luid || (luid == lc->luid))) {
+			pthread_rwlock_unlock(&log_list_lock);
 			return lc;
+		}
 
+	pthread_rwlock_unlock(&log_list_lock);
 	return NULL;
 }
 
@@ -171,10 +179,14 @@ static struct log_c *get_pending_log(con
 {
 	struct log_c *lc;
 
+	pthread_rwlock_rdlock(&log_pending_lock);
 	dm_list_iterate_items(lc, &log_pending_list)
 		if (!strcmp(lc->uuid, uuid) &&
-		    (!luid || (luid == lc->luid)))
+		    (!luid || (luid == lc->luid))) {
+			pthread_rwlock_unlock(&log_pending_lock);
 			return lc;
+		}
+	pthread_rwlock_unlock(&log_pending_lock);
 
 	return NULL;
 }
@@ -519,7 +531,9 @@ static int _clog_ctr(char *uuid, uint64_
 		LOG_DBG("Disk log ready");
 	}
 
+	pthread_rwlock_wrlock(&log_pending_lock);
 	dm_list_add(&log_pending_list, &lc->list);
+	pthread_rwlock_unlock(&log_pending_lock);
 
 	return 0;
 fail:
@@ -643,7 +657,10 @@ static int clog_dtr(struct dm_ulog_reque
 
 	LOG_DBG("[%s] Cluster log removed", SHORT_UUID(lc->uuid));
 
+	pthread_rwlock_wrlock(&log_list_lock);
 	dm_list_del(&lc->list);
+	pthread_rwlock_unlock(&log_list_lock);
+
 	if (lc->disk_fd != -1 && close(lc->disk_fd))
 		LOG_ERROR("Failed to close disk log: %s",
 			  strerror(errno));
@@ -715,8 +732,13 @@ int cluster_postsuspend(char *uuid, uint
 	lc->resume_override = 0;
 
 	/* move log to pending list */
+	pthread_rwlock_wrlock(&log_list_lock);
 	dm_list_del(&lc->list);
+	pthread_rwlock_unlock(&log_list_lock);
+
+	pthread_rwlock_wrlock(&log_pending_lock);
 	dm_list_add(&log_pending_list, &lc->list);
+	pthread_rwlock_unlock(&log_pending_lock);
 
 	return 0;
 }
@@ -820,9 +842,9 @@ no_disk:
 	if (commit_log && (lc->disk_fd >= 0)) {
 		rq->error = write_log(lc);
 		if (rq->error)
-			LOG_ERROR("Failed initial disk log write");
+			LOG_ERROR("[%s] Failed initial disk log write", SHORT_UUID(lc->uuid));
 		else
-			LOG_DBG("Disk log initialized");
+			LOG_DBG("[%s] Disk log initialized", SHORT_UUID(lc->uuid));
 		lc->touched = 0;
 	}
 out:
@@ -904,8 +926,13 @@ int local_resume(struct dm_ulog_request
 		}
 
 		/* move log to official list */
+		pthread_rwlock_wrlock(&log_pending_lock);
 		dm_list_del(&lc->list);
+		pthread_rwlock_unlock(&log_pending_lock);
+
+		pthread_rwlock_wrlock(&log_list_lock);
 		dm_list_add(&log_list, &lc->list);
+		pthread_rwlock_unlock(&log_list_lock);
 	}
 
 	return 0;
@@ -1928,7 +1955,6 @@ void log_debug(void)
 
 	LOG_ERROR("");
 	LOG_ERROR("LOG COMPONENT DEBUGGING::");
-	LOG_ERROR("Official log list:");
 	LOG_ERROR("Pending log list:");
 	dm_list_iterate_items(lc, &log_pending_list) {
 		LOG_ERROR("%s", lc->uuid);
@@ -1938,6 +1964,7 @@ void log_debug(void)
 		print_bits(lc->clean_bits, 1);
 	}
 
+	LOG_ERROR("Official log list:");
 	dm_list_iterate_items(lc, &log_list) {
 		LOG_ERROR("%s", lc->uuid);
 		LOG_ERROR("  recoverer        : %" PRIu32, lc->recoverer);
--- a/daemons/cmirrord/local.c
+++ b/daemons/cmirrord/local.c
@@ -29,13 +29,13 @@
 
 static int cn_fd = -1;  /* Connector (netlink) socket fd */
 static char recv_buf[2048];
-static char send_buf[2048];
 
 
 /* FIXME: merge this function with kernel_send_helper */
 static int kernel_ack(uint32_t seq, int error)
 {
 	int r;
+	char send_buf[2048];
 	struct nlmsghdr *nlh = (struct nlmsghdr *)send_buf;
 	struct cn_msg *msg = NLMSG_DATA(nlh);
 
@@ -179,6 +179,7 @@ static int kernel_send_helper(void *data
 	int r;
 	struct nlmsghdr *nlh;
 	struct cn_msg *msg;
+	char send_buf[2048];
 
 	memset(send_buf, 0, sizeof(send_buf));
 
--- a/lib/metadata/mirror.c
+++ b/lib/metadata/mirror.c
@@ -1946,10 +1946,6 @@ int add_mirror_log(struct cmd_context *c
 	unsigned old_log_count;
 	int r = 0;
 
-	if (vg_is_clustered(lv->vg) && (log_count > 1)) {
-		log_error("Log type, \"mirrored\", is unavailable to cluster mirrors");
-		return 0;
-	}
 
 	if (dm_list_size(&lv->segments) != 1) {
 		log_error("Multiple-segment mirror is not supported");
@@ -2113,26 +2109,6 @@ int lv_add_mirrors(struct cmd_context *c
 		return 0;
 	}
 
-	if (vg_is_clustered(lv->vg)) {
-		/* FIXME: move this test out of this function */
-		/* Skip test for pvmove mirrors, it can use local mirror */
-		if (!lv_is_pvmove(lv) && !lv_is_locked(lv) &&
-		    lv_is_active(lv) &&
-		    !lv_is_active_exclusive_locally(lv) && /* lv_is_active_remotely */
-		    !cluster_mirror_is_available(lv->vg->cmd)) {
-			log_error("Shared cluster mirrors are not available.");
-			return 0;
-		}
-
-		/*
-		 * No mirrored logs for cluster mirrors until
-		 * log daemon is multi-threaded.
-		 */
-		if (log_count > 1) {
-			log_error("Log type, \"mirrored\", is unavailable to cluster mirrors");
-			return 0;
-		}
-	}
 
 	/* For corelog mirror, activation code depends on
 	 * the global mirror_in_sync status. As we are adding
--- a/lib/mirror/mirrored.c
+++ b/lib/mirror/mirrored.c
@@ -361,12 +361,15 @@ static int _add_log(struct dm_pool *mem,
 			return 0;
 		}
 	} else {
-		/* If core log, use mirror's UUID and set DM_CORELOG flag */
+		/* If core log, use mirror's (UUID + CORE) and set DM_CORELOG flag */
 		if (!(log_dlid = build_dm_uuid(mem, seg->lv, NULL))) {
 			log_error("Failed to build uuid for mirror LV %s.",
 				  seg->lv->name);
 			return 0;
 		}
+		if (clustered)
+			memcpy(&log_dlid[strlen(log_dlid) - 4], "CORE", 4);
+
 		log_flags |= DM_CORELOG;
 	}
 
--- a/tools/lvconvert.c
+++ b/tools/lvconvert.c
@@ -1364,15 +1364,6 @@ static int _lvconvert_mirrors_parse_para
 	*new_log_count = arg_int_value(cmd, mirrorlog_ARG,
 				       arg_is_set(cmd, corelog_ARG) ? MIRROR_LOG_CORE : DEFAULT_MIRRORLOG);
 
-	/*
-	 * No mirrored logs for cluster mirrors until
-	 * log daemon is multi-threaded.
-	 */
-	if ((*new_log_count == MIRROR_LOG_MIRRORED) && vg_is_clustered(lv->vg)) {
-		log_error("Log type, \"mirrored\", is unavailable to cluster mirrors");
-		return 0;
-	}
-
	log_verbose("Setting logging type to %s", get_mirror_log_name(*new_log_count));
 
 	/*