File 0005-Add-a-new-clustered-disk.patch of Package mdadm

From 6ec321d56414392a9da9815733b94868f764bcd1 Mon Sep 17 00:00:00 2001
From: Guoqing Jiang <gqjiang@suse.com>
Date: Tue, 7 Jul 2015 14:52:53 +0800
Subject: [PATCH 05/11] Add a new clustered disk

A clustered disk is added by the traditional --add sequence.
However, other nodes need to acknowledge that they can "see"
the device. This is done by --cluster-confirm:

--cluster-confirm SLOTNUM:/dev/whatever (if disk is found)
or
--cluster-confirm SLOTNUM:missing (if disk is not found)

The node initiating the --add has the disk state tagged with
MD_DISK_CLUSTER_ADD, and each confirming node tags the disk with
MD_DISK_CANDIDATE.

Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
Signed-off-by: NeilBrown <neilb@suse.de>
---
 Manage.c   | 37 ++++++++++++++++++++++++++++++++++---
 ReadMe.c   |  1 +
 md_p.h     |  7 +++++++
 md_u.h     |  1 +
 mdadm.8.in |  9 +++++++++
 mdadm.c    |  4 ++++
 mdadm.h    |  2 ++
 util.c     | 10 ++++++++++
 8 files changed, 68 insertions(+), 3 deletions(-)

diff --git a/Manage.c b/Manage.c
index 1cf590a..4768bf1 100644
--- a/Manage.c
+++ b/Manage.c
@@ -713,7 +713,8 @@ skip_re_add:
 int Manage_add(int fd, int tfd, struct mddev_dev *dv,
 	       struct supertype *tst, mdu_array_info_t *array,
 	       int force, int verbose, char *devname,
-	       char *update, unsigned long rdev, unsigned long long array_size)
+	       char *update, unsigned long rdev, unsigned long long array_size,
+	       int raid_slot)
 {
 	unsigned long long ldsize;
 	struct supertype *dev_st = NULL;
@@ -906,7 +907,10 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
 	}
 	disc.major = major(rdev);
 	disc.minor = minor(rdev);
-	disc.number =j;
+	if (raid_slot < 0)
+		disc.number = j;
+	else
+		disc.number = raid_slot;
 	disc.state = 0;
 	if (array->not_persistent==0) {
 		int dfd;
@@ -949,6 +953,14 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
 			}
 		free(used);
 	}
+
+	if (array->state & (1 << MD_SB_CLUSTERED)) {
+		if (dv->disposition == 'c')
+			disc.state |= (1 << MD_DISK_CANDIDATE);
+		else
+			disc.state |= (1 << MD_DISK_CLUSTER_ADD);
+	}
+
 	if (dv->writemostly == 1)
 		disc.state |= (1 << MD_DISK_WRITEMOSTLY);
 	if (dv->failfast == 1)
@@ -1277,6 +1289,7 @@ int Manage_subdevs(char *devname, int fd,
 	 *        variant on 'A'
 	 *  'F' - Another variant of 'A', where the device was faulty
 	 *        so must be removed from the array first.
+	 *  'c' - confirm the device as found (for clustered environments)
 	 *
 	 * For 'f' and 'r', the device can also be a kernel-internal
 	 * name such as 'sdb'.
@@ -1293,6 +1306,7 @@ int Manage_subdevs(char *devname, int fd,
 	struct mdinfo info;
 	int frozen = 0;
 	int busy = 0;
+	int raid_slot = -1;
 
 	if (ioctl(fd, GET_ARRAY_INFO, &array)) {
 		pr_err("Cannot get array info for %s\n",
@@ -1320,6 +1334,17 @@ int Manage_subdevs(char *devname, int fd,
 	for (dv = devlist; dv; dv = dv->next) {
 		int rv;
 
+		raid_slot = -1;
+		if (dv->disposition == 'c') {
+			rv = parse_cluster_confirm_arg(dv->devname,
+						       &dv->devname,
+						       &raid_slot);
+			if (rv) {
+				pr_err("Could not get the devname of cluster\n");
+				goto abort;
+			}
+		}
+
 		if (strcmp(dv->devname, "failed") == 0 ||
 		    strcmp(dv->devname, "faulty") == 0) {
 			if (dv->disposition != 'A'
@@ -1347,6 +1372,11 @@ int Manage_subdevs(char *devname, int fd,
 		if (strcmp(dv->devname, "missing") == 0) {
 			struct mddev_dev *add_devlist = NULL;
 			struct mddev_dev **dp;
+			if (dv->disposition == 'c') {
+				rv = ioctl(fd, CLUSTERED_DISK_NACK, NULL);
+				break;
+			}
+
 			if (dv->disposition != 'A') {
 				pr_err("'missing' only meaningful "
 				       "with --re-add\n");
@@ -1473,6 +1503,7 @@ int Manage_subdevs(char *devname, int fd,
 		case 'A':
 		case 'M': /* --re-add missing */
 		case 'F': /* --re-add faulty  */
+		case 'c': /* --cluster-confirm */
 			/* add the device */
 			if (subarray) {
 				pr_err("Cannot add disks to a"
@@ -1513,7 +1544,7 @@ int Manage_subdevs(char *devname, int fd,
 			}
 			rv = Manage_add(fd, tfd, dv, tst, &array,
 					force, verbose, devname, update,
-					stb.st_rdev, array_size);
+					stb.st_rdev, array_size, raid_slot);
 			close(tfd);
 			tfd = -1;
 			if (rv < 0)
diff --git a/ReadMe.c b/ReadMe.c
index 80f2be8..0e6aa17 100644
--- a/ReadMe.c
+++ b/ReadMe.c
@@ -171,6 +171,7 @@ struct option long_options[] = {
     {"wait",	  0, 0,  WaitOpt},
     {"wait-clean", 0, 0, Waitclean },
     {"action",    1, 0, Action },
+    {"cluster-confirm", 0, 0, ClusterConfirm},
 
     /* For Detail/Examine */
     {"brief",	  0, 0, Brief},
diff --git a/md_p.h b/md_p.h
index ea899a9..b22799b 100644
--- a/md_p.h
+++ b/md_p.h
@@ -78,6 +78,12 @@
 #define MD_DISK_ACTIVE		1 /* disk is running but may not be in sync */
 #define MD_DISK_SYNC		2 /* disk is in sync with the raid set */
 #define MD_DISK_REMOVED		3 /* disk is in sync with the raid set */
+#define MD_DISK_CLUSTER_ADD	4 /* Initiate a disk add across the cluster
+				   * For clustered environments only.
+				   */
+#define MD_DISK_CANDIDATE	5 /* disk is added as spare (local) until confirmed
+				   * For clustered environments only.
+				   */
 
 #define	MD_DISK_WRITEMOSTLY	9 /* disk is "write-mostly" is RAID1 config.
 				   * read requests will only be sent here in
@@ -108,6 +114,7 @@ typedef struct mdp_device_descriptor_s {
 #define MD_SB_BLOCK_CONTAINER_RESHAPE 3 /* block container wide reshapes */
 #define MD_SB_BLOCK_VOLUME	4 /* block activation of array, other arrays
 				   * in container can be activated */
+#define MD_SB_CLUSTERED		5 /* MD is clustered  */
 #define	MD_SB_BITMAP_PRESENT	8 /* bitmap may be present nearby */
 
 typedef struct mdp_superblock_s {
diff --git a/md_u.h b/md_u.h
index be9868a..76068d6 100644
--- a/md_u.h
+++ b/md_u.h
@@ -44,6 +44,7 @@
 #define STOP_ARRAY		_IO (MD_MAJOR, 0x32)
 #define STOP_ARRAY_RO		_IO (MD_MAJOR, 0x33)
 #define RESTART_ARRAY_RW	_IO (MD_MAJOR, 0x34)
+#define CLUSTERED_DISK_NACK	_IO (MD_MAJOR, 0x35)
 
 typedef struct mdu_version_s {
 	int major;
diff --git a/mdadm.8.in b/mdadm.8.in
index db0d06f..4367246 100644
--- a/mdadm.8.in
+++ b/mdadm.8.in
@@ -1421,6 +1421,15 @@ will avoid reading from these devices if possible.
 .BR \-\-readwrite
 Subsequent devices that are added or re\-added will have the 'write-mostly'
 flag cleared.
+.TP
+.BR \-\-cluster\-confirm
+Confirm the existence of the device. This is issued in response to an \-\-add
+request by a node in a cluster. When a node adds a device it sends a message
+to all nodes in the cluster to look for a device with a UUID. This translates
+to a udev notification with the UUID of the device to be added and the slot
+number. The receiving node must acknowledge this message
+with \-\-cluster\-confirm. Valid arguments are <slot>:<devicename> in case
+the device is found or <slot>:missing in case the device is not found.
 
 .TP
 .BR \-\-failfast
diff --git a/mdadm.c b/mdadm.c
index 3235ff4..be2c5ac 100644
--- a/mdadm.c
+++ b/mdadm.c
@@ -196,6 +196,7 @@ int main(int argc, char *argv[])
 		case 'f':
 		case Fail:
 		case ReAdd: /* re-add */
+		case ClusterConfirm:
 			if (!mode) {
 				newmode = MANAGE;
 				shortopt = short_bitmap_options;
@@ -961,6 +962,9 @@ int main(int argc, char *argv[])
 					   * remove the device */
 			devmode = 'f';
 			continue;
+		case O(MANAGE, ClusterConfirm):
+			devmode = 'c';
+			continue;
 		case O(MANAGE,Replace):
 			/* Mark these devices for replacement */
 			devmode = 'R';
diff --git a/mdadm.h b/mdadm.h
index 80a1073..a45bd03 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -347,6 +347,7 @@ enum special_options {
 	Action,
 	Nodes,
 	ClusterName,
+	ClusterConfirm,
 };
 
 enum prefix_standard {
@@ -1282,6 +1283,7 @@ extern int parse_uuid(char *str, int uuid[4]);
 extern int parse_layout_10(char *layout);
 extern int parse_layout_faulty(char *layout);
 extern long parse_num(char *num);
+extern int parse_cluster_confirm_arg(char *inp, char **devname, int *slot);
 extern int check_ext2(int fd, char *name);
 extern int check_reiser(int fd, char *name);
 extern int check_raid(int fd, char *name);
diff --git a/util.c b/util.c
index f7d902f..70d45dd 100644
--- a/util.c
+++ b/util.c
@@ -280,6 +280,16 @@ long parse_num(char *num)
 }
 #endif
 
+int parse_cluster_confirm_arg(char *input, char **devname, int *slot)
+{
+	char *dev;
+	*slot = strtoul(input, &dev, 10);
+	if (dev == input || dev[0] != ':')
+		return -1;
+	*devname = dev+1;
+	return 0;
+}
+
 void remove_partitions(int fd)
 {
 	/* remove partitions from this block devices.
-- 
1.7.12.4

openSUSE Build Service is sponsored by