File Revert-drbd-serialize-syncs-from-multiple-sources.patch of Package drbd.20953

From ac536e992d337a22c9639d0b321f0c3e521b31ea Mon Sep 17 00:00:00 2001
From: Joel Colledge <joel.colledge@linbit.com>
Date: Mon, 31 May 2021 10:05:32 +0200
Subject: [PATCH] Revert "drbd: serialize syncs from multiple sources"

This reverts commit 262103c65d2849ff31ac52210dc938dfb8f4ac7f.

This approach introduced race conditions, which led to accounting issues
for online verify. As a result, the logged starting sectors were wrong
on both sides and the VerifyT node might consider the verify to be
finished too early. The approach may well also cause other, as yet
undiscovered, problems.

The accounting issues occurred because drbd_device_resync_request could
run and start making requests before the corresponding
w_after_state_change had run. As a result, verify requests were sent
before the VerifyS node had logged the start of the verify and before
the new state was sent to the peer.
---
 drbd/drbd_int.h      |   3 +-
 drbd/drbd_main.c     |   3 ++
 drbd/drbd_receiver.c |  11 ++---
 drbd/drbd_sender.c   | 118 ++++++++++++++++++---------------------------------
 drbd/drbd_state.c    |   8 ++--
 5 files changed, 52 insertions(+), 91 deletions(-)

diff --git a/drbd/drbd_int.h b/drbd/drbd_int.h
index 0f7c3b0c..848a6c61 100644
--- a/drbd/drbd_int.h
+++ b/drbd/drbd_int.h
@@ -530,7 +530,6 @@ enum device_flag {
         DESTROY_DISK,           /* tell worker to close backing devices and destroy related structures. */
 	MD_SYNC,		/* tell worker to call drbd_md_sync() */
 	MAKE_NEW_CUR_UUID,	/* tell worker to ping peers and eventually write new current uuid */
-	MAKE_RESYNC_REQUEST,	/* tell worker to send resync requests */
 
 	STABLE_RESYNC,		/* One peer_device finished the resync stable! */
 	READ_BALANCE_RR,
@@ -1151,6 +1150,7 @@ struct drbd_peer_device {
 	enum drbd_repl_state start_resync_side;
 	enum drbd_repl_state last_repl_state; /* What we received from the peer */
 	struct timer_list start_resync_timer;
+	struct drbd_work resync_work;
 	struct timer_list resync_timer;
 	struct drbd_work propagate_uuids_work;
 
@@ -1943,6 +1943,7 @@ extern int w_e_end_rsdata_req(struct drbd_work *, int);
 extern int w_e_end_csum_rs_req(struct drbd_work *, int);
 extern int w_e_end_ov_reply(struct drbd_work *, int);
 extern int w_e_end_ov_req(struct drbd_work *, int);
+extern int w_resync_timer(struct drbd_work *, int);
 extern int w_send_dblock(struct drbd_work *, int);
 extern int w_send_read_req(struct drbd_work *, int);
 extern int w_e_reissue(struct drbd_work *, int);
diff --git a/drbd/drbd_main.c b/drbd/drbd_main.c
index 1e325aed..cbb77fd3 100644
--- a/drbd/drbd_main.c
+++ b/drbd/drbd_main.c
@@ -3610,6 +3610,9 @@ struct drbd_peer_device *create_peer_device(struct drbd_device *device, struct d
 	}
 
 	timer_setup(&peer_device->start_resync_timer, start_resync_timer_fn, 0);
+
+	INIT_LIST_HEAD(&peer_device->resync_work.list);
+	peer_device->resync_work.cb  = w_resync_timer;
 	timer_setup(&peer_device->resync_timer, resync_timer_fn, 0);
 
 	INIT_LIST_HEAD(&peer_device->propagate_uuids_work.list);
diff --git a/drbd/drbd_receiver.c b/drbd/drbd_receiver.c
index 76d8e08e..21fb6067 100644
--- a/drbd/drbd_receiver.c
+++ b/drbd/drbd_receiver.c
@@ -396,7 +396,9 @@ static void rs_sectors_came_in(struct drbd_peer_device *peer_device, int size)
 
 	/* In case resync runs faster than anticipated, run the resync_work early */
 	if (rs_sect_in >= peer_device->rs_in_flight)
-		drbd_device_post_work(peer_device->device, MAKE_RESYNC_REQUEST);
+		drbd_queue_work_if_unqueued(
+			&peer_device->connection->sender_work,
+			&peer_device->resync_work);
 }
 
 static void reclaim_finished_net_peer_reqs(struct drbd_connection *connection,
@@ -8335,13 +8337,6 @@ static void peer_device_disconnected(struct drbd_peer_device *peer_device)
 	 * again via drbd_try_clear_on_disk_bm(). */
 	drbd_rs_cancel_all(peer_device);
 
-	if (get_ldev(device)) {
-		/* Avoid holding a different resync because this one looks like it is
-		 * still active. */
-		drbd_rs_controller_reset(peer_device);
-		put_ldev(device);
-	}
-
 	peer_device->uuids_received = false;
 
 	if (!drbd_suspended(device)) {
diff --git a/drbd/drbd_sender.c b/drbd/drbd_sender.c
index 2dc1ba41..f9d14857 100644
--- a/drbd/drbd_sender.c
+++ b/drbd/drbd_sender.c
@@ -31,8 +31,8 @@
 
 void drbd_panic_after_delayed_completion_of_aborted_request(struct drbd_device *device);
 
-static int make_ov_request(struct drbd_peer_device *, unsigned int sect_in);
-static int make_resync_request(struct drbd_peer_device *, unsigned int sect_in);
+static int make_ov_request(struct drbd_peer_device *, int);
+static int make_resync_request(struct drbd_peer_device *, int);
 static bool should_send_barrier(struct drbd_connection *, unsigned int epoch);
 static void maybe_send_barrier(struct drbd_connection *, unsigned int);
 static unsigned long get_work_bits(const unsigned long mask, unsigned long *flags);
@@ -447,67 +447,28 @@ defer:
 	return -EAGAIN;
 }
 
-static void drbd_device_resync_request(struct drbd_device *device)
+int w_resync_timer(struct drbd_work *w, int cancel)
 {
-	struct drbd_peer_device *peer_device;
-	struct drbd_peer_device *peer_device_active = NULL;
-	struct drbd_peer_device *peer_device_target = NULL;
-	unsigned int sect_in_target = 0;  /* Number of sectors that came in since the last turn for peer to which we are target. */
-	bool other_peer_active;
-
-	rcu_read_lock();
-	for_each_peer_device_rcu(peer_device, device) {
-		unsigned int sect_in;  /* Number of sectors that came in since the last turn */
-
-		sect_in = atomic_xchg(&peer_device->rs_sect_in, 0);
-		peer_device->rs_in_flight -= sect_in;
-
-		if (peer_device->repl_state[NOW] == L_VERIFY_S || peer_device->repl_state[NOW] == L_SYNC_TARGET) {
-			if (peer_device_target && drbd_ratelimit())
-				drbd_warn(device, "%s to peer %d while %s to %d\n",
-						drbd_repl_str(peer_device_target->repl_state[NOW]),
-						peer_device_target->connection->peer_node_id,
-						drbd_repl_str(peer_device->repl_state[NOW]),
-						peer_device->connection->peer_node_id);
-			peer_device_target = peer_device;
-			sect_in_target = sect_in;
-		}
-
-		if (peer_device->connection->cstate[NOW] == C_CONNECTED && peer_device->rs_in_flight > 0) {
-			if (peer_device_active && drbd_ratelimit())
-				drbd_warn(device, "resync requests in-flight with peer %d and peer %d\n",
-						peer_device_active->connection->peer_node_id,
-						peer_device->connection->peer_node_id);
-			peer_device_active = peer_device;
-		}
-	}
-
-	other_peer_active = peer_device_active && peer_device_target != peer_device_active;
-	if (!peer_device_target || /* Nothing to do. */
-			other_peer_active || /* Wait for activity to drain before making requests to other peer. */
-			test_bit(SYNC_TARGET_TO_BEHIND, &peer_device_target->flags)) {
-		rcu_read_unlock();
-		return;
-	}
+	struct drbd_peer_device *peer_device =
+		container_of(w, struct drbd_peer_device, resync_work);
 
-	kref_get(&peer_device_target->connection->kref);
-	rcu_read_unlock();
+	if (test_bit(SYNC_TARGET_TO_BEHIND, &peer_device->flags))
+		return 0;
 
-	mutex_lock(&peer_device_target->resync_next_bit_mutex);
-	switch (peer_device_target->repl_state[NOW]) {
+	mutex_lock(&peer_device->resync_next_bit_mutex);
+	switch (peer_device->repl_state[NOW]) {
 	case L_VERIFY_S:
-		make_ov_request(peer_device_target, sect_in_target);
+		make_ov_request(peer_device, cancel);
 		break;
 	case L_SYNC_TARGET:
-		make_resync_request(peer_device_target, sect_in_target);
+		make_resync_request(peer_device, cancel);
 		break;
 	default:
 		break;
 	}
-	mutex_unlock(&peer_device_target->resync_next_bit_mutex);
+	mutex_unlock(&peer_device->resync_next_bit_mutex);
 
-	kref_put(&peer_device_target->connection->kref, drbd_destroy_connection);
-	return;
+	return 0;
 }
 
 int w_send_uuids(struct drbd_work *w, int cancel)
@@ -531,10 +492,9 @@ void resync_timer_fn(struct timer_list *t)
 	if (test_bit(SYNC_TARGET_TO_BEHIND, &peer_device->flags))
 		return;
 
-	/* Post work for the device regardless of the peer_device to which this
-	 * timer is attached. This may result in some extra runs of the resync
-	 * work, but that is harmless. */
-	drbd_device_post_work(peer_device->device, MAKE_RESYNC_REQUEST);
+	drbd_queue_work_if_unqueued(
+		&peer_device->connection->sender_work,
+		&peer_device->resync_work);
 }
 
 static void fifo_set(struct fifo_buffer *fb, int value)
@@ -661,12 +621,16 @@ static int drbd_rs_controller(struct drbd_peer_device *peer_device, u64 sect_in,
 	return req_sect;
 }
 
-static int drbd_rs_number_requests(struct drbd_peer_device *peer_device, unsigned int sect_in)
+static int drbd_rs_number_requests(struct drbd_peer_device *peer_device)
 {
 	struct net_conf *nc;
 	ktime_t duration, now;
+	unsigned int sect_in;  /* Number of sectors that came in since the last turn */
 	int number, mxb;
 
+	sect_in = atomic_xchg(&peer_device->rs_sect_in, 0);
+	peer_device->rs_in_flight -= sect_in;
+
 	now = ktime_get();
 	duration = ktime_sub(now, peer_device->rs_last_mk_req_kt);
 	peer_device->rs_last_mk_req_kt = now;
@@ -735,7 +699,7 @@ static int drbd_resync_delay(struct drbd_peer_device *peer_device)
 	return delay;
 }
 
-static int make_resync_request(struct drbd_peer_device *peer_device, unsigned int sect_in)
+static int make_resync_request(struct drbd_peer_device *peer_device, int cancel)
 {
 	struct drbd_device *device = peer_device->device;
 	struct drbd_transport *transport = &peer_device->connection->transport;
@@ -748,6 +712,9 @@ static int make_resync_request(struct drbd_peer_device *peer_device, unsigned in
 	int i;
 	int discard_granularity = 0;
 
+	if (unlikely(cancel))
+		return 0;
+
 	if (peer_device->rs_total == 0) {
 		/* empty resync? */
 		drbd_resync_finished(peer_device, D_MASK);
@@ -779,7 +746,7 @@ static int make_resync_request(struct drbd_peer_device *peer_device, unsigned in
 	}
 
 	max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9;
-	number = drbd_rs_number_requests(peer_device, sect_in);
+	number = drbd_rs_number_requests(peer_device);
 	/* don't let rs_sectors_came_in() re-schedule us "early"
 	 * just because the first reply came "fast", ... */
 	peer_device->rs_in_flight += number * BM_SECT_PER_BIT;
@@ -918,7 +885,9 @@ request_done:
 
 	/* and in case that raced with the receiver, reschedule ourselves right now */
 	if (i > 0 && atomic_read(&peer_device->rs_sect_in) >= peer_device->rs_in_flight) {
-		drbd_device_post_work(device, MAKE_RESYNC_REQUEST);
+		drbd_queue_work_if_unqueued(
+			&peer_device->connection->sender_work,
+			&peer_device->resync_work);
 	} else {
 		mod_timer(&peer_device->resync_timer, jiffies + drbd_resync_delay(peer_device));
 	}
@@ -926,7 +895,7 @@ request_done:
 	return 0;
 }
 
-static int make_ov_request(struct drbd_peer_device *peer_device, unsigned int sect_in)
+static int make_ov_request(struct drbd_peer_device *peer_device, int cancel)
 {
 	struct drbd_device *device = peer_device->device;
 	int number, i, size;
@@ -934,7 +903,10 @@ static int make_ov_request(struct drbd_peer_device *peer_device, unsigned int se
 	const sector_t capacity = get_capacity(device->vdisk);
 	bool stop_sector_reached = false;
 
-	number = drbd_rs_number_requests(peer_device, sect_in);
+	if (unlikely(cancel))
+		return 1;
+
+	number = drbd_rs_number_requests(peer_device);
 	sector = peer_device->ov_position;
 
 	/* don't let rs_sectors_came_in() re-schedule us "early"
@@ -976,7 +948,9 @@ static int make_ov_request(struct drbd_peer_device *peer_device, unsigned int se
 	/* ... and in case that raced with the receiver,
 	 * reschedule ourselves right now */
 	if (i > 0 && atomic_read(&peer_device->rs_sect_in) >= peer_device->rs_in_flight)
-		drbd_device_post_work(device, MAKE_RESYNC_REQUEST);
+		drbd_queue_work_if_unqueued(
+			&peer_device->connection->sender_work,
+			&peer_device->resync_work);
 	if (i == 0)
 		mod_timer(&peer_device->resync_timer, jiffies + RS_MAKE_REQS_INTV);
 	return 1;
@@ -1173,14 +1147,10 @@ int drbd_resync_finished(struct drbd_peer_device *peer_device,
 	int verify_done = 0;
 	bool aborted = false;
 
-	if (repl_state[NOW] == L_SYNC_SOURCE || repl_state[NOW] == L_PAUSED_SYNC_S ||
-			repl_state[NOW] == L_SYNC_TARGET || repl_state[NOW] == L_PAUSED_SYNC_T) {
+
+	if (repl_state[NOW] == L_SYNC_SOURCE || repl_state[NOW] == L_PAUSED_SYNC_S) {
 		/* Make sure all queued w_update_peers()/consider_sending_peers_in_sync()
-		 * executed before killing the resync_lru with drbd_rs_del_all().
-		 *
-		 * Also make sure w_after_state_change has run and sent notifications
-		 * for the new state before potentially calling a usermode helper
-		 * corresponding to the new sync target state. */
+		   executed before killing the resync_lru with drbd_rs_del_all() */
 		if (current == device->resource->worker.task)
 			goto queue_on_sender_workq;
 		else
@@ -2003,9 +1973,6 @@ void drbd_rs_controller_reset(struct drbd_peer_device *peer_device)
 	plan->total = 0;
 	fifo_set(plan, 0);
 	rcu_read_unlock();
-
-	/* Clearing rs_in_flight may release some other resync. */
-	drbd_device_post_work(peer_device->device, MAKE_RESYNC_REQUEST);
 }
 
 void start_resync_timer_fn(struct timer_list *t)
@@ -2271,7 +2238,7 @@ skip_helper:
 		 * No matter, that is handled in resync_timer_fn() */
 		if (repl_state == L_SYNC_TARGET) {
 			drbd_uuid_resync_starting(peer_device);
-			drbd_device_post_work(peer_device->device, MAKE_RESYNC_REQUEST);
+			mod_timer(&peer_device->resync_timer, jiffies);
 		}
 
 		drbd_md_sync_if_dirty(device);
@@ -2512,8 +2479,6 @@ static void do_device_work(struct drbd_device *device, const unsigned long todo)
 		drbd_ldev_destroy(device);
 	if (test_bit(MAKE_NEW_CUR_UUID, &todo))
 		make_new_current_uuid(device);
-	if (test_bit(MAKE_RESYNC_REQUEST, &todo))
-		drbd_device_resync_request(device);
 }
 
 static void do_peer_device_work(struct drbd_peer_device *peer_device, const unsigned long todo)
@@ -2535,7 +2500,6 @@ static void do_peer_device_work(struct drbd_peer_device *peer_device, const unsi
 	|(1UL << DESTROY_DISK)	\
 	|(1UL << MD_SYNC)	\
 	|(1UL << MAKE_NEW_CUR_UUID)\
-	|(1UL << MAKE_RESYNC_REQUEST)\
 	)
 
 #define DRBD_PEER_DEVICE_WORK_MASK	\
diff --git a/drbd/drbd_state.c b/drbd/drbd_state.c
index c52681ce..9a2e5267 100644
--- a/drbd/drbd_state.c
+++ b/drbd/drbd_state.c
@@ -2485,6 +2485,8 @@ static void finish_state_change(struct drbd_resource *resource, struct completio
 						  -(long)peer_device->rs_mark_time[peer_device->rs_last_mark];
 				initialize_resync_progress_marks(peer_device);
 				peer_device->resync_next_bit = 0;
+				if (repl_state[NEW] == L_SYNC_TARGET)
+					mod_timer(&peer_device->resync_timer, jiffies);
 			}
 
 			if ((repl_state[OLD] == L_SYNC_TARGET  || repl_state[OLD] == L_SYNC_SOURCE) &&
@@ -3493,10 +3495,6 @@ static int w_after_state_change(struct drbd_work *w, int unused)
 			     (resync_susp_user[OLD] != resync_susp_user[NEW])))
 				send_state = true;
 
-			/* Resync continues, start making requests. */
-			if (repl_state[OLD] == L_PAUSED_SYNC_T && repl_state[NEW] == L_SYNC_TARGET)
-				drbd_device_post_work(device, MAKE_RESYNC_REQUEST);
-
 			/* finished resync, tell sync source */
 			if ((repl_state[OLD] == L_SYNC_TARGET || repl_state[OLD] == L_PAUSED_SYNC_T) &&
 			    repl_state[NEW] == L_ESTABLISHED)
@@ -3640,7 +3638,7 @@ static int w_after_state_change(struct drbd_work *w, int unused)
 			if (repl_state[OLD] != L_VERIFY_S && repl_state[NEW] == L_VERIFY_S && get_ldev(device)) {
 				drbd_info(peer_device, "Starting Online Verify from sector %llu\n",
 						(unsigned long long)peer_device->ov_position);
-				drbd_device_post_work(peer_device->device, MAKE_RESYNC_REQUEST);
+				mod_timer(&peer_device->resync_timer, jiffies);
 				put_ldev(device);
 			}
 		}
-- 
2.16.4

openSUSE Build Service is sponsored by