{ "version": "drbd-9.0.17-0rc1~13", "commit": "f571cabed0e2955cc3b3bf5ba89fb371780bf0c6", "comment": "drbd: Improve the resync controller for fast back end devices and network", "author": "Philipp Reisner <philipp.reisner@linbit.com>", "date": "Tue Jan 29 12:45:31 2019 +0100" }
{ "version": "drbd-9.0.19-1~30", "commit": "c2d3d9150402c71ae94d3146a7de0c10a3b6e25e", "comment": "drbd: Fix stuck resync when many resync requests are cancelled", "author": "Joel Colledge <joel.colledge@linbit.com>", "date": "Mon May 27 11:49:29 2019 +0200" }
{ "version": "drbd-9.0.25-1~2", "commit": "eceb2bc40a31f06acdd8d3d12dd36156934ede04", "comment": "drbd: Fix handing of P_NEG_RS_DREPLY packet", "author": "Philipp Reisner <philipp.reisner@linbit.com>", "date": "Tue Sep 22 11:42:08 2020 +0200" }
diff -Naur drbd-9.0.14+git.62f906cf.orig/drbd/drbd_int.h drbd-9.0.14+git.62f906cf.test/drbd/drbd_int.h
--- drbd-9.0.14+git.62f906cf.orig/drbd/drbd_int.h 2021-09-10 13:54:03.216030195 +0800
+++ drbd-9.0.14+git.62f906cf.test/drbd/drbd_int.h 2021-09-10 13:57:52.775120933 +0800
@@ -1155,6 +1155,7 @@
int rs_last_events; /* counter of read or write "events" (unit sectors)
* on the lower level device when we last looked. */
int rs_in_flight; /* resync sectors in flight (to proxy, in proxy and from proxy) */
+ unsigned long rs_last_mk_req_jif;
unsigned long ov_left; /* in bits */
unsigned long ov_skipped; /* in bits */
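
The only header change is the new rs_last_mk_req_jif member: a jiffies timestamp
that the sender sets when a resync starts (last hunk of drbd_sender.c below) and
subtracts from jiffies in drbd_rs_number_requests() to obtain the real length of
the previous turn. Since both operands are unsigned long, the difference remains
valid even across a jiffies wrap, as this minimal demonstration (invented values,
illustration only) shows:

#include <limits.h>
#include <stdio.h>

/* Illustration only: "now - rs_last_mk_req_jif" stays correct across a
 * jiffies wrap because unsigned subtraction is modular. */
int main(void)
{
	unsigned long last = ULONG_MAX - 9; /* sampled just before the wrap */
	unsigned long now  = 15;            /* sampled just after the wrap  */

	printf("elapsed ticks: %lu\n", now - last); /* prints 25 */
	return 0;
}
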
diff -Naur drbd-9.0.14+git.62f906cf.orig/drbd/drbd_receiver.c drbd-9.0.14+git.62f906cf.test/drbd/drbd_receiver.c
--- drbd-9.0.14+git.62f906cf.orig/drbd/drbd_receiver.c 2021-09-10 13:54:03.216030195 +0800
+++ drbd-9.0.14+git.62f906cf.test/drbd/drbd_receiver.c 2021-09-10 14:13:28.475414699 +0800
@@ -217,6 +217,17 @@
return NULL;
}
+static void rs_sectors_came_in(struct drbd_peer_device *peer_device, int size)
+{
+ int rs_sect_in = atomic_add_return(size >> 9, &peer_device->rs_sect_in);
+
+ /* In case resync runs faster than anticipated, run the resync_work early */
+ if (rs_sect_in >= peer_device->rs_in_flight)
+ drbd_queue_work_if_unqueued(
+ &peer_device->connection->sender_work,
+ &peer_device->resync_work);
+}
+
/* kick lower level device, if we have more than (arbitrary number)
* reference counts on it, which typically are locally submitted io
* requests. don't use unacked_cnt, so we speed up proto A and B, too. */
@@ -2174,7 +2185,7 @@
drbd_send_ack_dp(peer_device, P_NEG_ACK, &d);
}
- atomic_add(d.bi_size >> 9, &peer_device->rs_sect_in);
+ rs_sectors_came_in(peer_device, d.bi_size);
return err;
}
@@ -3082,7 +3093,7 @@
peer_device->use_csums = true;
} else if (pi->cmd == P_OV_REPLY) {
/* track progress, we may need to throttle */
- atomic_add(size >> 9, &peer_device->rs_sect_in);
+ rs_sectors_came_in(peer_device, size);
peer_req->w.cb = w_e_end_ov_reply;
dec_rs_pending(peer_device);
/* drbd_rs_begin_io done when we sent this request,
@@ -7314,7 +7325,7 @@
drbd_send_ack_ex(peer_device, P_NEG_ACK, sector, size, ID_SYNCER);
}
- atomic_add(size >> 9, &peer_device->rs_sect_in);
+ rs_sectors_came_in(peer_device, size);
return err;
}
@@ -8182,7 +8193,7 @@
put_ldev(device);
}
dec_rs_pending(peer_device);
- atomic_add(blksize >> 9, &peer_device->rs_sect_in);
+ rs_sectors_came_in(peer_device, blksize);
return 0;
}
@@ -8355,12 +8366,12 @@
mutex_unlock(&device->bm_resync_fo_mutex);
}
- atomic_add(size >> 9, &peer_device->rs_sect_in);
- mod_timer(&peer_device->resync_timer, jiffies + SLEEP_TIME);
break;
default:
BUG();
}
+ rs_sectors_came_in(peer_device, size);
+ mod_timer(&peer_device->resync_timer, jiffies + SLEEP_TIME);
put_ldev(device);
}
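
All receiver-side places that used to bump rs_sect_in directly now call the new
rs_sectors_came_in() helper, which also queues the sender's resync_work as soon
as the received sectors catch up with rs_in_flight, rather than always waiting up
to SLEEP_TIME for the resync timer. Below is a toy model of that catch-up policy;
the struct, the queueing stub and all numbers are invented, and the real helper
uses atomic_add_return() and drbd_queue_work_if_unqueued():

#include <stdio.h>

/* Toy model of the catch-up policy in rs_sectors_came_in(). */
struct peer {
	int rs_sect_in;   /* resync sectors received since the last turn */
	int rs_in_flight; /* resync sectors requested but not yet received */
};

static void kick_resync_work(void)
{
	printf("resync_work queued early\n");
}

static void sectors_came_in(struct peer *p, int byte_size)
{
	p->rs_sect_in += byte_size >> 9; /* bytes to 512-byte sectors */

	/* Resync runs faster than anticipated: don't wait for the timer. */
	if (p->rs_sect_in >= p->rs_in_flight)
		kick_resync_work();
}

int main(void)
{
	struct peer p = { .rs_sect_in = 0, .rs_in_flight = 16 };

	sectors_came_in(&p, 4096); /* 8 of 16 sectors: keep waiting */
	sectors_came_in(&p, 4096); /* all 16 arrived: kick immediately */
	return 0;
}
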
diff -Naur drbd-9.0.14+git.62f906cf.orig/drbd/drbd_sender.c drbd-9.0.14+git.62f906cf.test/drbd/drbd_sender.c
--- drbd-9.0.14+git.62f906cf.orig/drbd/drbd_sender.c 2021-09-10 13:54:03.216030195 +0800
+++ drbd-9.0.14+git.62f906cf.test/drbd/drbd_sender.c 2021-09-10 16:11:54.594827839 +0800
@@ -551,7 +551,7 @@
return fb;
}
-static int drbd_rs_controller(struct drbd_peer_device *peer_device, unsigned int sect_in)
+static int drbd_rs_controller(struct drbd_peer_device *peer_device, unsigned int sect_in, unsigned long duration)
{
struct peer_device_conf *pdc;
unsigned int want; /* The number of sectors we want in-flight */
@@ -563,6 +563,13 @@
int max_sect;
struct fifo_buffer *plan;
+ if (duration == 0)
+ duration = 1;
+ else if (duration > SLEEP_TIME * 10)
+ duration = SLEEP_TIME * 10;
+
+ sect_in = (u64)sect_in * SLEEP_TIME / duration;
+
pdc = rcu_dereference(peer_device->conf);
plan = rcu_dereference(peer_device->rs_plan_s);
@@ -572,7 +579,7 @@
want = ((pdc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps;
} else { /* normal path */
want = pdc->c_fill_target ? pdc->c_fill_target :
- sect_in * pdc->c_delay_target * HZ / (SLEEP_TIME * 10);
+ sect_in * pdc->c_delay_target * HZ / (duration * 10);
}
correction = want - peer_device->rs_in_flight - plan->total;
@@ -590,12 +597,12 @@
if (req_sect < 0)
req_sect = 0;
- max_sect = (pdc->c_max_rate * 2 * SLEEP_TIME) / HZ;
+ max_sect = (pdc->c_max_rate * 2 * duration) / HZ;
if (req_sect > max_sect)
req_sect = max_sect;
/*
- drbd_warn(device, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
+ drbd_warn(device, "si=%llu if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
sect_in, peer_device->rs_in_flight, want, correction,
steps, cps, peer_device->rs_planed, curr_corr, req_sect);
*/
@@ -606,17 +613,21 @@
static int drbd_rs_number_requests(struct drbd_peer_device *peer_device)
{
struct net_conf *nc;
+ unsigned long duration, now;
unsigned int sect_in; /* Number of sectors that came in since the last turn */
int number, mxb;
sect_in = atomic_xchg(&peer_device->rs_sect_in, 0);
peer_device->rs_in_flight -= sect_in;
+ now = jiffies;
+ duration = now - peer_device->rs_last_mk_req_jif;
+
rcu_read_lock();
nc = rcu_dereference(peer_device->connection->transport.net_conf);
mxb = nc ? nc->max_buffers : 0;
if (rcu_dereference(peer_device->rs_plan_s)->size) {
- number = drbd_rs_controller(peer_device, sect_in) >> (BM_BLOCK_SHIFT - 9);
+ number = drbd_rs_controller(peer_device, sect_in, duration) >> (BM_BLOCK_SHIFT - 9);
peer_device->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
} else {
peer_device->c_sync_rate = rcu_dereference(peer_device->conf)->resync_rate;
@@ -648,8 +659,8 @@
const sector_t capacity = drbd_get_capacity(device->this_bdev);
int max_bio_size;
int number, rollback_i, size;
- int align, requeue = 0;
- int i = 0;
+ int align;
+ int i;
int discard_granularity = 0;
if (unlikely(cancel))
@@ -678,10 +689,9 @@
max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9;
number = drbd_rs_number_requests(peer_device);
- if (number <= 0)
- goto requeue;
for (i = 0; i < number; i++) {
+ bool send_buffer_ok = true;
/* Stop generating RS requests, when half of the send buffer is filled */
mutex_lock(&peer_device->connection->mutex[DATA_STREAM]);
if (transport->ops->stream_ok(transport, DATA_STREAM)) {
@@ -692,14 +702,14 @@
queued = transport_stats.send_buffer_used;
sndbuf = transport_stats.send_buffer_size;
if (queued > sndbuf / 2) {
- requeue = 1;
+ send_buffer_ok = false;
transport->ops->hint(transport, DATA_STREAM, NOSPACE);
}
} else
- requeue = 1;
+ send_buffer_ok = false;
mutex_unlock(&peer_device->connection->mutex[DATA_STREAM]);
- if (requeue)
- goto requeue;
+ if (!send_buffer_ok)
+ goto request_done;
next_sector:
size = BM_BLOCK_SIZE;
@@ -707,24 +717,22 @@
if (bit == DRBD_END_OF_BITMAP) {
device->bm_resync_fo = drbd_bm_bits(device);
- put_ldev(device);
- return 0;
+ goto request_done;
}
sector = BM_BIT_TO_SECT(bit);
if (drbd_try_rs_begin_io(peer_device, sector, true)) {
device->bm_resync_fo = bit;
- goto requeue;
+ goto request_done;
}
- device->bm_resync_fo = bit + 1;
if (unlikely(drbd_bm_test_bit(peer_device, bit) == 0)) {
+ device->bm_resync_fo = bit + 1;
drbd_rs_complete_io(peer_device, sector);
goto next_sector;
}
-#if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE
/* try to find some adjacent bits.
* we stop if we have already the maximum req size.
*
@@ -733,7 +741,7 @@
*/
align = 1;
rollback_i = i;
- while (i < number) {
+ while (i + 1 < number) {
if (size + BM_BLOCK_SIZE > max_bio_size)
break;
@@ -760,11 +768,8 @@
align++;
i++;
}
- /* if we merged some,
- * reset the offset to start the next drbd_bm_find_next from */
- if (size > BM_BLOCK_SIZE)
- device->bm_resync_fo = bit + 1;
-#endif
+ /* set the offset to start the next drbd_bm_find_next from */
+ device->bm_resync_fo = bit + 1;
/* adjust very last sectors, in case we are oddly sized */
if (sector + (size>>9) > capacity)
@@ -779,7 +784,7 @@
drbd_rs_complete_io(peer_device, sector);
device->bm_resync_fo = BM_SECT_TO_BIT(sector);
i = rollback_i;
- goto requeue;
+ goto request_done;
case 0:
/* everything ok */
break;
@@ -802,6 +807,10 @@
}
}
+request_done:
+ /* ... but do a correction, in case we had to break/goto request_done; */
+ peer_device->rs_in_flight -= (number - i) * BM_SECT_PER_BIT;
+
if (device->bm_resync_fo >= drbd_bm_bits(device)) {
/* last syncer _request_ was sent,
* but the P_RS_DATA_REPLY not yet received. sync will end (and
@@ -813,7 +822,6 @@
return 0;
}
- requeue:
peer_device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
mod_timer(&peer_device->resync_timer, jiffies + SLEEP_TIME);
put_ldev(device);
@@ -1806,6 +1814,7 @@
atomic_set(&peer_device->rs_sect_in, 0);
atomic_set(&peer_device->device->rs_sect_ev, 0); /* FIXME: ??? */
+ peer_device->rs_last_mk_req_jif = jiffies;
peer_device->rs_in_flight = 0;
peer_device->rs_last_events =
drbd_backing_bdev_events(peer_device->device->ldev->backing_bdev->bd_contains->bd_disk);
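
On the sender side, every early exit from the request-generation loop
(send-buffer backpressure, a failed drbd_try_rs_begin_io(), end of bitmap) now
funnels through the request_done label, where rs_in_flight is reduced by the
unissued part of the budget: (number - i) requests of BM_SECT_PER_BIT sectors
each. Without that correction the controller keeps waiting for replies that can
never arrive, which is the stuck resync the second commit above fixes. For the
correction to stay non-negative, i must never exceed number, which appears to be
why the bit-merging condition changed from "i < number" to "i + 1 < number". A
toy trace of that off-by-one (not part of the patch):

#include <stdio.h>

/* Toy trace of the request budget in make_resync_request(): the outer
 * loop may issue 'number' requests; the inner loop merges adjacent dirty
 * bits into one bigger request, spending budget as well. With the old
 * "i < number" test, i could end at number + 1, making the "number - i"
 * correction at request_done negative; "i + 1 < number" keeps i <= number. */
static int run(int number, int patched)
{
	int i;

	for (i = 0; i < number; i++) {
		/* pretend one request merges every remaining dirty bit */
		while (patched ? (i + 1 < number) : (i < number))
			i++;
	}
	return i;
}

int main(void)
{
	printf("old loop    : i = %d (budget 4)\n", run(4, 0)); /* prints 5 */
	printf("patched loop: i = %d (budget 4)\n", run(4, 1)); /* prints 4 */
	return 0;
}
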