Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
SUSE:SLE-12-SP5:Update
drbd
bsc-1197431-drbd-fix-regression-in-request-time...
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File bsc-1197431-drbd-fix-regression-in-request-timeout-handling.patch of Package drbd
From 24b44c83f246c874681876d06ab0d1c3cc33fcbf Mon Sep 17 00:00:00 2001 From: Lars Ellenberg <lars.ellenberg@linbit.com> Date: Tue, 2 Oct 2018 15:03:19 +0200 Subject: [PATCH] drbd: fix regression in request timeout handling Symptoms: Frequent messages like | We did not send a P_BARRIER for 80360848ms > ko-count (7) * timeout (60 * 0.1s); drbd kernel thread blocked? | Remote failed to finish a request within 80360852ms > ko-count (7) * timeout (60 * 0.1s) with ridiculous times (which effectively are the uptime) By introducing CONFIG_DRBD_TIMING_STATS, and disabling it by default, we deactivated the detailed request timing in 9.0.15. But still relied on those now initialized-to-zero timestamps in the request_timer_fn(). Go back to using jiffies there, for this purpose its granularity is more than good enough. --- drbd/drbd_debugfs.c | 51 ++++++++++++++++++++++++++------------------- drbd/drbd_int.h | 49 +++++++++++++++++++++++-------------------- drbd/drbd_main.c | 6 ++++-- drbd/drbd_req.c | 48 +++++++++++++++++++++++------------------- drbd/drbd_sender.c | 2 ++ 5 files changed, 88 insertions(+), 68 deletions(-) diff --git a/drbd/drbd_debugfs.c b/drbd/drbd_debugfs.c index 73160dbf..ac231f6f 100644 --- a/drbd/drbd_debugfs.c +++ b/drbd/drbd_debugfs.c @@ -123,7 +123,7 @@ static void print_one_age_or_dash(struct seq_file *m, struct drbd_request *req, } #endif -static void seq_print_one_request(struct seq_file *m, struct drbd_request *req, ktime_t now) +static void seq_print_one_request(struct seq_file *m, struct drbd_request *req, ktime_t now, unsigned long jif) { /* change anything here, fixup header below! */ unsigned int s = req->local_rq_state; @@ -134,26 +134,30 @@ static void seq_print_one_request(struct seq_file *m, struct drbd_request *req, (unsigned long long)req->i.sector, req->i.size >> 9, (s & RQ_WRITE) ? "W" : "R"); +#ifdef CONFIG_DRBD_TIMING_STATS #define RQ_HDR_2 "\tstart\tin AL\tsubmit" -#define RQ_HDR_3 "\tsent\tacked\tdone" seq_printf(m, "\t%d", (int)ktime_to_ms(ktime_sub(now, req->start_kt))); -#ifdef CONFIG_DRBD_TIMING_STATS seq_print_age_or_dash(m, s & RQ_IN_ACT_LOG, ktime_sub(now, req->in_actlog_kt)); seq_print_age_or_dash(m, s & RQ_LOCAL_PENDING, ktime_sub(now, req->pre_submit_kt)); +#define RQ_HDR_3 "\tsent\tacked\tdone" print_one_age_or_dash(m, req, RQ_NET_SENT, 0, now, offsetof(typeof(*req), pre_send_kt)); print_one_age_or_dash(m, req, RQ_NET_SENT, RQ_NET_PENDING, now, offsetof(typeof(*req), acked_kt)); print_one_age_or_dash(m, req, RQ_NET_DONE, 0, now, offsetof(typeof(*req), net_done_kt)); +#else +#define RQ_HDR_2 "\tstart" +#define RQ_HDR_3 "" + seq_printf(m, "\t%d", (int)jiffies_to_msecs(jif - req->start_jif)); #endif #define RQ_HDR_4 "\tstate\n" seq_print_request_state(m, req); } #define RQ_HDR RQ_HDR_1 RQ_HDR_2 RQ_HDR_3 RQ_HDR_4 -static void seq_print_minor_vnr_req(struct seq_file *m, struct drbd_request *req, ktime_t now) +static void seq_print_minor_vnr_req(struct seq_file *m, struct drbd_request *req, ktime_t now, unsigned long jif) { seq_printf(m, "%u\t%u\t", req->device->minor, req->device->vnr); - seq_print_one_request(m, req, now); + seq_print_one_request(m, req, now, jif); } static void seq_print_resource_pending_meta_io(struct seq_file *m, struct drbd_resource *resource, unsigned long jif) @@ -184,7 +188,7 @@ static void seq_print_resource_pending_meta_io(struct seq_file *m, struct drbd_r rcu_read_unlock(); } -static void seq_print_waiting_for_AL(struct seq_file *m, struct drbd_resource *resource, ktime_t now) +static void seq_print_waiting_for_AL(struct seq_file *m, struct drbd_resource *resource, ktime_t now, unsigned long jif) { struct drbd_device *device; int i; @@ -192,7 +196,6 @@ static void seq_print_waiting_for_AL(struct seq_file *m, struct drbd_resource *r seq_puts(m, "minor\tvnr\tage\t#waiting\n"); rcu_read_lock(); idr_for_each_entry(&resource->devices, device, i) { - ktime_t ktime; struct drbd_request *req; int n = atomic_read(&device->ap_actlog_cnt); if (n) { @@ -201,17 +204,19 @@ static void seq_print_waiting_for_AL(struct seq_file *m, struct drbd_resource *r struct drbd_request, req_pending_master_completion); /* if the oldest request does not wait for the activity log * it is not interesting for us here */ - if (req && !(req->local_rq_state & RQ_IN_ACT_LOG)) - ktime = req->start_kt; - else + if (req && (req->local_rq_state & RQ_IN_ACT_LOG)) req = NULL; spin_unlock_irq(&device->resource->req_lock); } if (n) { seq_printf(m, "%u\t%u\t", device->minor, device->vnr); - if (req) - seq_printf(m, "%d\t", (int)ktime_to_ms(ktime_sub(now, ktime))); - else + if (req) { +#ifdef CONFIG_DRBD_TIMING_STATS + seq_printf(m, "%d\t", (int)ktime_to_ms(ktime_sub(now, req->start_kt))); +#else + seq_printf(m, "%d\t", (int)jiffies_to_msecs(jif - req->start_jif)); +#endif + } else seq_puts(m, "-\t"); seq_printf(m, "%u\n", n); } @@ -345,7 +350,7 @@ static void seq_print_resource_pending_peer_requests(struct seq_file *m, static void seq_print_resource_transfer_log_summary(struct seq_file *m, struct drbd_resource *resource, struct drbd_connection *connection, - ktime_t now) + ktime_t now, unsigned long jif) { struct drbd_request *req; unsigned int count = 0; @@ -400,7 +405,7 @@ static void seq_print_resource_transfer_log_summary(struct seq_file *m, continue; show_state |= tmp; seq_printf(m, "%u\t", count); - seq_print_minor_vnr_req(m, req, now); + seq_print_minor_vnr_req(m, req, now, jif); if (show_state == 0x1f) break; } @@ -453,11 +458,11 @@ static int resource_in_flight_summary_show(struct seq_file *m, void *pos) seq_putc(m, '\n'); seq_puts(m, "application requests waiting for activity log\n"); - seq_print_waiting_for_AL(m, resource, now); + seq_print_waiting_for_AL(m, resource, now, jif); seq_putc(m, '\n'); seq_puts(m, "oldest application requests\n"); - seq_print_resource_transfer_log_summary(m, resource, connection, now); + seq_print_resource_transfer_log_summary(m, resource, connection, now, jif); seq_putc(m, '\n'); jif = jiffies - jif; @@ -760,6 +765,7 @@ static int connection_oldest_requests_show(struct seq_file *m, void *ignored) { struct drbd_connection *connection = m->private; ktime_t now = ktime_get(); + unsigned long jif = jiffies; struct drbd_request *r1, *r2; /* BUMP me if you change the file format/content/presentation */ @@ -768,15 +774,15 @@ static int connection_oldest_requests_show(struct seq_file *m, void *ignored) spin_lock_irq(&connection->resource->req_lock); r1 = connection->todo.req_next; if (r1) - seq_print_minor_vnr_req(m, r1, now); + seq_print_minor_vnr_req(m, r1, now, jif); r2 = connection->req_ack_pending; if (r2 && r2 != r1) { r1 = r2; - seq_print_minor_vnr_req(m, r1, now); + seq_print_minor_vnr_req(m, r1, now, jif); } r2 = connection->req_not_net_done; if (r2 && r2 != r1) - seq_print_minor_vnr_req(m, r2, now); + seq_print_minor_vnr_req(m, r2, now, jif); spin_unlock_irq(&connection->resource->req_lock); return 0; } @@ -1005,6 +1011,7 @@ static int device_oldest_requests_show(struct seq_file *m, void *ignored) struct drbd_device *device = m->private; struct drbd_resource *resource = device->resource; ktime_t now = ktime_get(); + unsigned long jif = jiffies; struct drbd_request *r1, *r2; int i; @@ -1020,9 +1027,9 @@ static int device_oldest_requests_show(struct seq_file *m, void *ignored) r2 = list_first_entry_or_null(&device->pending_completion[i], struct drbd_request, req_pending_local); if (r1) - seq_print_one_request(m, r1, now); + seq_print_one_request(m, r1, now, jif); if (r2 && r2 != r1) - seq_print_one_request(m, r2, now); + seq_print_one_request(m, r2, now, jif); } spin_unlock_irq(&resource->req_lock); return 0; diff --git a/drbd/drbd_int.h b/drbd/drbd_int.h index de8df2a1..231b0497 100644 --- a/drbd/drbd_int.h +++ b/drbd/drbd_int.h @@ -313,57 +313,55 @@ struct drbd_request { struct list_head req_pending_local; /* for generic IO accounting */ - ktime_t start_kt; - - /* for DRBD internal statistics */ - ktime_t pre_submit_kt; + unsigned long start_jif; - /* per connection */ - ktime_t pre_send_kt[DRBD_PEERS_MAX]; - - /* Minimal set of time stamps to determine if we wait for activity log - * transactions, local disk or peer. 32 bit "jiffies" are good enough, - * we don't expect a DRBD request to be stalled for several month. - */ + /* for request_timer_fn() */ + unsigned long pre_submit_jif; + unsigned long pre_send_jif[DRBD_PEERS_MAX]; #ifdef CONFIG_DRBD_TIMING_STATS + /* for DRBD internal statistics */ + ktime_t start_kt; + /* before actual request processing */ ktime_t in_actlog_kt; /* local disk */ + ktime_t pre_submit_kt; /* per connection */ + ktime_t pre_send_kt[DRBD_PEERS_MAX]; ktime_t acked_kt[DRBD_PEERS_MAX]; ktime_t net_done_kt[DRBD_PEERS_MAX]; #endif /* Possibly even more detail to track each phase: - * master_completion_jif + * master_completion_kt * how long did it take to complete the master bio * (application visible latency) - * allocated_jif + * allocated_kt * how long the master bio was blocked until we finally allocated * a tracking struct - * in_actlog_jif + * in_actlog_kt * how long did we wait for activity log transactions * - * net_queued_jif + * net_queued_kt * when did we finally queue it for sending - * pre_send_jif + * pre_send_kt * when did we start sending it - * post_send_jif + * post_send_kt * how long did we block in the network stack trying to send it - * acked_jif + * acked_kt * when did we receive (or fake, in protocol A) a remote ACK - * net_done_jif + * net_done_kt * when did we receive final acknowledgement (P_BARRIER_ACK), * or decide, e.g. on connection loss, that we do no longer expect * anything from this peer for this request. * - * pre_submit_jif - * post_sub_jif + * pre_submit_kt + * post_sub_kt * when did we start submiting to the lower level device, * and how long did we block in that submit function - * local_completion_jif + * local_completion_kt * how long did it take the lower level device to complete this request */ @@ -1849,7 +1847,10 @@ extern void conn_free_crypto(struct drbd_connection *connection); /* drbd_req */ extern void do_submit(struct work_struct *ws); -extern void __drbd_make_request(struct drbd_device *, struct bio *, ktime_t); +#ifndef CONFIG_DRBD_TIMING_STATS +#define __drbd_make_request(d,b,k,j) __drbd_make_request(d,b,j) +#endif +extern void __drbd_make_request(struct drbd_device *, struct bio *, ktime_t, unsigned long); extern MAKE_REQUEST_TYPE drbd_make_request(struct request_queue *q, struct bio *bio); #ifdef COMPAT_HAVE_BLK_QUEUE_MERGE_BVEC extern int drbd_merge_bvec(struct request_queue *, struct bvec_merge_data *, struct bio_vec *); @@ -2816,12 +2817,14 @@ static inline struct drbd_connection *first_connection(struct drbd_resource *res #define ktime_aggregate(D, R, M) D->M = ktime_add(D->M, ktime_sub(R->M, R->start_kt)) #define ktime_aggregate_pd(P, N, R, M) P->M = ktime_add(P->M, ktime_sub(R->M[N], R->start_kt)) #define ktime_get_accounting(V) V = ktime_get() +#define ktime_get_accounting_assign(V, T) V = T #define ktime_var_for_accounting(V) ktime_t V = ktime_get() #else #define ktime_aggregate_delta(D, ST, M) #define ktime_aggregate(D, R, M) #define ktime_aggregate_pd(P, N, R, M) #define ktime_get_accounting(V) +#define ktime_get_accounting_assign(V, T) #define ktime_var_for_accounting(V) #endif diff --git a/drbd/drbd_main.c b/drbd/drbd_main.c index 56da6be4..85f38664 100644 --- a/drbd/drbd_main.c +++ b/drbd/drbd_main.c @@ -3056,8 +3056,10 @@ static void do_retry(struct work_struct *ws) list_for_each_entry_safe(req, tmp, &writes, tl_requests) { struct drbd_device *device = req->device; struct bio *bio = req->master_bio; - ktime_t start_kt = req->start_kt; + unsigned long start_jif = req->start_jif; bool expected; + ktime_get_accounting_assign(ktime_t start_kt, req->start_kt); + expected = expect(device, atomic_read(&req->completion_ref) == 0) && @@ -3091,7 +3093,7 @@ static void do_retry(struct work_struct *ws) /* We are not just doing generic_make_request(), * as we want to keep the start_time information. */ inc_ap_bio(device, bio_data_dir(bio)); - __drbd_make_request(device, bio, start_kt); + __drbd_make_request(device, bio, start_kt, start_jif); } } diff --git a/drbd/drbd_req.c b/drbd/drbd_req.c index 6190970a..a2a6b143 100644 --- a/drbd/drbd_req.c +++ b/drbd/drbd_req.c @@ -34,14 +34,6 @@ static bool drbd_may_do_local_read(struct drbd_device *device, sector_t sector, int size); -static inline unsigned long ktime_to_jiffies(ktime_t kt) -{ - /* ktime_t starts 1st of jan 1970, 00:00 Uhr UTC, - while jiffies start a system boot. */ - ktime_t o = ktime_sub(ktime_get(), kt); - return jiffies - nsecs_to_jiffies(ktime_to_ns(o)); -} - #ifndef __disk_stat_inc /* Update disk stats at start of I/O request */ static void _drbd_start_io_acct(struct drbd_device *device, struct drbd_request *req) @@ -55,7 +47,7 @@ static void _drbd_end_io_acct(struct drbd_device *device, struct drbd_request *r { generic_end_io_acct(device->rq_queue, bio_data_dir(req->master_bio), &device->vdisk->part0, - ktime_to_jiffies(req->start_kt)); + req->start_jif); } #else static void _drbd_start_io_acct(struct drbd_device *device, struct drbd_request *req) @@ -1580,8 +1572,13 @@ static void drbd_req_in_actlog(struct drbd_request *req) * request on the submitter thread. * Returns ERR_PTR(-ENOMEM) if we cannot allocate a drbd_request. */ +#ifndef CONFIG_DRBD_TIMING_STATS +#define drbd_request_prepare(d,b,k,j) drbd_request_prepare(d,b,j) +#endif static struct drbd_request * -drbd_request_prepare(struct drbd_device *device, struct bio *bio, ktime_t start_kt) +drbd_request_prepare(struct drbd_device *device, struct bio *bio, + ktime_t start_kt, + unsigned long start_jif) { const int rw = bio_data_dir(bio); struct drbd_request *req; @@ -1599,7 +1596,8 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, ktime_t start_ if (get_ldev(device)) req_make_private_bio(req, bio); - req->start_kt = start_kt; + req->start_jif = start_jif; + ktime_get_accounting_assign(req->start_kt, start_kt); /* Update disk stats */ _drbd_start_io_acct(device, req); @@ -1833,12 +1831,14 @@ static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request list_add_tail(&req->req_pending_master_completion, &device->pending_master_completion[rw == WRITE]); if (req->private_bio) { - /* needs to be marked within the same spinlock */ + /* pre_submit_jif is used in request_timer_fn() */ + req->pre_submit_jif = jiffies; ktime_get_accounting(req->pre_submit_kt); list_add_tail(&req->req_pending_local, &device->pending_completion[rw == WRITE]); _req_mod(req, TO_BE_SUBMITTED, NULL); - /* but we need to give up the spinlock to submit */ + /* needs to be marked within the same spinlock + * but we need to give up the spinlock to submit */ submit_private_bio = true; } else if (no_remote) { nodata: @@ -1870,9 +1870,11 @@ out: complete_master_bio(device, &m); } -void __drbd_make_request(struct drbd_device *device, struct bio *bio, ktime_t start_kt) +void __drbd_make_request(struct drbd_device *device, struct bio *bio, + ktime_t start_kt, + unsigned long start_jif) { - struct drbd_request *req = drbd_request_prepare(device, bio, start_kt); + struct drbd_request *req = drbd_request_prepare(device, bio, start_kt, start_jif); if (IS_ERR_OR_NULL(req)) return; drbd_send_and_submit(device, req); @@ -2243,7 +2245,10 @@ MAKE_REQUEST_TYPE drbd_make_request(struct request_queue *q, struct bio *bio) { struct drbd_device *device = (struct drbd_device *) q->queuedata; struct drbd_resource *resource = device->resource; +#ifdef CONFIG_DRBD_TIMING_STATS ktime_t start_kt; +#endif + unsigned long start_jif; #ifdef COMPAT_NEED_MAKE_REQUEST_RECURSION struct bio_list *current_bio_list; #endif @@ -2268,10 +2273,11 @@ MAKE_REQUEST_TYPE drbd_make_request(struct request_queue *q, struct bio *bio) MAKE_REQUEST_RETURN; } - start_kt = ktime_get(); + ktime_get_accounting(start_kt); + start_jif = jiffies; inc_ap_bio(device, bio_data_dir(bio)); - __drbd_make_request(device, bio, start_kt); + __drbd_make_request(device, bio, start_kt, start_jif); #ifdef COMPAT_NEED_MAKE_REQUEST_RECURSION current->bio_list = current_bio_list; @@ -2333,7 +2339,7 @@ static bool net_timeout_reached(struct drbd_request *net_req, struct drbd_device *device = net_req->device; struct drbd_peer_device *peer_device = conn_peer_device(connection, device->vnr); int peer_node_id = peer_device->node_id; - unsigned long pre_send_jif = ktime_to_jiffies(net_req->pre_send_kt[peer_node_id]); + unsigned long pre_send_jif = net_req->pre_send_jif[peer_node_id]; if (!time_after(now, pre_send_jif + ent)) return false; @@ -2426,8 +2432,8 @@ void request_timer_fn(DRBD_TIMER_FN_ARG) req_read = list_first_entry_or_null(&device->pending_completion[0], struct drbd_request, req_pending_local); req_write = list_first_entry_or_null(&device->pending_completion[1], struct drbd_request, req_pending_local); - write_pre_submit_jif = ktime_to_jiffies(req_write->pre_submit_kt); - read_pre_submit_jif = ktime_to_jiffies(req_read->pre_submit_kt); + write_pre_submit_jif = req_write->pre_submit_jif; + read_pre_submit_jif = req_read->pre_submit_jif; oldest_submit_jif = (req_write && req_read) ? ( time_before(write_pre_submit_jif, read_pre_submit_jif) @@ -2491,7 +2497,7 @@ void request_timer_fn(DRBD_TIMER_FN_ARG) if (!timeout) continue; - pre_send_jif = ktime_to_jiffies(req->pre_send_kt[connection->peer_node_id]); + pre_send_jif = req->pre_send_jif[connection->peer_node_id]; ent = timeout * HZ/10 * ko_count; et = min_not_zero(et, ent); diff --git a/drbd/drbd_sender.c b/drbd/drbd_sender.c index 053cff9a..c925a535 100644 --- a/drbd/drbd_sender.c +++ b/drbd/drbd_sender.c @@ -2611,6 +2611,8 @@ static int process_one_request(struct drbd_connection *connection) int err; enum drbd_req_event what; + /* pre_send_jif[] is used in net_timeout_reached() */ + req->pre_send_jif[peer_device->node_id] = jiffies; ktime_get_accounting(req->pre_send_kt[peer_device->node_id]); if (drbd_req_is_write(req)) { /* If a WRITE does not expect a barrier ack, -- 2.34.1
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor