File _service:download_files:0003-block.patch of Package linux-clear-goldmont
From 23e901a7a9c31beb27816d8f7e149752f2402c7d Mon Sep 17 00:00:00 2001
From: Peter Jung <admin@ptr1337.dev>
Date: Fri, 26 Jan 2024 11:07:04 +0100
Subject: [PATCH 3/7] block
Signed-off-by: Peter Jung <admin@ptr1337.dev>
---
block/bfq-iosched.c | 120 ++++++++++++++++++++++++++++++++++++--------
block/bfq-iosched.h | 16 +++++-
block/mq-deadline.c | 114 +++++++++++++++++++++++++++++++++--------
3 files changed, 205 insertions(+), 45 deletions(-)
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 3cce6de464a7..9bd57baa4b0b 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -467,6 +467,21 @@ static struct bfq_io_cq *bfq_bic_lookup(struct request_queue *q)
return icq;
}
+static struct bfq_io_cq *bfq_bic_try_lookup(struct request_queue *q)
+{
+ if (!current->io_context)
+ return NULL;
+ if (spin_trylock_irq(&q->queue_lock)) {
+ struct bfq_io_cq *icq;
+
+ icq = icq_to_bic(ioc_lookup_icq(q));
+ spin_unlock_irq(&q->queue_lock);
+ return icq;
+ }
+
+ return NULL;
+}
+
/*
* Scheduler run of queue, if there are requests pending and no one in the
* driver that will restart queueing.
@@ -2454,10 +2469,21 @@ static bool bfq_bio_merge(struct request_queue *q, struct bio *bio,
* returned by bfq_bic_lookup does not go away before
* bfqd->lock is taken.
*/
- struct bfq_io_cq *bic = bfq_bic_lookup(q);
+ struct bfq_io_cq *bic = bfq_bic_try_lookup(q);
bool ret;
- spin_lock_irq(&bfqd->lock);
+ /*
+ * bio merging is called for every bio queued, and it's very easy
+ * to run into contention because of that. If we fail getting
+ * bfqd->lock, just skip this merge attempt. For related IO, the
+ * plug will be the successful merging point. If we get here, we
+ * already failed doing the obvious merge. Chances of actually
+ * getting a merge off this path are a lot slimmer, so skipping an
+ * occasional lookup that will most likely not succeed anyway should
+ * not be a problem.
+ */
+ if (!spin_trylock_irq(&bfqd->lock))
+ return false;
if (bic) {
/*
@@ -5148,6 +5174,10 @@ static bool bfq_has_work(struct blk_mq_hw_ctx *hctx)
{
struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;
+ if (!list_empty_careful(&bfqd->at_head) ||
+ !list_empty_careful(&bfqd->at_tail))
+ return true;
+
/*
* Avoiding lock: a race on bfqd->queued should cause at
* most a call to dispatch for nothing
@@ -5297,15 +5327,61 @@ static inline void bfq_update_dispatch_stats(struct request_queue *q,
bool idle_timer_disabled) {}
#endif /* CONFIG_BFQ_CGROUP_DEBUG */
+static void bfq_insert_request(struct request_queue *q, struct request *rq,
+ blk_insert_t flags, struct list_head *free);
+
+static void __bfq_do_insert(struct request_queue *q, blk_insert_t flags,
+ struct list_head *list, struct list_head *free)
+{
+ while (!list_empty(list)) {
+ struct request *rq;
+
+ rq = list_first_entry(list, struct request, queuelist);
+ list_del_init(&rq->queuelist);
+ bfq_insert_request(q, rq, flags, free);
+ }
+}
+
+static void bfq_do_insert(struct request_queue *q, struct list_head *free)
+{
+ struct bfq_data *bfqd = q->elevator->elevator_data;
+ LIST_HEAD(at_head);
+ LIST_HEAD(at_tail);
+
+ spin_lock(&bfqd->insert_lock);
+ list_splice_init(&bfqd->at_head, &at_head);
+ list_splice_init(&bfqd->at_tail, &at_tail);
+ spin_unlock(&bfqd->insert_lock);
+
+ __bfq_do_insert(q, BLK_MQ_INSERT_AT_HEAD, &at_head, free);
+ __bfq_do_insert(q, 0, &at_tail, free);
+}
+
static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
{
- struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;
+ struct request_queue *q = hctx->queue;
+ struct bfq_data *bfqd = q->elevator->elevator_data;
struct request *rq;
struct bfq_queue *in_serv_queue;
bool waiting_rq, idle_timer_disabled = false;
+ LIST_HEAD(free);
+
+ /*
+ * If someone else is already dispatching, skip this one. This will
+ * defer the next dispatch event to when something completes, and could
+ * potentially lower the queue depth for contended cases.
+ *
+ * See the logic in blk_mq_do_dispatch_sched(), which loops and
+ * retries if nothing is dispatched.
+ */
+ if (test_bit(BFQ_DISPATCHING, &bfqd->run_state) ||
+ test_and_set_bit_lock(BFQ_DISPATCHING, &bfqd->run_state))
+ return NULL;
spin_lock_irq(&bfqd->lock);
+ bfq_do_insert(hctx->queue, &free);
+
in_serv_queue = bfqd->in_service_queue;
waiting_rq = in_serv_queue && bfq_bfqq_wait_request(in_serv_queue);
@@ -5315,7 +5391,9 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
waiting_rq && !bfq_bfqq_wait_request(in_serv_queue);
}
+ clear_bit_unlock(BFQ_DISPATCHING, &bfqd->run_state);
spin_unlock_irq(&bfqd->lock);
+ blk_mq_free_requests(&free);
bfq_update_dispatch_stats(hctx->queue, rq,
idle_timer_disabled ? in_serv_queue : NULL,
idle_timer_disabled);
@@ -6236,27 +6314,21 @@ static inline void bfq_update_insert_stats(struct request_queue *q,
static struct bfq_queue *bfq_init_rq(struct request *rq);
-static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
- blk_insert_t flags)
+static void bfq_insert_request(struct request_queue *q, struct request *rq,
+ blk_insert_t flags, struct list_head *free)
{
- struct request_queue *q = hctx->queue;
struct bfq_data *bfqd = q->elevator->elevator_data;
struct bfq_queue *bfqq;
bool idle_timer_disabled = false;
blk_opf_t cmd_flags;
- LIST_HEAD(free);
#ifdef CONFIG_BFQ_GROUP_IOSCHED
if (!cgroup_subsys_on_dfl(io_cgrp_subsys) && rq->bio)
bfqg_stats_update_legacy_io(q, rq);
#endif
- spin_lock_irq(&bfqd->lock);
bfqq = bfq_init_rq(rq);
- if (blk_mq_sched_try_insert_merge(q, rq, &free)) {
- spin_unlock_irq(&bfqd->lock);
- blk_mq_free_requests(&free);
+ if (blk_mq_sched_try_insert_merge(q, rq, free))
return;
- }
trace_block_rq_insert(rq);
@@ -6286,8 +6358,6 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
* merge).
*/
cmd_flags = rq->cmd_flags;
- spin_unlock_irq(&bfqd->lock);
-
bfq_update_insert_stats(q, bfqq, idle_timer_disabled,
cmd_flags);
}
@@ -6296,13 +6366,15 @@ static void bfq_insert_requests(struct blk_mq_hw_ctx *hctx,
struct list_head *list,
blk_insert_t flags)
{
- while (!list_empty(list)) {
- struct request *rq;
+ struct request_queue *q = hctx->queue;
+ struct bfq_data *bfqd = q->elevator->elevator_data;
- rq = list_first_entry(list, struct request, queuelist);
- list_del_init(&rq->queuelist);
- bfq_insert_request(hctx, rq, flags);
- }
+ spin_lock_irq(&bfqd->insert_lock);
+ if (flags & BLK_MQ_INSERT_AT_HEAD)
+ list_splice_init(list, &bfqd->at_head);
+ else
+ list_splice_init(list, &bfqd->at_tail);
+ spin_unlock_irq(&bfqd->insert_lock);
}
static void bfq_update_hw_tag(struct bfq_data *bfqd)
@@ -7211,6 +7283,12 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
q->elevator = eq;
spin_unlock_irq(&q->queue_lock);
+ spin_lock_init(&bfqd->lock);
+ spin_lock_init(&bfqd->insert_lock);
+
+ INIT_LIST_HEAD(&bfqd->at_head);
+ INIT_LIST_HEAD(&bfqd->at_tail);
+
/*
* Our fallback bfqq if bfq_find_alloc_queue() runs into OOM issues.
* Grab a permanent reference to it, so that the normal code flow
@@ -7329,8 +7407,6 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
/* see comments on the definition of next field inside bfq_data */
bfqd->actuator_load_threshold = 4;
- spin_lock_init(&bfqd->lock);
-
/*
* The invocation of the next bfq_create_group_hierarchy
* function is the head of a chain of function calls
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index 467e8cfc41a2..f44f5d4ec2f4 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -504,12 +504,26 @@ struct bfq_io_cq {
unsigned int requests; /* Number of requests this process has in flight */
};
+enum {
+ BFQ_DISPATCHING = 0,
+};
+
/**
* struct bfq_data - per-device data structure.
*
* All the fields are protected by @lock.
*/
struct bfq_data {
+ struct {
+ spinlock_t lock;
+ spinlock_t insert_lock;
+ } ____cacheline_aligned_in_smp;
+
+ unsigned long run_state;
+
+ struct list_head at_head;
+ struct list_head at_tail;
+
/* device request queue */
struct request_queue *queue;
/* dispatch queue */
@@ -795,8 +809,6 @@ struct bfq_data {
/* fallback dummy bfqq for extreme OOM conditions */
struct bfq_queue oom_bfqq;
- spinlock_t lock;
-
/*
* bic associated with the task issuing current bio for
* merging. This and the next field are used as a support to
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index f958e79277b8..1b0de4fc3958 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -79,10 +79,24 @@ struct dd_per_prio {
struct io_stats_per_prio stats;
};
+enum {
+ DD_DISPATCHING = 0,
+};
+
struct deadline_data {
/*
* run time data
*/
+ struct {
+ spinlock_t lock;
+ spinlock_t insert_lock;
+ spinlock_t zone_lock;
+ } ____cacheline_aligned_in_smp;
+
+ unsigned long run_state;
+
+ struct list_head at_head;
+ struct list_head at_tail;
struct dd_per_prio per_prio[DD_PRIO_COUNT];
@@ -100,9 +114,6 @@ struct deadline_data {
int front_merges;
u32 async_depth;
int prio_aging_expire;
-
- spinlock_t lock;
- spinlock_t zone_lock;
};
/* Maps an I/O priority class to a deadline scheduler priority. */
@@ -113,6 +124,9 @@ static const enum dd_prio ioprio_class_to_prio[] = {
[IOPRIO_CLASS_IDLE] = DD_IDLE_PRIO,
};
+static void dd_insert_request(struct request_queue *q, struct request *rq,
+ blk_insert_t flags, struct list_head *free);
+
static inline struct rb_root *
deadline_rb_root(struct dd_per_prio *per_prio, struct request *rq)
{
@@ -585,6 +599,33 @@ static struct request *dd_dispatch_prio_aged_requests(struct deadline_data *dd,
return NULL;
}
+static void __dd_do_insert(struct request_queue *q, blk_insert_t flags,
+ struct list_head *list, struct list_head *free)
+{
+ while (!list_empty(list)) {
+ struct request *rq;
+
+ rq = list_first_entry(list, struct request, queuelist);
+ list_del_init(&rq->queuelist);
+ dd_insert_request(q, rq, flags, free);
+ }
+}
+
+static void dd_do_insert(struct request_queue *q, struct list_head *free)
+{
+ struct deadline_data *dd = q->elevator->elevator_data;
+ LIST_HEAD(at_head);
+ LIST_HEAD(at_tail);
+
+ spin_lock(&dd->insert_lock);
+ list_splice_init(&dd->at_head, &at_head);
+ list_splice_init(&dd->at_tail, &at_tail);
+ spin_unlock(&dd->insert_lock);
+
+ __dd_do_insert(q, BLK_MQ_INSERT_AT_HEAD, &at_head, free);
+ __dd_do_insert(q, 0, &at_tail, free);
+}
+
/*
* Called from blk_mq_run_hw_queue() -> __blk_mq_sched_dispatch_requests().
*
@@ -595,12 +636,27 @@ static struct request *dd_dispatch_prio_aged_requests(struct deadline_data *dd,
*/
static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
{
- struct deadline_data *dd = hctx->queue->elevator->elevator_data;
+ struct request_queue *q = hctx->queue;
+ struct deadline_data *dd = q->elevator->elevator_data;
const unsigned long now = jiffies;
struct request *rq;
enum dd_prio prio;
+ LIST_HEAD(free);
+
+ /*
+ * If someone else is already dispatching, skip this one. This will
+ * defer the next dispatch event to when something completes, and could
+ * potentially lower the queue depth for contended cases.
+ *
+ * See the logic in blk_mq_do_dispatch_sched(), which loops and
+ * retries if nothing is dispatched.
+ */
+ if (test_bit(DD_DISPATCHING, &dd->run_state) ||
+ test_and_set_bit_lock(DD_DISPATCHING, &dd->run_state))
+ return NULL;
spin_lock(&dd->lock);
+ dd_do_insert(q, &free);
rq = dd_dispatch_prio_aged_requests(dd, now);
if (rq)
goto unlock;
@@ -616,8 +672,10 @@ static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
}
unlock:
+ clear_bit_unlock(DD_DISPATCHING, &dd->run_state);
spin_unlock(&dd->lock);
+ blk_mq_free_requests(&free);
return rq;
}
@@ -706,6 +764,13 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
eq->elevator_data = dd;
+ spin_lock_init(&dd->lock);
+ spin_lock_init(&dd->insert_lock);
+ spin_lock_init(&dd->zone_lock);
+
+ INIT_LIST_HEAD(&dd->at_head);
+ INIT_LIST_HEAD(&dd->at_tail);
+
for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
struct dd_per_prio *per_prio = &dd->per_prio[prio];
@@ -722,8 +787,6 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
dd->last_dir = DD_WRITE;
dd->fifo_batch = fifo_batch;
dd->prio_aging_expire = prio_aging_expire;
- spin_lock_init(&dd->lock);
- spin_lock_init(&dd->zone_lock);
/* We dispatch from request queue wide instead of hw queue */
blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q);
@@ -779,7 +842,19 @@ static bool dd_bio_merge(struct request_queue *q, struct bio *bio,
struct request *free = NULL;
bool ret;
- spin_lock(&dd->lock);
+ /*
+ * bio merging is called for every bio queued, and it's very easy
+ * to run into contention because of that. If we fail getting
+ * the dd lock, just skip this merge attempt. For related IO, the
+ * plug will be the successful merging point. If we get here, we
+ * already failed doing the obvious merge. Chances of actually
+ * getting a merge off this path are a lot slimmer, so skipping an
+ * occasional lookup that will most likely not succeed anyway should
+ * not be a problem.
+ */
+ if (!spin_trylock(&dd->lock))
+ return false;
+
ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free);
spin_unlock(&dd->lock);
@@ -792,10 +867,9 @@ static bool dd_bio_merge(struct request_queue *q, struct bio *bio,
/*
* add rq to rbtree and fifo
*/
-static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
+static void dd_insert_request(struct request_queue *q, struct request *rq,
blk_insert_t flags, struct list_head *free)
{
- struct request_queue *q = hctx->queue;
struct deadline_data *dd = q->elevator->elevator_data;
const enum dd_data_dir data_dir = rq_data_dir(rq);
u16 ioprio = req_get_ioprio(rq);
@@ -867,19 +941,13 @@ static void dd_insert_requests(struct blk_mq_hw_ctx *hctx,
{
struct request_queue *q = hctx->queue;
struct deadline_data *dd = q->elevator->elevator_data;
- LIST_HEAD(free);
- spin_lock(&dd->lock);
- while (!list_empty(list)) {
- struct request *rq;
-
- rq = list_first_entry(list, struct request, queuelist);
- list_del_init(&rq->queuelist);
- dd_insert_request(hctx, rq, flags, &free);
- }
- spin_unlock(&dd->lock);
-
- blk_mq_free_requests(&free);
+ spin_lock(&dd->insert_lock);
+ if (flags & BLK_MQ_INSERT_AT_HEAD)
+ list_splice_init(list, &dd->at_head);
+ else
+ list_splice_init(list, &dd->at_tail);
+ spin_unlock(&dd->insert_lock);
}
/* Callback from inside blk_mq_rq_ctx_init(). */
@@ -958,6 +1026,10 @@ static bool dd_has_work(struct blk_mq_hw_ctx *hctx)
struct deadline_data *dd = hctx->queue->elevator->elevator_data;
enum dd_prio prio;
+ if (!list_empty_careful(&dd->at_head) ||
+ !list_empty_careful(&dd->at_tail))
+ return true;
+
for (prio = 0; prio <= DD_PRIO_MAX; prio++)
if (dd_has_work_for_prio(&dd->per_prio[prio]))
return true;
--
2.44.0