File 6703-Use-a-queue-rather-than-a-stack-for-contended-runque.patch of Package erlang
From b39804501cd06530d64266b7e7dbc7dd5f78a037 Mon Sep 17 00:00:00 2001
From: Robin Morisset <rmorisset@meta.com>
Date: Mon, 14 Apr 2025 02:36:05 -0700
Subject: [PATCH 3/4] Use a queue rather than a stack for contended runqueues
There are two benefits:
- Being FIFO rather than LIFO, we are much more likely to find the
  runqueue uncontended on the retry pass, since it is the runqueue that
  was found contended the longest time ago.
- We can now do several passes, rather than defaulting to a blocking
  lock call on the second pass (see the sketch after this list).
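Roughly, the stealing loop now has the shape of the following standalone
sketch. POSIX mutexes and a plain ring buffer stand in for erts_mtx and
ErtsEQueue; victim_t, try_steal, N_VICTIMS and the retry budget are made
up for illustration (the real code instead keeps re-checking the number
of empty runqueues, and keeps the victim lock held when work is found):

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

#define N_VICTIMS 8

typedef struct {
    pthread_mutex_t mtx;
    int work;                        /* non-zero: something stealable */
} victim_t;

static victim_t victims[N_VICTIMS];

/* Returns the index of a victim that had work, or -1 if none. */
static int try_steal(void)
{
    int fifo[N_VICTIMS];             /* ring buffer used as a FIFO */
    unsigned head = 0, tail = 0;
    int budget = 4 * N_VICTIMS;      /* stands in for the empty-runqueue check */
    int vix, got;

    /* First pass: never block; a contended victim goes to the back of
       the FIFO, in the order it was first found contended. */
    for (vix = 0; vix < N_VICTIMS; vix++) {
        if (pthread_mutex_trylock(&victims[vix].mtx) == EBUSY) {
            fifo[tail % N_VICTIMS] = vix;
            tail++;
            continue;
        }
        got = victims[vix].work;
        pthread_mutex_unlock(&victims[vix].mtx);
        if (got)
            return vix;
    }

    /* Retry passes: oldest contended victim first, since its lock has
       had the most time to be released.  A victim that is still
       contended is re-enqueued instead of being blocked on. */
    while (head != tail && budget-- > 0) {
        vix = fifo[head % N_VICTIMS];
        head++;
        if (pthread_mutex_trylock(&victims[vix].mtx) == EBUSY) {
            fifo[tail % N_VICTIMS] = vix;
            tail++;
            continue;
        }
        got = victims[vix].work;
        pthread_mutex_unlock(&victims[vix].mtx);
        if (got)
            return vix;
    }
    return -1;
}

int main(void)
{
    int i;
    for (i = 0; i < N_VICTIMS; i++)
        pthread_mutex_init(&victims[i].mtx, NULL);
    victims[5].work = 1;
    printf("stole from victim %d\n", try_steal());
    return 0;
}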
There was just one small issue: the implementation of equeues did not
support passing them across function boundaries, since they used a
preprocessor macro to find the default (on-stack) queue when growing
the queue. I fixed that by adding a field holding that pointer to the
queue struct itself (a simplified sketch follows).
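The change boils down to the following self-contained sketch in plain C.
It is only an illustration: equeue_t, default_buf, equeue_put,
enqueue_many and friends are made-up stand-ins for ErtsEQueue,
default_equeue and the EQUEUE_* macros, and the growth policy is
simplified. The point is that once the default buffer pointer lives in
the struct, a callee holding only a pointer to the queue can grow and
free it, which the macro that names the declaring function's on-stack
buffer cannot do outside that scope.

#include <stdlib.h>
#include <string.h>

typedef unsigned long eterm_t;            /* stand-in for Eterm */

typedef struct {
    eterm_t *start, *front, *back;
    int possibly_empty;                   /* front == back: empty or full? */
    eterm_t *end;
    eterm_t *default_buf;                 /* mirrors the new default_equeue field */
} equeue_t;

#define DEF_EQUEUE_SIZE 16

/* Only the declaring function can name the on-stack default buffer... */
#define DECLARE_EQUEUE(q)                                               \
    eterm_t q##_default[DEF_EQUEUE_SIZE];                               \
    equeue_t q = { q##_default, q##_default, q##_default, 1,            \
                   q##_default + DEF_EQUEUE_SIZE, q##_default }

/* ...but grow/put/destroy read that pointer from the struct, so they
   also work on a queue received through a pointer parameter
   (error handling omitted for brevity). */
static void equeue_grow(equeue_t *q)      /* called only when the queue is full */
{
    size_t old_size = (size_t)(q->end - q->start);
    size_t first = (size_t)(q->end - q->front);
    eterm_t *new_buf = malloc(2 * old_size * sizeof(eterm_t));

    /* Unwrap the full circular buffer into the new allocation. */
    memcpy(new_buf, q->front, first * sizeof(eterm_t));
    memcpy(new_buf + first, q->start, (old_size - first) * sizeof(eterm_t));

    if (q->start != q->default_buf)       /* never free the on-stack buffer */
        free(q->start);
    q->start = q->front = new_buf;
    q->back = new_buf + old_size;
    q->end = new_buf + 2 * old_size;
}

static void equeue_put(equeue_t *q, eterm_t x)
{
    if (q->back == q->front && !q->possibly_empty)
        equeue_grow(q);                   /* no macro-generated name needed here */
    q->possibly_empty = 0;
    *q->back = x;
    if (++q->back == q->end)
        q->back = q->start;
}

static void equeue_destroy(equeue_t *q)
{
    if (q->start != q->default_buf)
        free(q->start);
}

/* A callee that enqueues through a pointer, in the same way that
   check_possible_steal_victim now does with contended_runqueues. */
static void enqueue_many(equeue_t *q, int n)
{
    int i;
    for (i = 0; i < n; i++)
        equeue_put(q, (eterm_t)i);        /* may outgrow the default buffer */
}

int main(void)
{
    DECLARE_EQUEUE(cq);
    enqueue_many(&cq, 100);               /* growth happens inside the callee */
    equeue_destroy(&cq);
    return 0;
}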
---
erts/emulator/beam/copy.c | 2 ++
erts/emulator/beam/erl_process.c | 29 ++++++++++++-----------------
erts/emulator/beam/global.h | 6 ++++--
3 files changed, 18 insertions(+), 19 deletions(-)
diff --git a/erts/emulator/beam/copy.c b/erts/emulator/beam/copy.c
index 3eb41b33fe..4a5588248d 100644
--- a/erts/emulator/beam/copy.c
+++ b/erts/emulator/beam/copy.c
@@ -1013,6 +1013,7 @@ do { \
EQUE_DEF_QUEUE(q), /* back */ \
1, /* possibly_empty */ \
EQUE_DEF_QUEUE(q) + DEF_EQUEUE_SIZE, /* end */ \
+ EQUE_DEF_QUEUE(q), /* default */ \
ERTS_ALC_T_ESTACK /* alloc_type */ \
}
@@ -1024,6 +1025,7 @@ do { \
info->queue_start, /* back */ \
1, /* possibly_empty */ \
info->queue_end, /* end */ \
+ info->queue_default, /* default */ \
info->queue_alloc_type /* alloc_type */ \
}
diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c
index f09ff0adda..9bfbc8bd4b 100644
--- a/erts/emulator/beam/erl_process.c
+++ b/erts/emulator/beam/erl_process.c
@@ -4617,7 +4617,7 @@ no_procs:
/* Expects rq to be unlocked
rq is locked on return iff the return value is non-zero */
static ERTS_INLINE int
-check_possible_steal_victim(ErtsRunQueue *rq, int vix, Process **result_proc, ErtsWStack* contended_runqueues)
+check_possible_steal_victim(ErtsRunQueue *rq, int vix, Process **result_proc, ErtsEQueue* contended_runqueues)
{
ErtsRunQueue *vrq = ERTS_RUNQ_IX(vix);
Uint32 flags = ERTS_RUNQ_FLGS_GET(vrq);
@@ -4625,16 +4625,11 @@ check_possible_steal_victim(ErtsRunQueue *rq, int vix, Process **result_proc, Er
if (!runq_got_work_to_execute_flags(flags))
return 0;
- if (contended_runqueues) {
- if (erts_mtx_trylock(&vrq->mtx) == EBUSY) {
- WSTACK_PUSH((*contended_runqueues), vix);
- return 0;
- }
- goto lock_taken;
+ if (erts_mtx_trylock(&vrq->mtx) == EBUSY) {
+ EQUEUE_PUT((*contended_runqueues), ((Eterm) vix));
+ return 0;
}
- erts_mtx_lock(&vrq->mtx);
-lock_taken:
return try_steal_task_from_victim(rq, vrq, flags, result_proc);
}
@@ -4643,7 +4638,7 @@ try_steal_task(ErtsRunQueue *rq, Process **result_proc)
{
int res, vix, active_rqs, blnc_rqs;
Uint32 flags;
- DECLARE_WSTACK(contended_runqueues);
+ DECLARE_EQUEUE(contended_runqueues);
flags = empty_runq_get_old_flags(rq);
if (flags & ERTS_RUNQ_FLG_SUSPENDED)
@@ -4670,7 +4665,7 @@ try_steal_task(ErtsRunQueue *rq, Process **result_proc)
while (1) {
res = check_possible_steal_victim(rq, vix, result_proc, &contended_runqueues);
if (res) {
- DESTROY_WSTACK(contended_runqueues);
+ DESTROY_EQUEUE(contended_runqueues);
return res;
}
vix++;
@@ -4693,25 +4688,25 @@ try_steal_task(ErtsRunQueue *rq, Process **result_proc)
res = check_possible_steal_victim(rq, vix, result_proc, &contended_runqueues);
if (res) {
- DESTROY_WSTACK(contended_runqueues);
+ DESTROY_EQUEUE(contended_runqueues);
return res;
}
}
/* ... and finally re-try stealing from the queues that were skipped because contended.
We recheck the number of empty runqueues in each iteration, as taking the runqueue lock in check_possible_steal_victim can take quite a while. */
- while (!WSTACK_ISEMPTY(contended_runqueues)
+ while (!EQUEUE_ISEMPTY(contended_runqueues)
&& (erts_atomic32_read_acqb(&no_empty_run_queues) < blnc_rqs)) {
- vix = WSTACK_POP(contended_runqueues);
- res = check_possible_steal_victim(rq, vix, result_proc, NULL);
+ vix = (int) EQUEUE_GET(contended_runqueues);
+ res = check_possible_steal_victim(rq, vix, result_proc, &contended_runqueues);
if (res) {
- DESTROY_WSTACK(contended_runqueues);
+ DESTROY_EQUEUE(contended_runqueues);
return res;
}
}
end_try_steal_task:
- DESTROY_WSTACK(contended_runqueues);
+ DESTROY_EQUEUE(contended_runqueues);
erts_runq_lock(rq);
return runq_got_work_to_execute(rq);
}
diff --git a/erts/emulator/beam/global.h b/erts/emulator/beam/global.h
index 15ffdb2f6f..50a6e1b52f 100644
--- a/erts/emulator/beam/global.h
+++ b/erts/emulator/beam/global.h
@@ -831,6 +831,7 @@ typedef struct {
Eterm* back;
int possibly_empty;
Eterm* end;
+ Eterm* default_equeue;
ErtsAlcType_t alloc_type;
} ErtsEQueue;
@@ -848,12 +849,13 @@ void erl_grow_equeue(ErtsEQueue*, Eterm* def_queue);
EQUE_DEF_QUEUE(q), /* back */ \
1, /* possibly_empty */ \
EQUE_DEF_QUEUE(q) + DEF_EQUEUE_SIZE, /* end */ \
+ EQUE_DEF_QUEUE(q), /* default_equeue */ \
ERTS_ALC_T_ESTACK /* alloc_type */ \
}
#define DESTROY_EQUEUE(q) \
do { \
- if (q.start != EQUE_DEF_QUEUE(q)) { \
+ if (q.start != q.default_equeue) { \
erts_free(q.alloc_type, q.start); \
} \
} while(0)
@@ -870,7 +872,7 @@ do { \
#define EQUEUE_PUT(q, x) \
do { \
if (q.back == q.front && !q.possibly_empty) { \
- erl_grow_equeue(&q, EQUE_DEF_QUEUE(q)); \
+ erl_grow_equeue(&q, q.default_equeue); \
} \
EQUEUE_PUT_UNCHECKED(q, x); \
} while(0)
--
2.51.0