File 2590-Optimize-checking-the-number-of-empty-queues-in-try_.patch of Package erlang
From 970f212e45519e997d60da96375bd7c29116ef6f Mon Sep 17 00:00:00 2001
From: Robin Morisset <rmorisset@meta.com>
Date: Tue, 12 Nov 2024 16:31:39 -0800
Subject: [PATCH 10/15] Optimize checking the number of empty queues in
try_steal_task
Currently the number of empty run queues is checked on every iteration
of the two fast loops (which shows up in profiling), but not at all in
the final loop (the only one that can be slow enough to deserve it).
Instead, check it once before the fast loops and once per iteration of
the final loop. This fixes a small point I had forgotten in the
previous commit.
---
erts/emulator/beam/erl_process.c | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c
index 7697eabb3a..8bc98e6af5 100644
--- a/erts/emulator/beam/erl_process.c
+++ b/erts/emulator/beam/erl_process.c
@@ -4659,12 +4659,15 @@ try_steal_task(ErtsRunQueue *rq, Process **result_proc)
if (active_rqs > blnc_rqs)
active_rqs = blnc_rqs;
+ if (erts_atomic32_read_acqb(&no_empty_run_queues) >= blnc_rqs)
+ goto end_try_steal_task;
+
if (rq->ix < active_rqs) {
/* First try to steal from an inactive run queue... */
if (active_rqs < blnc_rqs) {
int no = blnc_rqs - active_rqs;
int stop_ix = vix = active_rqs + rq->ix % no;
- while (erts_atomic32_read_acqb(&no_empty_run_queues) < blnc_rqs) {
+ while (1) {
res = check_possible_steal_victim(rq, vix, result_proc, &contended_runqueues);
if (res) {
DESTROY_WSTACK(contended_runqueues);
@@ -4681,7 +4684,7 @@ try_steal_task(ErtsRunQueue *rq, Process **result_proc)
vix = rq->ix;
/* ... then try to steal a job from another active queue... */
- while (erts_atomic32_read_acqb(&no_empty_run_queues) < blnc_rqs) {
+ while (1) {
vix++;
if (vix >= active_rqs)
vix = 0;
@@ -4696,7 +4699,9 @@ try_steal_task(ErtsRunQueue *rq, Process **result_proc)
}
/* ... and finally re-try stealing from the queues that were skipped because contended. */
- while (!WSTACK_ISEMPTY(contended_runqueues)) {
+ // We recheck the number of empty runqueues in each iteration, as taking the runqueue lock in check_possible_steal_victim can take quite a while.
+ while (!WSTACK_ISEMPTY(contended_runqueues)
+ && (erts_atomic32_read_acqb(&no_empty_run_queues) < blnc_rqs)) {
vix = WSTACK_POP(contended_runqueues);
res = check_possible_steal_victim(rq, vix, result_proc, NULL);
if (res) {
@@ -4706,6 +4711,7 @@ try_steal_task(ErtsRunQueue *rq, Process **result_proc)
}
}
+end_try_steal_task:
DESTROY_WSTACK(contended_runqueues);
erts_runq_lock(rq);
return runq_got_work_to_execute(rq);
--
2.43.0