File 2586-Use-the-ErtsRunQueueInfo-to-know-how-many-tasks-to-s.patch of Package erlang

From 249a5bd4e0790c234ac64e092dca037d260fa79a Mon Sep 17 00:00:00 2001
From: Robin Morisset <rmorisset@meta.com>
Date: Wed, 23 Oct 2024 03:07:38 -0700
Subject: [PATCH 06/15] Use the ErtsRunQueueInfo to know how many tasks to
 steal

Currently we steal half the tasks by alternating between taking a
process and leaving it behind. With this commit we instead compute how
many tasks to steal, and then steal that many without skipping any
non-bound processes.
This should halve the number of pointer dereferenced during task
stealing, and thus also halve the number of cache misses, making that
(highly contended) critical section faster
---
 erts/emulator/beam/erl_process.c | 38 +++++++++++++++++++-------------
 1 file changed, 23 insertions(+), 15 deletions(-)

diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c
index 6b458aa144..6dee6c0747 100644
--- a/erts/emulator/beam/erl_process.c
+++ b/erts/emulator/beam/erl_process.c
@@ -4475,7 +4475,6 @@ try_steal_task_from_victim(ErtsRunQueue *rq, ErtsRunQueue *vrq, Uint32 flags, Pr
 {
     Uint32 procs_qmask = flags & ERTS_RUNQ_FLGS_PROCS_QMASK;
     int max_prio_bit;
-    int skip_process = 0;
     ErtsRunPrioQueue *rpq;
 #define PSTACK_TYPE ErtsStolenProcess
     PSTACK_DECLARE(stolen_processes, 16);
@@ -4498,18 +4497,28 @@ try_steal_task_from_victim(ErtsRunQueue *rq, ErtsRunQueue *vrq, Uint32 flags, Pr
 	Process *proc;
         unsigned max_processes_to_steal;
         unsigned n_procs_stolen[ERTS_NO_PROC_PRIO_LEVELS];
+        unsigned prio_q;
+        ErtsRunQueueInfo *rqi;
 
 	max_prio_bit = procs_qmask & -procs_qmask;
 	switch (max_prio_bit) {
 	case MAX_BIT:
-	    rpq = &vrq->procs.prio[PRIORITY_MAX];
+            prio_q = PRIORITY_MAX;
+            rqi = &vrq->procs.prio_info[prio_q];
+            max_processes_to_steal = erts_atomic32_read_dirty(&rqi->len);
 	    break;
 	case HIGH_BIT:
-	    rpq = &vrq->procs.prio[PRIORITY_HIGH];
+            prio_q = PRIORITY_HIGH;
+            rqi = &vrq->procs.prio_info[prio_q];
+            max_processes_to_steal = erts_atomic32_read_dirty(&rqi->len);
 	    break;
 	case NORMAL_BIT:
 	case LOW_BIT:
-	    rpq = &vrq->procs.prio[PRIORITY_NORMAL];
+            prio_q = PRIORITY_NORMAL;
+            rqi = &vrq->procs.prio_info[PRIORITY_NORMAL];
+            max_processes_to_steal = erts_atomic32_read_dirty(&rqi->len);
+            rqi = &vrq->procs.prio_info[PRIORITY_LOW];
+            max_processes_to_steal += erts_atomic32_read_dirty(&rqi->len);
 	    break;
 	case 0:
 	    goto no_procs;
@@ -4519,18 +4528,20 @@ try_steal_task_from_victim(ErtsRunQueue *rq, ErtsRunQueue *vrq, Uint32 flags, Pr
 	    goto no_procs;
 	}
 
-        max_processes_to_steal = 100;
+        rpq = &vrq->procs.prio[prio_q];
+        // Steal at least one task, even if there is a single one
+        max_processes_to_steal++;
+        // Only steal half the tasks (to balance the load between the victim runqueue and this one)
+        max_processes_to_steal /= 2;
+        // Don't steal too many tasks at once, to keep the critical section from getting too long
+        max_processes_to_steal = max_processes_to_steal > 100 ? 100 : max_processes_to_steal;
         for (int i = 0; i < ERTS_NO_PROC_PRIO_LEVELS; ++i) {
             n_procs_stolen[i] = 0;
         }
 	prev_proc = NULL;
 	proc = rpq->first;
-	while (proc) {
-            // We try to steal roughly half the processes that we can steal.
-            if (skip_process) {
-                skip_process = 0;
-                prev_proc = proc;
-            } else if (erts_try_change_runq_proc(proc, rq)) {
+	while (proc && max_processes_to_steal) {
+            if (erts_try_change_runq_proc(proc, rq)) {
                 erts_aint32_t state = erts_atomic32_read_acqb(&proc->state);
                 int prio = (int) ERTS_PSFLGS_GET_PRQ_PRIO(state);
                 ErtsStolenProcess *sp = PSTACK_PUSH(stolen_processes);
@@ -4538,10 +4549,7 @@ try_steal_task_from_victim(ErtsRunQueue *rq, ErtsRunQueue *vrq, Uint32 flags, Pr
                 sp->prio = prio;
                 n_procs_stolen[prio]++;
                 unqueue_process_no_update_lengths(rpq, prev_proc, proc);
-                if (--max_processes_to_steal == 0) {
-                    break;
-                }
-                skip_process = 1;
+                --max_processes_to_steal;
             } else {
                 prev_proc = proc;
             }
-- 
2.43.0

openSUSE Build Service is sponsored by