From 36e1d85230a1f4c6b8df4711bfd44706f97f0d96 Mon Sep 17 00:00:00 2001
From: Rickard Green <rickard@erlang.org>
Date: Thu, 24 Apr 2025 17:17:58 +0200
Subject: [PATCH] [erts] Fix scheduler_wall_time tests in statistics_SUITE
Failures while all dirty schedulers were hogged resulted in a timeout instead
of a failure.
---
erts/emulator/beam/beam_debug.c | 29 ++++++++++-------
erts/emulator/test/statistics_SUITE.erl | 43 ++++++++++++++-----------
2 files changed, 43 insertions(+), 29 deletions(-)
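Illustration only, not part of the upstream commit or of the applied diff: a
minimal sketch of the ordering the patch establishes in
run_scheduler_wall_time_test/1, namely sample the load while the dirty hogs
are still running, then kill every hog and confirm it is gone before
evaluating any assertion that can fail. get_load/1 and Type stand in for the
suite's own helper and argument, erts_debug:dirty_cpu/2 is an internal debug
BIF, and its 'alive_waitexitingonly' option exists only together with the
beam_debug.c change below.

    %% Occupy a dirty CPU scheduler until the hog is killed. Self mirrors the
    %% MeMySelfAndI argument the suite passes; 'alive_waitexitingonly' skips
    %% the extra wait and heap allocation that 'alive_waitexiting' performs
    %% after the hog has been killed.
    Self = self(),
    Hog = spawn_link(fun () ->
                             erts_debug:dirty_cpu(alive_waitexitingonly, Self)
                     end),
    %% Sample the load while the hog is still running ...
    FullScheds = get_load(Type),
    %% ... but tear the hog down before any assertion that can fail; a failing
    %% test process would otherwise block behind the hogged dirty schedulers
    %% (disk I/O, large GCs) and the case would time out instead of failing.
    unlink(Hog),
    exit(Hog, kill),
    false = is_process_alive(Hog),
    receive after 1000 -> ok end,  %% give the dirty scheduler time to finish
    {false, _} = {lists:any(fun (Load) -> Load < 80 end, FullScheds), FullScheds}

Killing the hogs before the assertions run is what turns a would-be timeout
back into an ordinary test case failure.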
diff --git a/erts/emulator/beam/beam_debug.c b/erts/emulator/beam/beam_debug.c
index 2eb95269d9..33314badd7 100644
--- a/erts/emulator/beam/beam_debug.c
+++ b/erts/emulator/beam/beam_debug.c
@@ -1242,13 +1242,15 @@ dirty_test(Process *c_p, Eterm type, Eterm arg1, Eterm arg2, ErtsCodePtr I)
dirty_send_message(c_p, arg2, AM_done);
ERTS_BIF_PREP_RET(ret, am_ok);
}
- else if (ERTS_IS_ATOM_STR("alive_waitexiting", arg1)) {
+ else if (ERTS_IS_ATOM_STR("alive_waitexiting", arg1)
+ || ERTS_IS_ATOM_STR("alive_waitexitingonly", arg1)) {
Process *real_c_p = erts_proc_shadow2real(c_p);
Eterm *hp, *hp2;
Uint sz;
int i;
ErtsSchedulerData *esdp = erts_get_scheduler_data();
int dirty_io = esdp->type == ERTS_SCHED_DIRTY_IO;
+ int no_wait_alloc = ERTS_IS_ATOM_STR("alive_waitexitingonly", arg1);
if (ERTS_PROC_IS_EXITING(real_c_p))
goto badarg;
@@ -1262,16 +1264,21 @@ dirty_test(Process *c_p, Eterm type, Eterm arg1, Eterm arg2, ErtsCodePtr I)
erts_thr_yield();
}
- ms_wait(c_p, make_small(1000), 0);
-
- /* Should still be able to allocate memory */
- hp = HAlloc(c_p, 3); /* Likely on heap */
- sz = 10000;
- hp2 = HAlloc(c_p, sz); /* Likely in heap fragment */
- *hp2 = make_pos_bignum_header(sz);
- for (i = 1; i < sz; i++)
- hp2[i] = (Eterm) 4711;
- ERTS_BIF_PREP_RET(ret, TUPLE2(hp, am_ok, make_big(hp2)));
+ if (no_wait_alloc) {
+ ERTS_BIF_PREP_RET(ret, am_ok);
+ }
+ else {
+ ms_wait(c_p, make_small(1000), 0);
+
+ /* Should still be able to allocate memory */
+ hp = HAlloc(c_p, 3); /* Likely on heap */
+ sz = 10000;
+ hp2 = HAlloc(c_p, sz); /* Likely in heap fragment */
+ *hp2 = make_pos_bignum_header(sz);
+ for (i = 1; i < sz; i++)
+ hp2[i] = (Eterm) 4711;
+ ERTS_BIF_PREP_RET(ret, TUPLE2(hp, am_ok, make_big(hp2)));
+ }
}
else {
badarg:
diff --git a/erts/emulator/test/statistics_SUITE.erl b/erts/emulator/test/statistics_SUITE.erl
index 014c0a114e..41cb482b66 100644
--- a/erts/emulator/test/statistics_SUITE.erl
+++ b/erts/emulator/test/statistics_SUITE.erl
@@ -337,6 +337,12 @@ scheduler_wall_time_test(Type) ->
end.
run_scheduler_wall_time_test(Type) ->
+    %% All dirty schedulers are hogged for a period of time during
+    %% these tests. If the test case fails while they are hogged, all
+    %% disk I/O and large GCs are blocked, making the test case time
+    %% out instead of failing. I.e., do not add anything that can fail
+    %% the test case while all dirty schedulers are hogged...
+
%% Should return undefined if system_flag is not turned on yet
undefined = statistics(Type),
%% Turn on statistics
@@ -383,7 +389,7 @@ run_scheduler_wall_time_test(Type) ->
end,
StartDirtyHog = fun(Func) ->
F = fun () ->
- erts_debug:Func(alive_waitexiting,
+ erts_debug:Func(alive_waitexitingonly,
MeMySelfAndI)
end,
Pid = spawn_link(F),
@@ -394,8 +400,9 @@ run_scheduler_wall_time_test(Type) ->
%% Max on one, the other schedulers empty (hopefully)
%% Be generous the process can jump between schedulers
%% which is ok and we don't want the test to fail for wrong reasons
- _L1 = [S1Load|EmptyScheds1] = get_load(Type),
- {true,_} = {S1Load > 50,S1Load},
+ L1 = get_load(Type),
+ [High1Load|EmptyScheds1] = lists:reverse(lists:sort(L1)),
+ {true,_} = {High1Load > 50,High1Load},
{false,_} = {lists:any(fun(Load) -> Load > 50 end, EmptyScheds1),EmptyScheds1},
{true,_} = {lists:sum(EmptyScheds1) < 60,EmptyScheds1},
@@ -417,33 +424,33 @@ run_scheduler_wall_time_test(Type) ->
%% 100% load. Need to take into consideration an odd number of
%% schedulers and also special consideration for when there is
%% only 1 scheduler
- LastHogs = [StartHog() || _ <- lists:seq(1, (Schedulers+1) div 2),
- Schedulers =/= 1],
+
LastDirtyCPUHogs = [StartDirtyHog(dirty_cpu)
|| _ <- lists:seq(1, (DirtyCPUSchedulers+1) div 2),
DirtyCPUSchedulers =/= 1],
LastDirtyIOHogs = [StartDirtyHog(dirty_io)
|| _ <- lists:seq(1, (DirtyIOSchedulers+1) div 2),
DirtyIOSchedulers =/= 1],
+ LastHogs = [StartHog() || _ <- lists:seq(1, (Schedulers+1) div 2),
+ Schedulers =/= 1],
FullScheds = get_load(Type),
ct:log("FullScheds: ~w",[FullScheds]),
+
+ AllHogs = [P1|HalfHogs++HalfDirtyCPUHogs++HalfDirtyIOHogs
+ ++LastHogs++LastDirtyCPUHogs++LastDirtyIOHogs],
+
+ KillHog = fun (HP) -> unlink(HP), exit(HP, kill) end,
+ WaitKilledHog = fun (HP) -> false = is_process_alive(HP) end,
+ [KillHog(Pid) || Pid <- AllHogs],
+ [WaitKilledHog(Pid) || Pid <- AllHogs],
+ receive after 1000 -> ok end, %% Give dirty schedulers time to complete...
+
{false,_} = {lists:any(fun(Load) -> Load < 80 end, FullScheds),FullScheds},
FullLoad = lists:sum(FullScheds) div TotLoadSchedulers,
if FullLoad > 90 -> ok;
true -> exit({fullload, FullLoad})
end,
- KillHog = fun (HP) ->
- HPM = erlang:monitor(process, HP),
- unlink(HP),
- exit(HP, kill),
- receive
- {'DOWN', HPM, process, HP, killed} ->
- ok
- end
- end,
- [KillHog(Pid) || Pid <- [P1|HalfHogs++HalfDirtyCPUHogs++HalfDirtyIOHogs
- ++LastHogs++LastDirtyCPUHogs++LastDirtyIOHogs]],
receive after 2000 -> ok end, %% Give dirty schedulers time to complete...
AfterLoad = get_load(Type),
io:format("AfterLoad=~p~n", [AfterLoad]),
@@ -454,12 +461,12 @@ run_scheduler_wall_time_test(Type) ->
end.
get_load(Type) ->
+ %% Returns info for each *online* scheduler in scheduler id order
Start = erlang:statistics(Type),
timer:sleep(1500),
End = erlang:statistics(Type),
- lists:reverse(
- lists:sort(load_percentage(online_statistics(Start),online_statistics(End)))).
+ load_percentage(online_statistics(Start),online_statistics(End)).
%% We are only interested in schedulers that are online to remove all
%% offline normal and dirty cpu schedulers (dirty io cannot be offline)
--
2.43.0