From 36e1d85230a1f4c6b8df4711bfd44706f97f0d96 Mon Sep 17 00:00:00 2001
From: Rickard Green <rickard@erlang.org>
Date: Thu, 24 Apr 2025 17:17:58 +0200
Subject: [PATCH] [erts] Fix scheduler_wall_time tests in statistics_SUITE
Failures while all dirty schedulers were hogged resulted in a timeout instead
of a failure.
---
erts/emulator/beam/beam_debug.c | 29 ++++++++++-------
erts/emulator/test/statistics_SUITE.erl | 43 ++++++++++++++-----------
2 files changed, 43 insertions(+), 29 deletions(-)
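Illustration only, not part of the upstream commit or of the applied diff: a
minimal sketch of the ordering the patch establishes in
run_scheduler_wall_time_test/1, namely sample the load while the dirty hogs
are still running, then kill every hog and confirm it is gone before
evaluating any assertion that can fail. get_load/1 and Type stand in for the
suite's own helper and argument, erts_debug:dirty_cpu/2 is an internal debug
BIF, and its 'alive_waitexitingonly' option exists only together with the
beam_debug.c change below.

    %% Occupy a dirty CPU scheduler until the hog is killed. Self mirrors the
    %% MeMySelfAndI argument the suite passes; 'alive_waitexitingonly' skips
    %% the extra wait and heap allocation that 'alive_waitexiting' performs
    %% after the hog has been killed.
    Self = self(),
    Hog = spawn_link(fun () ->
                             erts_debug:dirty_cpu(alive_waitexitingonly, Self)
                     end),
    %% Sample the load while the hog is still running ...
    FullScheds = get_load(Type),
    %% ... but tear the hog down before any assertion that can fail; a failing
    %% test process would otherwise block behind the hogged dirty schedulers
    %% (disk I/O, large GCs) and the case would time out instead of failing.
    unlink(Hog),
    exit(Hog, kill),
    false = is_process_alive(Hog),
    receive after 1000 -> ok end,  %% give the dirty scheduler time to finish
    {false, _} = {lists:any(fun (Load) -> Load < 80 end, FullScheds), FullScheds}

Killing the hogs before the assertions run is what turns a would-be timeout
back into an ordinary test case failure.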
diff --git a/erts/emulator/beam/beam_debug.c b/erts/emulator/beam/beam_debug.c
index 2eb95269d9..33314badd7 100644
--- a/erts/emulator/beam/beam_debug.c
+++ b/erts/emulator/beam/beam_debug.c
@@ -1242,13 +1242,15 @@ dirty_test(Process *c_p, Eterm type, Eterm arg1, Eterm arg2, ErtsCodePtr I)
dirty_send_message(c_p, arg2, AM_done);
ERTS_BIF_PREP_RET(ret, am_ok);
}
- else if (ERTS_IS_ATOM_STR("alive_waitexiting", arg1)) {
+ else if (ERTS_IS_ATOM_STR("alive_waitexiting", arg1)
+ || ERTS_IS_ATOM_STR("alive_waitexitingonly", arg1)) {
Process *real_c_p = erts_proc_shadow2real(c_p);
Eterm *hp, *hp2;
Uint sz;
int i;
ErtsSchedulerData *esdp = erts_get_scheduler_data();
int dirty_io = esdp->type == ERTS_SCHED_DIRTY_IO;
+ int no_wait_alloc = ERTS_IS_ATOM_STR("alive_waitexitingonly", arg1);
if (ERTS_PROC_IS_EXITING(real_c_p))
goto badarg;
@@ -1262,16 +1264,21 @@ dirty_test(Process *c_p, Eterm type, Eterm arg1, Eterm arg2, ErtsCodePtr I)
erts_thr_yield();
}
- ms_wait(c_p, make_small(1000), 0);
-
- /* Should still be able to allocate memory */
- hp = HAlloc(c_p, 3); /* Likely on heap */
- sz = 10000;
- hp2 = HAlloc(c_p, sz); /* Likely in heap fragment */
- *hp2 = make_pos_bignum_header(sz);
- for (i = 1; i < sz; i++)
- hp2[i] = (Eterm) 4711;
- ERTS_BIF_PREP_RET(ret, TUPLE2(hp, am_ok, make_big(hp2)));
+ if (no_wait_alloc) {
+ ERTS_BIF_PREP_RET(ret, am_ok);
+ }
+ else {
+ ms_wait(c_p, make_small(1000), 0);
+
+ /* Should still be able to allocate memory */
+ hp = HAlloc(c_p, 3); /* Likely on heap */
+ sz = 10000;
+ hp2 = HAlloc(c_p, sz); /* Likely in heap fragment */
+ *hp2 = make_pos_bignum_header(sz);
+ for (i = 1; i < sz; i++)
+ hp2[i] = (Eterm) 4711;
+ ERTS_BIF_PREP_RET(ret, TUPLE2(hp, am_ok, make_big(hp2)));
+ }
}
else {
badarg:
diff --git a/erts/emulator/test/statistics_SUITE.erl b/erts/emulator/test/statistics_SUITE.erl
index 014c0a114e..41cb482b66 100644
--- a/erts/emulator/test/statistics_SUITE.erl
+++ b/erts/emulator/test/statistics_SUITE.erl
@@ -337,6 +337,12 @@ scheduler_wall_time_test(Type) ->
end.
run_scheduler_wall_time_test(Type) ->
+    %% All dirty schedulers are hogged for a period of time during
+    %% these tests. If the test case fails while they are hogged, all
+    %% disk I/O and large GCs are blocked, making the test case time
+    %% out instead of failing. I.e., do not add anything that can fail
+    %% the test case while all dirty schedulers are hogged...
+
%% Should return undefined if system_flag is not turned on yet
undefined = statistics(Type),
%% Turn on statistics
@@ -383,7 +389,7 @@ run_scheduler_wall_time_test(Type) ->
end,
StartDirtyHog = fun(Func) ->
F = fun () ->
- erts_debug:Func(alive_waitexiting,
+ erts_debug:Func(alive_waitexitingonly,
MeMySelfAndI)
end,
Pid = spawn_link(F),
@@ -394,8 +400,9 @@ run_scheduler_wall_time_test(Type) ->
%% Max on one, the other schedulers empty (hopefully)
%% Be generous the process can jump between schedulers
%% which is ok and we don't want the test to fail for wrong reasons
- _L1 = [S1Load|EmptyScheds1] = get_load(Type),
- {true,_} = {S1Load > 50,S1Load},
+ L1 = get_load(Type),
+ [High1Load|EmptyScheds1] = lists:reverse(lists:sort(L1)),
+ {true,_} = {High1Load > 50,High1Load},
{false,_} = {lists:any(fun(Load) -> Load > 50 end, EmptyScheds1),EmptyScheds1},
{true,_} = {lists:sum(EmptyScheds1) < 60,EmptyScheds1},
@@ -417,33 +424,33 @@ run_scheduler_wall_time_test(Type) ->
%% 100% load. Need to take into consideration an odd number of
%% schedulers and also special consideration for when there is
%% only 1 scheduler
- LastHogs = [StartHog() || _ <- lists:seq(1, (Schedulers+1) div 2),
- Schedulers =/= 1],
+
LastDirtyCPUHogs = [StartDirtyHog(dirty_cpu)
|| _ <- lists:seq(1, (DirtyCPUSchedulers+1) div 2),
DirtyCPUSchedulers =/= 1],
LastDirtyIOHogs = [StartDirtyHog(dirty_io)
|| _ <- lists:seq(1, (DirtyIOSchedulers+1) div 2),
DirtyIOSchedulers =/= 1],
+ LastHogs = [StartHog() || _ <- lists:seq(1, (Schedulers+1) div 2),
+ Schedulers =/= 1],
FullScheds = get_load(Type),
ct:log("FullScheds: ~w",[FullScheds]),
+
+ AllHogs = [P1|HalfHogs++HalfDirtyCPUHogs++HalfDirtyIOHogs
+ ++LastHogs++LastDirtyCPUHogs++LastDirtyIOHogs],
+
+ KillHog = fun (HP) -> unlink(HP), exit(HP, kill) end,
+ WaitKilledHog = fun (HP) -> false = is_process_alive(HP) end,
+ [KillHog(Pid) || Pid <- AllHogs],
+ [WaitKilledHog(Pid) || Pid <- AllHogs],
+ receive after 1000 -> ok end, %% Give dirty schedulers time to complete...
+
{false,_} = {lists:any(fun(Load) -> Load < 80 end, FullScheds),FullScheds},
FullLoad = lists:sum(FullScheds) div TotLoadSchedulers,
if FullLoad > 90 -> ok;
true -> exit({fullload, FullLoad})
end,
- KillHog = fun (HP) ->
- HPM = erlang:monitor(process, HP),
- unlink(HP),
- exit(HP, kill),
- receive
- {'DOWN', HPM, process, HP, killed} ->
- ok
- end
- end,
- [KillHog(Pid) || Pid <- [P1|HalfHogs++HalfDirtyCPUHogs++HalfDirtyIOHogs
- ++LastHogs++LastDirtyCPUHogs++LastDirtyIOHogs]],
receive after 2000 -> ok end, %% Give dirty schedulers time to complete...
AfterLoad = get_load(Type),
io:format("AfterLoad=~p~n", [AfterLoad]),
@@ -454,12 +461,12 @@ run_scheduler_wall_time_test(Type) ->
end.
get_load(Type) ->
+ %% Returns info for each *online* scheduler in scheduler id order
Start = erlang:statistics(Type),
timer:sleep(1500),
End = erlang:statistics(Type),
- lists:reverse(
- lists:sort(load_percentage(online_statistics(Start),online_statistics(End)))).
+ load_percentage(online_statistics(Start),online_statistics(End)).
%% We are only interested in schedulers that are online to remove all
%% offline normal and dirty cpu schedulers (dirty io cannot be offline)
--
2.43.0