File 7821-erts-Better-testcase-for-dirty-signal-handling-race.patch of Package erlang
From 9c6c7b014d155f03b13bdb30aec1fa52eea6d993 Mon Sep 17 00:00:00 2001
From: Rickard Green <rickard@erlang.org>
Date: Thu, 12 Jan 2023 09:55:51 +0100
Subject: [PATCH] [erts] Better testcase for dirty signal handling race
---
erts/emulator/beam/erl_bif_info.c | 57 ++++++++++++
erts/emulator/beam/erl_proc_sig_queue.c | 5 +-
erts/emulator/test/signal_SUITE.erl | 117 +++++++++++++++++++++++-
3 files changed, 177 insertions(+), 2 deletions(-)
diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c
index d5e598515c..fc5039c126 100644
--- a/erts/emulator/beam/erl_bif_info.c
+++ b/erts/emulator/beam/erl_bif_info.c
@@ -4559,6 +4559,44 @@ test_multizero_timeout_in_timeout(void *vproc)
erts_start_timer_callback(0, test_multizero_timeout_in_timeout2, vproc);
}
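+/*
+ * Callback for the 'proc_sig_block' debug command below. It is
+ * executed via an RPC signal in the context that is handling the
+ * target process' signal queue, where it blocks for (roughly) the
+ * number of milliseconds passed in 'arg' by sleeping in a loop.
+ * signal_SUITE uses this to keep dirty signal handling busy for a
+ * controlled amount of time.
+ */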
+static Eterm
+proc_sig_block(Process *c_p, void *arg, int *redsp, ErlHeapFragment **bpp)
+{
+ ErtsMonotonicTime time, timeout_time, ms = (ErtsMonotonicTime) (Sint) arg;
+
+ if (redsp)
+ *redsp = 1;
+
+ time = erts_get_monotonic_time(NULL);
+
+ if (ms < 0)
+ timeout_time = time;
+ else
+ timeout_time = time + ERTS_MSEC_TO_MONOTONIC(ms);
+
+ while (time < timeout_time) {
+ ErtsMonotonicTime timeout = timeout_time - time;
+
+#ifdef __WIN32__
+ Sleep((DWORD) ERTS_MONOTONIC_TO_MSEC(timeout));
+#else
+ {
+ ErtsMonotonicTime to = ERTS_MONOTONIC_TO_USEC(timeout);
+ struct timeval tv;
+
+ tv.tv_sec = (long) to / (1000*1000);
+ tv.tv_usec = (long) to % (1000*1000);
+
+ select(0, NULL, NULL, NULL, &tv);
+ }
+#endif
+
+ time = erts_get_monotonic_time(NULL);
+ }
+
+ return am_ok;
+}
+
BIF_RETTYPE erts_debug_set_internal_state_2(BIF_ALIST_2)
{
/*
@@ -4928,6 +4966,25 @@ BIF_RETTYPE erts_debug_set_internal_state_2(BIF_ALIST_2)
BIF_RET(am_ok);
}
}
+ else if (ERTS_IS_ATOM_STR("proc_sig_block", BIF_ARG_1)) {
+ if (is_tuple_arity(BIF_ARG_2, 2)) {
+ Eterm *tp = tuple_val(BIF_ARG_2);
+ Sint64 time;
+ if (is_internal_pid(tp[1]) && term_to_Sint64(tp[2], &time)) {
+ ErtsMonotonicTime wait_time = time;
+ Eterm res;
+
+ res = erts_proc_sig_send_rpc_request(BIF_P,
+ tp[1],
+ 0,
+ proc_sig_block,
+ (void *) (Sint) wait_time);
+ if (is_non_value(res))
+ BIF_RET(am_false);
+ BIF_RET(am_true);
+ }
+ }
+ }
}
BIF_ERROR(BIF_P, BADARG);
diff --git a/erts/emulator/beam/erl_proc_sig_queue.c b/erts/emulator/beam/erl_proc_sig_queue.c
index c2068b768c..d1d6f7b133 100644
--- a/erts/emulator/beam/erl_proc_sig_queue.c
+++ b/erts/emulator/beam/erl_proc_sig_queue.c
@@ -7304,7 +7304,10 @@ erts_proc_sig_do_wait_dirty_handle_signals__(Process *c_p)
* * A dirty process signal handler starts handling
* signals for the process and unlocks the main
* lock while doing so. This can currently only
- * occur if handling an 'unlink' signal from a port.
+ * occur if handling an 'unlink' signal from a port, or
+ * when handling an alias message where the alias was
+ * created when monitoring a port using the
+ * '{alias, reply_demonitor}' option.
* * While the dirty process signal handler is handling
* signals for the process, the process stops executing
* dirty, gets scheduled on a normal scheduler, and
diff --git a/erts/emulator/test/signal_SUITE.erl b/erts/emulator/test/signal_SUITE.erl
index 076d9f9885..31b4d3431e 100644
--- a/erts/emulator/test/signal_SUITE.erl
+++ b/erts/emulator/test/signal_SUITE.erl
@@ -37,6 +37,7 @@
-export([xm_sig_order/1,
kill2killed/1,
contended_signal_handling/1,
+ dirty_signal_handling_race/1,
busy_dist_exit_signal/1,
busy_dist_demonitor_signal/1,
busy_dist_down_signal/1,
@@ -63,6 +64,7 @@ all() ->
[xm_sig_order,
kill2killed,
contended_signal_handling,
+ dirty_signal_handling_race,
busy_dist_exit_signal,
busy_dist_demonitor_signal,
busy_dist_down_signal,
@@ -186,7 +188,15 @@ contended_signal_handling(Config) when is_list(Config) ->
%% when the bug exists, but this testcase at least
%% sometimes causes a crash when the bug is present.
%%
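+ %% Move the dirty signal handlers to scheduler 1 and, when more
+ %% than one scheduler is online, pin this process to the last
+ %% one, so that signal handling and test execution can proceed
+ %% concurrently on different schedulers.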
+ move_dirty_signal_handlers_to_first_scheduler(),
process_flag(priority, high),
+ case erlang:system_info(schedulers_online) of
+ 1 ->
+ ok;
+ SOnln ->
+ process_flag(scheduler, SOnln),
+ ok
+ end,
Drv = unlink_signal_drv,
ok = load_driver(Config, Drv),
try
@@ -209,7 +219,7 @@ contended_signal_handling_test(Drv, N) ->
contended_signal_handling_cmd_ports([]) ->
ok;
contended_signal_handling_cmd_ports([P|Ps]) ->
- P ! {self(), {command, ""}},
+ P ! {self(), {command, "c"}},
contended_signal_handling_cmd_ports(Ps).
contended_signal_handling_make_ports(_Drv, 0, Ports) ->
@@ -219,6 +229,111 @@ contended_signal_handling_make_ports(Drv, N, Ports) ->
true = is_port(Port),
contended_signal_handling_make_ports(Drv, N-1, [Port|Ports]).
+dirty_signal_handling_race(Config) ->
+ %% This test case triggers more or less the same
+ %% problematic scenario as the contended_signal_handling
+ %% test case tries to trigger. It provokes the scenario
+ %% via another signal and is also much more likely
+ %% (close to 100%) to do so.
+ Tester = self(),
+ move_dirty_signal_handlers_to_first_scheduler(),
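+ %% Select schedulers for Tester (S0) and the helper process
+ %% (S1); when enough schedulers are online, both differ from
+ %% scheduler 1 where the dirty signal handlers now run.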
+ {S0, S1} = case erlang:system_info(schedulers_online) of
+ 1 -> {1, 1};
+ 2 -> {2, 1};
+ SOnln -> {SOnln, SOnln-1}
+ end,
+ process_flag(priority, high),
+ process_flag(scheduler, S0),
+ erts_debug:set_internal_state(available_internal_state, true),
+ Drv = unlink_signal_drv,
+ ok = load_driver(Config, Drv),
+ try
+ %% The {parallelism, true} option will ensure that each
+ %% signal from a process to the port is scheduled, which
+ %% forces the process to release its main lock when
+ %% sending the signal...
+ Port = open_port({spawn, Drv}, [{parallelism, true}]),
+ true = is_port(Port),
+ %% The {alias, reply_demonitor} option will trigger a
+ %% 'demonitor' signal from Tester to the port when an
+ %% alias message sent using the alias is received by
+ %% Tester...
+ MA1 = erlang:monitor(port, Port, [{alias, reply_demonitor}]),
+ MA2 = erlang:monitor(port, Port, [{alias, reply_demonitor}]),
+ Pid = spawn_opt(fun () ->
+ Tester ! go,
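+ %% Give Tester time to reach its dirty
+ %% erts_debug:dirty_cpu(wait, _) call
+ %% before blocking its signal handling.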
+ receive after 500 -> ok end,
+ %% The 'proc_sig_block' test signal will cause
+ %% dirty signal handling for Tester to start
+ %% and then block inside that handling.
+ erts_debug:set_internal_state(proc_sig_block,
+ {Tester, 1000}),
+ %% Tester will be stuck waiting for its main
+ %% lock when being scheduled out from its dirty
+ %% execution. When this alias message is handled
+ %% by the dirty signal handler, Tester will be
+ %% able to acquire the main lock and complete
+ %% the schedule-out operation.
+ MA1 ! {MA1, trigger_demonitor_port_please},
+ erts_debug:set_internal_state(proc_sig_block,
+ {Tester, 100}),
+ %% Tester will have been selected for
+ %% execution, but is stuck waiting for its
+ %% main lock. When this alias message is
+ %% handled by the dirty signal handler,
+ %% Tester will be able to acquire the main
+ %% lock, which lets it enter the problematic
+ %% scenario: dirty signal handling still
+ %% ongoing while it begins executing.
+ MA2 ! {MA2, trigger_demonitor_port_please},
+ erts_debug:set_internal_state(proc_sig_block,
+ {Tester, 500}),
+ ok
+ end, [link, {scheduler, S1}]),
+ receive go -> ok end,
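+ %% Both monitors should still be live; a 'DOWN' message here
+ %% would mean that the port died prematurely.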
+ receive {'DOWN', MA1, port, Port, _} -> ct:fail(unexpected_port_down)
+ after 0 -> ok
+ end,
+ receive {'DOWN', MA2, port, Port, _} -> ct:fail(unexpected_port_down)
+ after 0 -> ok
+ end,
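+ %% Execute dirty while the helper process keeps the dirty
+ %% signal handler busy with Tester's signal queue; being
+ %% scheduled out from, and back in after, this dirty execution
+ %% is what runs into the problematic scenario.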
+ erts_debug:dirty_cpu(wait, 1000),
+ receive
+ {MA1, trigger_demonitor_port_please} -> ok
+ end,
+ receive
+ {MA2, trigger_demonitor_port_please} -> ok
+ end,
+ unlink(Pid),
+ unlink(Port),
+ exit(Pid, kill),
+ exit(Port, kill),
+ false = erlang:is_process_alive(Pid)
+ after
+ ok = erl_ddll:unload_driver(Drv)
+ end,
+ ok.
+
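+%% Set schedulers_online to 1 and wake every dirty process signal
+%% handler so that each of them executes, and thereby migrates to,
+%% the first scheduler. While idle they should then stay on
+%% scheduler 1 after schedulers_online has been restored.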
+move_dirty_signal_handlers_to_first_scheduler() ->
+ SOnln = erlang:system_flag(schedulers_online, 1),
+ try
+ true = lists:foldl(
+ fun (Pid, FoundOne) ->
+ case process_info(Pid, initial_call) of
+ {initial_call, {erts_dirty_process_signal_handler,start,0}} ->
+ Pid ! please_execute_a_bit,
+ true;
+ _ ->
+ FoundOne
+ end
+ end,
+ false,
+ processes())
+ after
+ erlang:system_flag(schedulers_online, SOnln)
+ end,
+ ok.
+
busy_dist_exit_signal(Config) when is_list(Config) ->
BusyTime = 1000,
{ok, BusyChannelNode} = start_node(Config),
--
2.35.3