File 1373-erts-Ensure-dist-spawn-abort-on-decode-failure.patch of Package erlang
From 71d70840d1bd7e253ec9bf6a096707725e347000 Mon Sep 17 00:00:00 2001
From: Rickard Green <rickard@erlang.org>
Date: Fri, 6 Oct 2023 23:52:35 +0200
Subject: [PATCH] [erts] Ensure dist spawn abort on decode failure
---
erts/emulator/beam/erl_process.c | 9 ++++
erts/emulator/test/distribution_SUITE.erl | 62 +++++++++++++++++++++-
erts/preloaded/ebin/erts_internal.beam | Bin 27772 -> 27952 bytes
erts/preloaded/src/erts_internal.erl | 37 +++++++------
4 files changed, 91 insertions(+), 17 deletions(-)
diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c
index 776094dfbf..f573d5863b 100644
--- a/erts/emulator/beam/erl_process.c
+++ b/erts/emulator/beam/erl_process.c
@@ -12560,15 +12560,24 @@ erl_create_process(Process* parent, /* Parent of process (default group leader).
p->u.initial.function = tp[2];
p->u.initial.arity = (Uint) unsigned_val(tp[3]);
+ ASSERT(locks & ERTS_PROC_LOCK_MAIN);
ASSERT(locks & ERTS_PROC_LOCK_MSGQ);
/*
* Pass the (on external format) encoded argument list as
* *first* message to the process. Note that this message
* *must* be first in the message queue of the newly
* spawned process!
+ *
+ * After the argument list, pass the message 'dist_spawn_init'.
+ * This makes it possible for the spawned process to detect a
+ * decode failure of the argument list. If 'dist_spawn_init'
+ * appears as first message, the decode of the argument list has
+ * failed and the process should be terminated abnormally.
*/
erts_queue_dist_message(p, locks, so->edep, so->ede_hfrag,
token, parent_id);
+ erts_queue_message(p, locks, erts_alloc_message(0, NULL),
+ am_dist_spawn_init, am_system);
erts_proc_unlock(p, locks & ERTS_PROC_LOCKS_ALL_MINOR);
diff --git a/erts/emulator/test/distribution_SUITE.erl b/erts/emulator/test/distribution_SUITE.erl
index 23a6018e69..eb2c4f2a7c 100644
--- a/erts/emulator/test/distribution_SUITE.erl
+++ b/erts/emulator/test/distribution_SUITE.erl
@@ -62,6 +62,7 @@
bad_dist_ext_control/1,
bad_dist_ext_connection_id/1,
bad_dist_ext_size/1,
+ bad_dist_ext_spawn_request_arg_list/1,
start_epmd_false/1, no_epmd/1, epmd_module/1,
bad_dist_fragments/1,
exit_dist_fragments/1,
@@ -124,7 +125,8 @@ groups() ->
{bad_dist_ext, [],
[bad_dist_ext_receive, bad_dist_ext_process_info,
bad_dist_ext_size,
- bad_dist_ext_control, bad_dist_ext_connection_id]},
+ bad_dist_ext_control, bad_dist_ext_connection_id,
+ bad_dist_ext_spawn_request_arg_list]},
{message_latency, [],
[message_latency_large_message,
message_latency_large_link_exit,
@@ -1752,6 +1754,8 @@ test_system_limit(Config) when is_list(Config) ->
-define(DOP_PAYLOAD_EXIT2_TT, 27).
-define(DOP_PAYLOAD_MONITOR_P_EXIT, 28).
+-define(DOP_SPAWN_REQUEST, 29).
+
start_monitor(Offender,P) ->
Parent = self(),
Q = spawn(Offender,
@@ -2322,6 +2326,53 @@ bad_dist_ext_size(Config) when is_list(Config) ->
stop_node(Offender),
stop_node(Victim).
+bad_dist_ext_spawn_request_arg_list(Config) when is_list(Config) ->
+ {ok, Offender} = start_node(bad_dist_spawn_request_arg_list_offender, "-connect_all false"),
+ {ok, Victim} = start_node(bad_dist_spawn_request_arg_list_victim, "-connect_all false"),
+ Parent = self(),
+ start_node_monitors([Offender,Victim]),
+ SuccessfulSpawn = make_ref(),
+ BrokenSpawn = make_ref(),
+ P = spawn_link(
+ Offender,
+ fun () ->
+ ReqId1 = make_ref(),
+ dctrl_dop_spawn_request(Victim, ReqId1, self(), group_leader(),
+ {erlang, send, 2}, [],
+ dmsg_ext([self(), SuccessfulSpawn])),
+ receive SuccessfulSpawn -> Parent ! SuccessfulSpawn end,
+ receive BrokenSpawn -> ok end,
+ ReqId2 = make_ref(),
+ dctrl_dop_spawn_request(Victim, ReqId2, self(), group_leader(),
+ {erlang, send, 2}, [],
+ dmsg_bad_atom_cache_ref()),
+ Parent ! BrokenSpawn,
+ receive after infinity -> ok end
+ end),
+ receive SuccessfulSpawn -> ok end,
+ verify_up(Offender, Victim),
+ P ! BrokenSpawn,
+ receive BrokenSpawn -> ok end,
+ verify_down(Offender, connection_closed, Victim, killed),
+ [] = erpc:call(
+ Victim,
+ fun () ->
+ lists:filter(
+ fun (Proc) ->
+ case process_info(Proc, current_function) of
+ {current_function,
+ {erts_internal, dist_spawn_init, 1}} ->
+ true;
+ _ ->
+ false
+ end
+ end,
+ processes())
+ end),
+ unlink(P),
+ stop_node(Offender),
+ stop_node(Victim),
+ ok.
bad_dist_struct_check_msgs([]) ->
receive
@@ -2385,6 +2436,15 @@ dctrl_dop_send(To, Msg) ->
dmsg_ext({?DOP_SEND, ?COOKIE, To}),
dmsg_ext(Msg)]).
+dctrl_dop_spawn_request(Node, ReqId, From, GL, MFA, OptList, ArgListExt) ->
+ %% {29, ReqId, From, GroupLeader, {Module, Function, Arity}, OptList}
+ %%
+ %% Followed by ArgList.
+ dctrl_send(ensure_dctrl(Node),
+ [dmsg_hdr(),
+ dmsg_ext({?DOP_SPAWN_REQUEST, ReqId, From, GL, MFA, OptList}),
+ ArgListExt]).
+
send_bad_structure(Offender,Victim,Bad,WhereToPutSelf) ->
send_bad_structure(Offender,Victim,Bad,WhereToPutSelf,[]).
send_bad_structure(Offender,Victim,Bad,WhereToPutSelf,PayLoad) ->
diff --git a/erts/preloaded/src/erts_internal.erl b/erts/preloaded/src/erts_internal.erl
index c7716e2740..024014bbc3 100644
--- a/erts/preloaded/src/erts_internal.erl
+++ b/erts/preloaded/src/erts_internal.erl
@@ -918,25 +918,30 @@ dist_spawn_request(_Node, _MFA, _Opts, _Type) ->
dist_spawn_init(MFA) ->
%%
- %% The argument list is passed as a message
- %% to the newly created process. This since
- %% it might be large and require a substantial
- %% amount of work to decode. This way we put
- %% this work on the newly created process
- %% (which can execute in parallel with all
- %% other tasks) instead of on the distribution
- %% channel code which is a bottleneck in the
- %% system.
- %%
- %% erl_create_process() ensures that the
- %% argument list to use in apply is
- %% guaranteed to be the first message in the
- %% message queue.
+ %% The argument list is passed as a message to the newly created process.
+ %% This since it might be large and require a substantial amount of work
+ %% to decode. This way we put this work on the newly created process
+ %% (which can execute in parallel with all other tasks) instead of on the
+ %% distribution channel code which is a bottleneck in the system.
+ %%
+ %% erl_create_process() adds two messages to the message queue. These two
+ %% messages are guaranteed to be first in the message queue. First the
+ %% argument list to use followed by a 'dist_spawn_init' message. The
+ %% 'dist_spawn_init' message makes it possible to detect decode failures
+ %% of the argument list.
%%
{M, F, _NoA} = MFA,
receive
- A ->
- erlang:apply(M, F, A)
+ A when A =/= dist_spawn_init ->
+ receive dist_spawn_init -> ok end,
+ erlang:apply(M, F, A);
+ dist_spawn_init ->
+ %% Missing argument list due to faulty encoding of the argument
+ %% list. The failed decode operation of the argument list caused
+ %% the message to be removed from the message queue and also
+ %% scheduled a take down of the connection. We, however, need to
+ % ensure that this process is terminated...
+ exit(argument_list_decode_failure)
end.
%%
--
2.35.3