Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
home:Ledest:erlang:19
erlang
1435-kernel-Fix-deadlock-caused-by-net_kernel-s...
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File 1435-kernel-Fix-deadlock-caused-by-net_kernel-setopts-new.patch of Package erlang
From d0ea376d289bb38cca79db112bfd165bafbe196f Mon Sep 17 00:00:00 2001 From: Rickard Green <rickard@erlang.org> Date: Thu, 11 Aug 2022 23:09:42 +0200 Subject: [PATCH] [kernel] Fix deadlock caused by net_kernel:setopts(new, _) Both pending connection owners and net_kernel could do synchronous requests towards eachother when calling net_kernel:setopts(new, _) which could end up in a deadlock. This is solved by making asynchronous requests from net_kernel instead of synchronous requests. --- lib/kernel/src/net_kernel.erl | 145 ++++++++++++++------- lib/kernel/test/erl_distribution_SUITE.erl | 42 +++++- 2 files changed, 139 insertions(+), 48 deletions(-) diff --git a/lib/kernel/src/net_kernel.erl b/lib/kernel/src/net_kernel.erl index 8dfae3a505..fcb969e0af 100644 --- a/lib/kernel/src/net_kernel.erl +++ b/lib/kernel/src/net_kernel.erl @@ -109,7 +109,8 @@ listen, %% list of #listen allowed, %% list of allowed nodes in a restricted system verbose = 0, %% level of verboseness - publish_on_nodes = undefined + publish_on_nodes = undefined, + req_map = #{} %% Map for outstanding async requests }). -record(listen, { @@ -676,36 +677,13 @@ handle_call({new_ticktime,_T,_TP}, async_reply({reply, {ongoing_change_to, T}, State}, From); handle_call({setopts, new, Opts}, From, State) -> - Ret = setopts_new(Opts, State), - async_reply({reply, Ret, State}, From); + setopts_new(Opts, From, State); handle_call({setopts, Node, Opts}, From, State) -> - Return = - case ets:lookup(sys_dist, Node) of - [Conn] when Conn#connection.state =:= up -> - case call_owner(Conn#connection.owner, {setopts, Opts}) of - {ok, Ret} -> Ret; - _ -> {error, noconnection} - end; - - _ -> - {error, noconnection} - end, - async_reply({reply, Return, State}, From); + opts_node(setopts, Node, Opts, From, State); handle_call({getopts, Node, Opts}, From, State) -> - Return = - case ets:lookup(sys_dist, Node) of - [Conn] when Conn#connection.state =:= up -> - case call_owner(Conn#connection.owner, {getopts, Opts}) of - {ok, Ret} -> Ret; - _ -> {error, noconnection} - end; - - _ -> - {error, noconnection} - end, - async_reply({reply, Return, State}, From); + opts_node(getopts, Node, Opts, From, State); handle_call(_Msg, _From, State) -> {noreply, State}. @@ -716,6 +694,14 @@ handle_info({SetupPid, {is_pending, Node {noreply, State}; %% +%% Responses to asynchronous requests we've made... +%% +handle_info({ReqId, Reply}, S) -> + handle_async_response(reply, ReqId, Reply, S); +handle_info({'DOWN', ReqId, process, _Pid, Reason}, S) -> + handle_async_response(down, ReqId, Reason, S); + +%% %% Handle different types of process terminations. %% handle_info({'EXIT', From, Reason}, State) when is_pid(From) -> @@ -1624,17 +1610,38 @@ async_gen_server_reply(From, Msg) -> ok end. -call_owner(Owner, Msg) -> - Mref = monitor(process, Owner), - Owner ! {self(), Mref, Msg}, - receive - {Mref, Reply} -> - erlang:demonitor(Mref, [flush]), - {ok, Reply}; - {'DOWN', Mref, _, _, _} -> - error +handle_async_response(ResponseType, ReqId, Result, #state{req_map = ReqMap0} = S0) -> + case maps:take(ReqId, ReqMap0) of + error -> {noreply, S0}; + {{SetGetOpts, From}, ReqMap1} when SetGetOpts == setopts; + SetGetOpts == getopts -> + ResponseType =:= down orelse erlang:demonitor(ReqId, [flush]), + Reply = case ResponseType of + reply -> Result; + down -> {error, noconnection} + end, + S1 = S0#state{req_map = ReqMap1}, + async_reply({reply, Reply, S1}, From); + {{setopts_new, Op}, ReqMap1} -> + ResponseType =:= down orelse erlang:demonitor(ReqId, [flush]), + case maps:get(Op, ReqMap1) of + {setopts_new, From, 1} -> + %% Last response for this operation... + ReqMap2 = maps:remove(Op, ReqMap1), + S1 = S0#state{req_map = ReqMap2}, + async_reply({reply, ok, S1}, From); + {setopts_new, From, N} -> + ReqMap2 = ReqMap1#{Op => {setopts_new, From, N-1}}, + S1 = S0#state{req_map = ReqMap2}, + {noreply, S1} + end end. +send_owner_request(ReqOpMap, Label, Owner, Msg) -> + ReqId = monitor(process, Owner), + Owner ! {self(), ReqId, Msg}, + ReqOpMap#{ReqId => Label}. + -spec setopts(Node, Options) -> ok | {error, Reason} | ignored when Node :: node() | new, Options :: [inet:socket_setopt()], @@ -2089,15 +2100,16 @@ call_owner(Owner, Msg) -> setopts(Node, Opts) when is_atom(Node), is_list(Opts) -> request({setopts, Node, Opts}). -setopts_new(Opts, State) -> +setopts_new(Opts, From, State) -> %% First try setopts on listening socket(s) %% Bail out on failure. %% If successful, we are pretty sure Opts are ok %% and we continue with config params and pending connections. case setopts_on_listen(Opts, State#state.listen) of ok -> - setopts_new_1(Opts); - Fail -> Fail + setopts_new_1(Opts, From, State); + Fail -> + async_reply({reply, Fail, State}, From) end. setopts_on_listen(_, []) -> ok; @@ -2110,7 +2122,7 @@ setopts_on_listen(Opts, [#listen {listen = LSocket, module = Mod} | T]) -> error:undef -> {error, enotsup} end. -setopts_new_1(Opts) -> +setopts_new_1(Opts, From, #state{req_map = ReqMap0} = State) -> ConnectOpts = case application:get_env(kernel, inet_dist_connect_options) of {ok, CO} -> CO; _ -> [] @@ -2133,13 +2145,36 @@ setopts_new_1(Opts) -> PendingConns = ets:select(sys_dist, [{'_', [{'=/=',{element,#connection.state,'$_'},up}], ['$_']}]), - lists:foreach(fun(#connection{state = pending, owner = Owner}) -> - call_owner(Owner, {setopts, Opts}); - (#connection{state = up_pending, pending_owner = Owner}) -> - call_owner(Owner, {setopts, Opts}); - (_) -> ignore - end, PendingConns), - ok. + + Op = make_ref(), + SendReq = fun (ReqMap, N, Owner) -> + {send_owner_request(ReqMap, {setopts_new, Op}, + Owner, + {setopts, Opts}), + N+1} + end, + {ReqMap1, NoReqs} = lists:foldl(fun(#connection{state = pending, + owner = Owner}, + {ReqMap, N}) -> + SendReq(ReqMap, N, Owner); + (#connection{state = up_pending, + pending_owner = Owner}, + {ReqMap, N}) -> + SendReq(ReqMap, N, Owner); + (_, Acc) -> + Acc + end, + {ReqMap0, 0}, + PendingConns), + if NoReqs == 0 -> + async_reply({reply, ok, State}, From); + true -> + %% Reply made later from handle_async_response() when + %% we've got responses from all owners that we've + %% made requests to... + ReqMap2 = ReqMap1#{Op => {setopts_new, From, NoReqs}}, + {noreply, State#state{req_map = ReqMap2}} + end. merge_opts([], B) -> B; @@ -2158,3 +2193,19 @@ merge_opts([H|T], B0) -> getopts(Node, Opts) when is_atom(Node), is_list(Opts) -> request({getopts, Node, Opts}). +opts_node(Op, Node, Opts, From, #state{req_map = ReqMap0} = S0) -> + case ets:lookup(sys_dist, Node) of + [Conn] when Conn#connection.state =:= up -> + ReqMap1 = send_owner_request(ReqMap0, + {Op, From}, + Conn#connection.owner, + {Op, Opts}), + %% Reply made later from handle_async_response() when + %% we get a response from the owner that we made the + %% request to... + S1 = S0#state{req_map = ReqMap1}, + {noreply, S1}; + _ -> + async_reply({reply, {error, noconnection}, S0}, From) + end. + diff --git a/lib/kernel/test/erl_distribution_SUITE.erl b/lib/kernel/test/erl_distribution_SUITE.erl index 1c7b067375..8a5547e3e9 100644 --- a/lib/kernel/test/erl_distribution_SUITE.erl +++ b/lib/kernel/test/erl_distribution_SUITE.erl @@ -57,6 +57,7 @@ tick_serv_test/2, tick_serv_test1/1, run_remote_test/1, setopts_do/2, + setopts_deadlock_test/2, keep_conn/1, time_ping/1]). -export([init_per_testcase/2, end_per_testcase/2]). @@ -573,7 +574,7 @@ setopts(Config) when is_list(Config) -> setopts(Config) when is_list(Config) -> register(setopts_regname, self()), - [N1,N2,N3,N4] = get_nodenames(4, setopts), + [N1,N2,N3,N4,N5] = get_nodenames(5, setopts), {_N1F,Port1} = start_node_unconnected(N1, ?MODULE, run_remote_test, ["setopts_do", atom_to_list(node()), "1", "ping"]), @@ -598,6 +599,32 @@ setopts(Config) when is_list(Config) -> wait_and_connect(LSock, N4F, Port4), 0 = wait_for_port_exit(Port4), + %% net_kernel:setopts(new, _) used to be able to produce a deadlock + %% in net_kernel. GH-6129/OTP-18198 + {N5F,Port5} = start_node_unconnected(DCfg, N5, ?MODULE, run_remote_test, + ["setopts_deadlock_test", atom_to_list(node()), + integer_to_list(LTcpPort)]), + wait_and_connect(LSock, N5F, Port5), + repeat(fun () -> + receive after 10 -> ok end, + erlang:disconnect_node(N5F), + WD = spawn_link(fun () -> + receive after 2000 -> ok end, + exit({net_kernel_probably_deadlocked, N5F}) + end), + pong = net_adm:ping(N5F), + unlink(WD), + exit(WD, kill), + false = is_process_alive(WD) + end, + 200), + try + erpc:call(N5F, erlang, halt, []) + catch + error:{erpc,noconnection} -> ok + end, + 0 = wait_for_port_exit(Port5), + ok. wait_and_connect(LSock, NodeName, NodePort) -> @@ -686,6 +713,19 @@ setopts_do(TestNode, [OptNr, ConnectData]) -> ok. +setopts_deadlock_test(_TestNode, [TcpPort]) -> + {ok, Sock} = gen_tcp:connect("localhost", list_to_integer(TcpPort), + [{active,false},{packet,2}]), + ok = gen_tcp:send(Sock, "Connect please"), + {ok, "Connect done"} = gen_tcp:recv(Sock, 0), + gen_tcp:close(Sock), + setopts_new_loop(). + +setopts_new_loop() -> + ok = net_kernel:setopts(new, [{nodelay, true}]), + receive after 10 -> ok end, + setopts_new_loop(). + opt_from_nr("1") -> {nodelay, true}; opt_from_nr("2") -> {nodelay, false}. -- 2.35.3
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor