File 3421-Reconnect-to-epmd.patch of Package erlang

From 180d8555d8b364b367cd3e43a4cb03e7be9642b6 Mon Sep 17 00:00:00 2001
From: Simon Cornish <zl9d97p02@sneakemail.com>
Date: Mon, 25 Jan 2021 09:43:28 -0800
Subject: [PATCH] Reconnect to epmd

When the connection to epmd is lost, try to reconnect every 2s
---
 erts/doc/src/erl_dist_protocol.xml         |  3 -
 lib/kernel/doc/src/erl_epmd.xml            |  6 +-
 lib/kernel/src/erl_epmd.erl                | 17 ++++-
 lib/kernel/test/erl_distribution_SUITE.erl | 88 ++++++++++++++++++++++
 4 files changed, 107 insertions(+), 7 deletions(-)

diff --git a/erts/doc/src/erl_dist_protocol.xml b/erts/doc/src/erl_dist_protocol.xml
index 8cb8e09615..a8ec5bbaeb 100644
--- a/erts/doc/src/erl_dist_protocol.xml
+++ b/erts/doc/src/erl_dist_protocol.xml
@@ -430,9 +430,6 @@ io:format("old/unused name ~ts at port ~p, fd = ~p ~n",
 
       <p>where n = <c>Length</c> - 1.</p>
 
-      <p>The current implementation of Erlang does not care if the connection
-        to the EPMD is broken.</p>
-
       <p>The response for a <c>STOP_REQ</c> is as follows:</p>
 
       <table align="left">
diff --git a/lib/kernel/src/erl_epmd.erl b/lib/kernel/src/erl_epmd.erl
index 7cc84b2475..96806ae3e7 100644
--- a/lib/kernel/src/erl_epmd.erl
+++ b/lib/kernel/src/erl_epmd.erl
@@ -53,13 +53,14 @@
 
 -import(lists, [reverse/1]).
 
--record(state, {socket, port_no = -1, name = ""}).
+-record(state, {socket, port_no = -1, name = "", family}). % family: saved at registration, reused on reconnect
 -type state() :: #state{}.
 
 -include("inet_int.hrl").
 -include("erl_epmd.hrl").
 
+-define(RECONNECT_TIME, 2000). % milliseconds between attempts to re-register with epmd
 
 %%%----------------------------------------------------------------------
 %%% API
 %%%----------------------------------------------------------------------
@@ -228,7 +230,8 @@ handle_call({register, Name, PortNo, Family}, _From, State) ->
 		{alive, Socket, Creation} ->
 		    S = State#state{socket = Socket,
 				    port_no = PortNo,
-				    name = Name},
+				    name = Name,
+				    family = Family},
 		    {reply, {ok, Creation}, S};
 		Error ->
 		    {reply, Error, State}
@@ -263,7 +266,17 @@ handle_cast(_, State) ->
 -spec handle_info(term(), state()) -> {'noreply', state()}.
 
 handle_info({tcp_closed, Socket}, State) when State#state.socket =:= Socket ->
+    erlang:send_after(?RECONNECT_TIME, self(), reconnect), % schedule the first re-registration attempt
     {noreply, State#state{socket = -1}};
+handle_info(reconnect, State) when State#state.socket =:= -1 -> % socket = -1 means the epmd connection is down
+    case do_register_node(State#state.name, State#state.port_no, State#state.family) of
+	{alive, Socket, _Creation} ->
+            %% registered again: store the new socket, ignore the received creation
+            {noreply, State#state{socket = Socket}};
+	_Error ->
+	    erlang:send_after(?RECONNECT_TIME, self(), reconnect), % still failing: retry after the same delay
+	    {noreply, State}
+    end;
 handle_info(_, State) ->
     {noreply, State}.
 
diff --git a/lib/kernel/test/erl_distribution_SUITE.erl b/lib/kernel/test/erl_distribution_SUITE.erl
index 67faa4911c..1c7b067375 100644
--- a/lib/kernel/test/erl_distribution_SUITE.erl
+++ b/lib/kernel/test/erl_distribution_SUITE.erl
@@ -30,6 +30,7 @@
 -export([tick/1, tick_change/1,
          nodenames/1, hostnames/1,
          illegal_nodenames/1, hidden_node/1,
+         epmd_reconnect/1,
 	 setopts/1,
 	 table_waste/1, net_setuptime/1,
 	 inet_dist_options_options/1,
@@ -54,6 +55,7 @@
 	 tick_cli_test/1, tick_cli_test1/1,
 	 tick_serv_test/2, tick_serv_test1/1,
 	 run_remote_test/1,
+         epmd_reconnect_do/2,
 	 setopts_do/2,
          setopts_deadlock_test/2,
 	 keep_conn/1, time_ping/1]).
@@ -64,6 +66,8 @@
 -export([pinger/1]).
 
 -define(DUMMY_NODE,dummy@test01).
+-define(ALT_EPMD_PORT, "12321").
+-define(ALT_EPMD_CMD, "epmd -port "++?ALT_EPMD_PORT).
 
 %%-----------------------------------------------------------------
 %% The distribution is mainly tested in the big old test_suite.
@@ -82,6 +86,7 @@ all() ->
 
 all() -> 
     [tick, tick_change, nodenames, hostnames, illegal_nodenames,
+     epmd_reconnect,
      hidden_node, setopts,
      table_waste, net_setuptime, inet_dist_options_options,
      {group, monitor_nodes}].
@@ -117,9 +122,15 @@ init_per_testcase(TC, Config) when TC == hostnames;
     file:make_dir("hostnames_nodedir"),
     file:write_file("hostnames_nodedir/ignore_core_files",""),
     Config;
+init_per_testcase(epmd_reconnect, Config) ->
+    [] = os:cmd(?ALT_EPMD_CMD++" -relaxed_command_check -daemon"),
+    Config;
 init_per_testcase(Func, Config) when is_atom(Func), is_list(Config) ->
     Config.
 
+end_per_testcase(epmd_reconnect, _Config) ->
+    os:cmd(?ALT_EPMD_CMD++" -kill"), % stop the epmd on the alternative port; the command output is ignored
+    ok;
 end_per_testcase(_Func, _Config) ->
     ok.
 
@@ -427,6 +438,83 @@ tick_cli_test1(Node) ->
 	    end
     end.
 
+%% Start three nodes that use the alternative epmd and wait for all of them to
+%% exit with status 0. The role ("1", "2" or "3") matches an epmd_reconnect_do/2 clause.
+epmd_reconnect(Config) when is_list(Config) ->
+    NodeNames = [_, _, _] = get_nodenames(3, ?FUNCTION_NAME),
+    FullNames = [atom_to_list(full_node_name(Name)) || Name <- NodeNames],
+    EpmdArg = "-epmd_port "++?ALT_EPMD_PORT,
+    Parent = atom_to_list(node()),
+
+    Ports = [begin
+                 Args = ["epmd_reconnect_do", Parent, Role | FullNames],
+                 {_FullName, Port} = start_node_unconnected(EpmdArg, Name, ?MODULE,
+                                                            run_remote_test, Args),
+                 Port
+             end || {Role, Name} <- lists:zip(["1", "2", "3"], NodeNames)],
+
+    ok = reap_ports(Ports),
+    ok.
+
+%% Drain messages from Ports until each has exited with status 0; any other
+%% message (unknown port, non-zero exit status, ...) crashes with case_clause.
+reap_ports([]) ->
+    ok;
+reap_ports(Ports) ->
+    case (receive Msg -> Msg end) of
+        {Port, Event} ->
+            Known = lists:member(Port, Ports),
+            case Known andalso Event of
+                {data, Text} -> io:format("~p: ~s\n", [Port, Text]), reap_ports(Ports);
+                {exit_status, 0} -> reap_ports(lists:delete(Port, Ports))
+            end
+    end.
+
+%% Per-node part of epmd_reconnect/1; the first list element is the role.
+%% "1": wait for all three names in epmd, kill and restart the alternative epmd,
+%% wait for the names to be registered again, then ping and stop the other nodes.
+epmd_reconnect_do(_Node, ["1", Node1, Node2, Node3]) ->
+    Names = [hd(string:tokens(Node, "@")) || Node <- [Node1, Node2, Node3]],
+    ok = wait_for_names(Names),
+    "Killed" ++ _ = os:cmd(?ALT_EPMD_CMD++" -kill"),
+    open_port({spawn, ?ALT_EPMD_CMD}, []),
+    ok = wait_for_names(Names),
+    StopPeer = fun(Peer) ->
+                       PeerNode = list_to_atom(Peer),
+                       pong = net_adm:ping(PeerNode),
+                       {epmd_reconnect_do, PeerNode} ! {stop, Node1, Peer}
+               end,
+    lists:foreach(StopPeer, [Node2, Node3]),
+    ok;
+%% "2" and "3": wait for the stop message that node "1" addresses to this node.
+epmd_reconnect_do(_Node, ["2", Node1, Node2, _Node3]) ->
+    epmd_reconnect_await_stop(Node1, Node2);
+epmd_reconnect_do(_Node, ["3", Node1, _Node2, Node3]) ->
+    epmd_reconnect_await_stop(Node1, Node3).
+
+%% Register as epmd_reconnect_do; exit with 'timeout' unless stopped within 7 s.
+epmd_reconnect_await_stop(From, Self) ->
+    register(epmd_reconnect_do, self()),
+    receive {stop, From, Self} -> ok
+    after 7000 -> exit(timeout)
+    end.
+
+%% Wait up to ~3 s for exactly Names to be registered in epmd (erl_epmd retries every 2s).
+wait_for_names(Names) ->
+    wait_for_names(lists:sort(Names), 30, 100).
+
+%% Names must be sorted. Returns ok, or {error, {not_registered, Names}} after N polls.
+wait_for_names(Names, N, Wait) when N > 0 ->
+    Registered = case erl_epmd:names() of
+                     {ok, Info} -> lists:sort([Name || {Name, _Port} <- Info]);
+                     _Unavailable -> unavailable
+                 end,
+    case Registered of
+        Names -> ok;
+        _ -> timer:sleep(Wait), wait_for_names(Names, N-1, Wait)
+    end;
+wait_for_names(Names, 0, _Wait) -> {error, {not_registered, Names}}.
+
 setopts(Config) when is_list(Config) ->
     register(setopts_regname, self()),
     [N1,N2,N3,N4,N5] = get_nodenames(5, setopts),
-- 
2.26.2

openSUSE Build Service is sponsored by