File 1410-Retry-to-early-add_table_copy.patch of Package erlang

From d17819b9d053027657497b9e4692d2bbfaeed48d Mon Sep 17 00:00:00 2001
From: Dan Gudmundsson <dgud@erlang.org>
Date: Wed, 8 Nov 2023 16:03:17 +0100
Subject: [PATCH] Retry too early add_table_copy

add_table_copy/3 could abort with system_limit as the reason
if the "added" node was still starting, i.e. alive but had not
yet merged its schema.

Now abort with node_not_running, which should restart the transaction
automatically after a short sleep.

Also improve debug (verbose) printouts and saved coredump info.
---
 lib/mnesia/src/mnesia_lib.erl    |  2 +-
 lib/mnesia/src/mnesia_loader.erl |  3 +++
 lib/mnesia/src/mnesia_schema.erl |  3 ++-
 lib/mnesia/src/mnesia_tm.erl     | 30 ++++++++++++++++++++++++++----
 4 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/lib/mnesia/src/mnesia_lib.erl b/lib/mnesia/src/mnesia_lib.erl
index 4dce5cefc0..b1ca9ebf35 100644
--- a/lib/mnesia/src/mnesia_lib.erl
+++ b/lib/mnesia/src/mnesia_lib.erl
@@ -1065,7 +1065,7 @@ save2(DbgInfo) ->
     Key = {'$$$_report', current_pos},
     P =
 	case ?ets_lookup_element(mnesia_gvar, Key, 2) of
-	    30 -> -1;
+            100 -> -1;
 	    I -> I
 	end,
     set({'$$$_report', current_pos}, P+1),
diff --git a/lib/mnesia/src/mnesia_loader.erl b/lib/mnesia/src/mnesia_loader.erl
index b16d00da61..1a65dac222 100644
--- a/lib/mnesia/src/mnesia_loader.erl
+++ b/lib/mnesia/src/mnesia_loader.erl
@@ -226,6 +226,7 @@ do_get_network_copy(Tab, Reason, Ns, Storage, Cs) ->
 		    dbg_out("Table ~tp copied from ~p to ~p~n", [Tab, Node, node()]),
 		    {loaded, ok};
 		Err = {error, _} when element(1, Reason) == dumper ->
+                    verbose("Copy failed: ~tp ~p~n", [Tab, Err]),
 		    {not_loaded,Err};
 		restart ->
 		    try_net_load_table(Tab, Reason, Tail ++ [Node], Cs);
@@ -339,6 +340,7 @@ start_receiver(Tab,Storage,Cs,SenderPid,TabSize,DetsData,{dumper,{add_table_copy
     Init = table_init_fun(SenderPid, Storage),
     case do_init_table(Tab,Storage,Cs,SenderPid,TabSize,DetsData,self(), Init) of
 	Err = {error, _} ->
+            verbose("Init table failed: ~tp ~p~n", [Tab, Err]),
 	    SenderPid ! {copier_done, node()},
 	    Err;
 	Else ->
@@ -363,6 +365,7 @@ wait_on_load_complete(Pid) ->
 	{Pid, Res} ->
 	    Res;
 	{'EXIT', Pid, Reason} ->
+            verbose("Loader crashed : ~tp ~p~n", [Pid, Reason]),
 	    error(Reason);
 	Else ->
 	    Pid ! Else,
diff --git a/lib/mnesia/src/mnesia_schema.erl b/lib/mnesia/src/mnesia_schema.erl
index e0f561937e..27a7b118b5 100644
--- a/lib/mnesia/src/mnesia_schema.erl
+++ b/lib/mnesia/src/mnesia_schema.erl
@@ -2466,13 +2466,14 @@ prepare_op(Tid, {op, add_table_copy, Storage, Node, TabDef}, _WaitFor) ->
 		_  ->
 		    ok
 	    end,
-            mnesia_lib:verbose("~w:~w Adding table~n",[?MODULE,?LINE]),
 
 	    case mnesia_controller:get_network_copy(Tid, Tab, Cs) of
 		{loaded, ok} ->
                     %% Tables are created by mnesia_loader get_network code
                     insert_cstruct(Tid, Cs, true),
 		    {true, optional};
+                {not_loaded, {not_active, schema, Node}} ->
+                    mnesia:abort({node_not_running, Node});
 		{not_loaded, ErrReason} ->
 		    Reason = {system_limit, Tab, {Node, ErrReason}},
 		    mnesia:abort(Reason)
diff --git a/lib/mnesia/src/mnesia_tm.erl b/lib/mnesia/src/mnesia_tm.erl
index 5a070cf0cd..3a080e1d32 100644
--- a/lib/mnesia/src/mnesia_tm.erl
+++ b/lib/mnesia/src/mnesia_tm.erl
@@ -892,27 +892,26 @@ restart(Mod, Tid, Ts, Fun, Args, Factor0, Retries0, Type, Why) ->
 	    return_abort(Fun, Args, Why),
 	    Factor = 1,
 	    SleepTime = mnesia_lib:random_time(Factor, Tid#tid.counter),
-	    dbg_out("Restarting transaction ~w: in ~wms ~w~n", [Tid, SleepTime, Why]),
+	    log_restart("Restarting transaction ~w: in ~wms ~w~n", [Tid, SleepTime, Why]),
 	    timer:sleep(SleepTime),
 	    execute_outer(Mod, Fun, Args, Factor, Retries, Type);
 	{node_not_running, _N} ->   %% Avoids hanging in receive_release_tid_ack
 	    return_abort(Fun, Args, Why),
 	    Factor = 1,
 	    SleepTime = mnesia_lib:random_time(Factor, Tid#tid.counter),
-	    dbg_out("Restarting transaction ~w: in ~wms ~w~n", [Tid, SleepTime, Why]),
+	    log_restart("Restarting transaction ~w: in ~wms ~w~n", [Tid, SleepTime, Why]),
 	    timer:sleep(SleepTime),
 	    execute_outer(Mod, Fun, Args, Factor, Retries, Type);
 	_ ->
 	    SleepTime = mnesia_lib:random_time(Factor0, Tid#tid.counter),
 	    dbg_out("Restarting transaction ~w: in ~wms ~w~n", [Tid, SleepTime, Why]),
-
+            
 	    if
 		Factor0 /= 10 ->
 		    ignore;
 		true ->
 		    %% Our serial may be much larger than other nodes ditto
 		    AllNodes = val({current, db_nodes}),
-		    verbose("Sync serial ~p~n", [Tid]),
 		    rpc:abcast(AllNodes, ?MODULE, {sync_trans_serial, Tid})
 	    end,
 	    intercept_friends(Tid, Ts),
@@ -931,6 +930,24 @@ restart(Mod, Tid, Ts, Fun, Args, Factor0, Retries0, Type, Why) ->
 	    end
     end.
 
+log_restart(F,A) -> %% Log format F with args A; uses verbose/2 on the 1st and every 10th counted call, else dbg_out/2
+    case get(transaction_client) of %% process-dictionary key; where it is set is not visible in this patch
+        undefined ->
+            dbg_out(F,A);
+        _ ->
+            case get(transaction_count) of %% counter kept in this process's dictionary
+                undefined -> %% first counted call in this process
+                    put(transaction_count, 1),
+                    verbose(F,A);
+                N when (N rem 10) == 0 -> %% counter is a multiple of 10
+                    put(transaction_count, N+1),
+                    verbose(F,A);
+                N -> %% all other calls: bump the counter, log via dbg_out/2
+                    put(transaction_count, N+1),
+                    dbg_out(F,A)
+            end
+    end.
+
 get_restarted(Tid) ->
     case Res = rec() of
 	{restarted, Tid} ->
@@ -2086,6 +2103,7 @@ new_cr_format(#commit{ext=Snmp}=Cr) ->
     Cr#commit{ext=[{snmp,Snmp}]}.
 
 rec_all([Node | Tail], Tid, Res, Pids) ->
+    put({?MODULE, ?FUNCTION_NAME}, {Node, Tail}),
     receive
 	{?MODULE, Node, {vote_yes, Tid}} ->
 	    rec_all(Tail, Tid, Res, Pids);
@@ -2104,8 +2122,12 @@ rec_all([Node | Tail], Tid, Res, Pids) ->
 	    Abort = {do_abort, {bad_commit, Node}},
 	    ?SAFE({?MODULE, Node} ! {Tid, Abort}),
 	    rec_all(Tail, Tid, Abort, Pids)
+    after 15000 ->
+            mnesia_lib:verbose("~p: trans ~p waiting ~p~n", [self(), Tid, Node]),
+            rec_all([Node | Tail], Tid, Res, Pids)
     end;
 rec_all([], _Tid, Res, Pids) ->
+    erase({?MODULE, ?FUNCTION_NAME}),
     {Res, Pids}.
 
 get_transactions() ->
-- 
2.35.3

openSUSE Build Service is sponsored by