File 0463-mnesia-Fix-deadlock-caused-by-add_table_copy.patch of Package erlang

From 2bf7b5b5abbc1097a0b30bea2d390a2a0bec004c Mon Sep 17 00:00:00 2001
From: Dan Gudmundsson <dgud@erlang.org>
Date: Fri, 28 Jun 2019 13:13:13 +0200
Subject: [PATCH] mnesia: Fix deadlock caused by add_table_copy

If add_table_copy was called when a node was starting it deadlock
waiting for mnesia_controller, when schema was not merged.

Abort if that is the case.
---
 lib/mnesia/src/mnesia_controller.erl      | 81 +++++++++++++++++--------------
 lib/mnesia/src/mnesia_schema.erl          |  2 +-
 lib/mnesia/test/mnesia_isolation_test.erl | 44 ++++++++++++++---
 3 files changed, 82 insertions(+), 45 deletions(-)

diff --git a/lib/mnesia/src/mnesia_controller.erl b/lib/mnesia/src/mnesia_controller.erl
index 882de0d613..0f221b0c1f 100644
--- a/lib/mnesia/src/mnesia_controller.erl
+++ b/lib/mnesia/src/mnesia_controller.erl
@@ -331,35 +331,39 @@ release_schema_commit_lock() ->
 
 %% Special for preparation of add table copy
 get_network_copy(Tid, Tab, Cs) ->
-%   We can't let the controller queue this one
-%   because that may cause a deadlock between schema_operations
-%   and initial tableloadings which both takes schema locks.
-%   But we have to get copier_done msgs when the other side
-%   goes down.
-    call({add_other, self()}),
-    Reason = {dumper,{add_table_copy, Tid}},
-    Work = #net_load{table = Tab,reason = Reason,cstruct = Cs},
-    %% I'll need this cause it's linked trough the subscriber
-    %% might be solved by using monitor in subscr instead.
-    process_flag(trap_exit, true),
-    Load = load_table_fun(Work),
-    Res = ?CATCH(Load()),
-    process_flag(trap_exit, false),
-    call({del_other, self()}),
-    case Res of
- 	#loader_done{is_loaded = true} ->
- 	    Tab = Res#loader_done.table_name,
- 	    case Res#loader_done.needs_announce of
- 		true ->
- 		    i_have_tab(Tab);
- 		false ->
- 		    ignore
- 	    end,
- 	    Res#loader_done.reply;
-	#loader_done{} ->
- 	    Res#loader_done.reply;
- 	Else ->
- 	    {not_loaded, Else}
+    %%   We can't let the controller queue this one
+    %%   because that may cause a deadlock between schema_operations
+    %%   and initial tableloadings which both takes schema locks.
+    %%   But we have to get copier_done msgs when the other side
+    %%   goes down.
+    case call({add_other, self()}) of
+        ok ->
+            Reason = {dumper,{add_table_copy, Tid}},
+            Work = #net_load{table = Tab,reason = Reason,cstruct = Cs},
+            %% I'll need this cause it's linked trough the subscriber
+            %% might be solved by using monitor in subscr instead.
+            process_flag(trap_exit, true),
+            Load = load_table_fun(Work),
+            Res = ?CATCH(Load()),
+            process_flag(trap_exit, false),
+            call({del_other, self()}),
+            case Res of
+                #loader_done{is_loaded = true} ->
+                    Tab = Res#loader_done.table_name,
+                    case Res#loader_done.needs_announce of
+                        true ->
+                            i_have_tab(Tab);
+                        false ->
+                            ignore
+                    end,
+                    Res#loader_done.reply;
+                #loader_done{} ->
+                    Res#loader_done.reply;
+                Else ->
+                    {not_loaded, Else}
+            end;
+        {error, Else} ->
+            {not_loaded, Else}
     end.
 
 %% This functions is invoked from the dumper
@@ -772,6 +776,18 @@ handle_call({unannounce_add_table_copy, [Tab, Node], From}, ReplyTo, State) ->
 	end,
     noreply(State2);
 
+handle_call({add_other, Who}, _From, State = #state{others=Others0, schema_is_merged=SM}) ->
+    case SM of
+        true ->
+            Others = [Who|Others0],
+            {reply, ok, State#state{others=Others}};
+        false ->
+            {reply, {error, {not_active,schema,node()}}, State}
+    end;
+handle_call({del_other, Who}, _From, State = #state{others=Others0}) ->
+    Others = lists:delete(Who, Others0),
+    {reply, ok, State#state{others=Others}};
+
 handle_call(Msg, From, State) when State#state.schema_is_merged /= true ->
     %% Buffer early messages
     Msgs = State#state.early_msgs,
@@ -803,13 +819,6 @@ handle_call({block_table, [Tab], From}, _Dummy, State) ->
 handle_call({check_w2r, _Node, Tab}, _From, State) ->
     {reply, val({Tab, where_to_read}), State};
 
-handle_call({add_other, Who}, _From, State = #state{others=Others0}) ->
-    Others = [Who|Others0],
-    {reply, ok, State#state{others=Others}};
-handle_call({del_other, Who}, _From, State = #state{others=Others0}) ->
-    Others = lists:delete(Who, Others0),
-    {reply, ok, State#state{others=Others}};
-
 handle_call(Msg, _From, State) ->
     error("~p got unexpected call: ~p~n", [?SERVER_NAME, Msg]),
     noreply(State).
diff --git a/lib/mnesia/src/mnesia_schema.erl b/lib/mnesia/src/mnesia_schema.erl
index ef38adca1e..4a86aeb375 100644
--- a/lib/mnesia/src/mnesia_schema.erl
+++ b/lib/mnesia/src/mnesia_schema.erl
@@ -697,7 +697,7 @@ schema_coordinator(Client, _Fun, undefined) ->
 schema_coordinator(Client, Fun, Controller) when is_pid(Controller) ->
     %% Do not trap exit in order to automatically die
     %% when the controller dies
-
+    put(transaction_client, Client), %% debug
     link(Controller),
     unlink(Client),
 
diff --git a/lib/mnesia/test/mnesia_isolation_test.erl b/lib/mnesia/test/mnesia_isolation_test.erl
index 49bcec14af..98a7840b72 100644
--- a/lib/mnesia/test/mnesia_isolation_test.erl
+++ b/lib/mnesia/test/mnesia_isolation_test.erl
@@ -1036,29 +1036,57 @@ add_table_copy(Config) when is_list(Config) ->
     Def = [{ram_copies, [ThisNode]}, {attributes, [key, attr1, attr2]}],
     ?match({atomic, ok}, mnesia:create_table(Tab, Def)),
     insert(Tab, 50),
-    {success, [A]} = ?start_activities([ThisNode]), 
+    {success, [A]} = ?start_activities([ThisNode]),
     mnesia_test_lib:start_sync_transactions([A], 0),
 
     A ! fun() -> mnesia:write({Tab, 1, 1, updated}) end,
     ?match_receive({A, ok}),   %% A is executed
 
-    Pid = spawn_link(?MODULE, op, [self(), mnesia, add_table_copy, 
+    Pid = spawn_link(?MODULE, op, [self(), mnesia, add_table_copy,
 				   [Tab, Node2, ram_copies]]),
-   
+
     ?match_receive(timeout),   %% op waits for locks occupied by A
 
     A ! end_trans,             %% Kill A, locks should be released
-    ?match_receive({A,{atomic,end_trans}}),     
-    
-    receive 
+    ?match_receive({A,{atomic,end_trans}}),
+
+    receive
 	Msg -> ?match({Pid, {atomic, ok}}, Msg)
     after
 	timer:seconds(20) -> ?error("Operation timed out", [])
     end,
+    ?match_receive({'EXIT', Pid, normal}),
 
     sys:get_status(whereis(mnesia_locker)), % Explicit sync, release locks is async
-    ?match([], mnesia:system_info(held_locks)), 
-    ?match([], mnesia:system_info(lock_queue)), 
+    ?match([], mnesia:system_info(held_locks)),
+    ?match([], mnesia:system_info(lock_queue)),
+
+    {atomic, ok} = mnesia:del_table_copy(Tab, Node2),
+    Self = self(),
+    New = spawn_link(Node2,
+                     fun () ->
+                             application:stop(mnesia),
+                             Self ! {self(), ok},
+                             io:format(user, "restart mnesia~n", []),
+                             Self ! {self(), catch application:start(mnesia)}
+                     end),
+    receive {New,ok} -> ok end,
+
+    Add = fun Add() ->
+                  case mnesia:add_table_copy(Tab, Node2, disc_copies) of
+                      {atomic, ok} -> ok;
+                      _R -> io:format(user, "aborted with reason ~p~n", [_R]),
+                            timer:sleep(10),
+                            Add()
+                  end
+          end,
+
+    ?match(ok, Add()),
+    ?match_receive({New,ok}),
+
+    sys:get_status(whereis(mnesia_locker)), % Explicit sync, release locks is async
+    ?match([], mnesia:system_info(held_locks)),
+    ?match([], mnesia:system_info(lock_queue)),
     ok.
 
 del_table_copy(suite) -> [];
-- 
2.16.4

openSUSE Build Service is sponsored by