Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
home:Ledest:erlang:19
erlang
1217-kernel-Fix-a-race-condition-in-Global.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File 1217-kernel-Fix-a-race-condition-in-Global.patch of Package erlang
From f2bd4a073961b4788a3b0a4e265b840a20f453eb Mon Sep 17 00:00:00 2001 From: Hans Bolinder <hasse@erlang.org> Date: Wed, 5 May 2021 07:41:59 +0200 Subject: [PATCH] kernel: Fix a race condition in Global See also GH-4448 and GH-3923. A race between locker processes on different nodes has been resolved by using global_name_server as a proxy. --- lib/kernel/src/global.erl | 60 ++++++++++++++++++++++++-------- lib/kernel/test/global_SUITE.erl | 26 ++++++++++---- 2 files changed, 66 insertions(+), 20 deletions(-) diff --git a/lib/kernel/src/global.erl b/lib/kernel/src/global.erl index 3875074d74..491f60688a 100644 --- a/lib/kernel/src/global.erl +++ b/lib/kernel/src/global.erl @@ -80,10 +80,12 @@ %% when communicating with vsn 3 nodes. (-R10B) %% Vsn 5 uses an ordered list of self() and HisTheLocker when locking %% nodes in the own partition. (R11B-) +%% Vsn 6 does not send any message between locker processes on different +%% nodes, but uses the server as a proxy. %% Current version of global does not support vsn 4 or earlier. --define(vsn, 5). +-define(vsn, 6). %%----------------------------------------------------------------- %% connect_all = boolean() - true if we are supposed to set up a @@ -156,6 +158,8 @@ %%% The new_nodes messages has been augmented with the global lock id. %%% %%% R14A (OTP-8527): The deleter process has been removed. +%%% +%%% Erlang/OTP 22.0: The extra process calling erlang:monitor() is removed. start() -> gen_server:start({local, global_name_server}, ?MODULE, [], []). @@ -567,17 +571,24 @@ init([]) -> %% {exchange, Node, ListOfNames, _ListOfNamesExt, Tag} %% {resolved, Node, HisOps, HisKnown, _Unused, ListOfNamesExt, Tag} %% HisKnown = list of known nodes in Node's partition -%% 2. Between lockers on connecting nodes -%% {his_locker, Pid} (from our global) -%% {lock, Bool} loop until both lockers have lock = true, -%% then send to global_name_server {lock_is_set, Node, Tag} -%% 3. Connecting node's global_name_server informs other nodes in the same +%% 2. Between global_name_server and my locker (the local locker) +%% {nodeup, Node, Tag} +%% {his_the_locker, Pid, {Vsn, HisKnown}, MyKnown} +%% {cancel, Node, Tag, Fun | no_fun} +%% {add_to_known, Nodes} +%% {remove_from_known, Nodes} +%% {lock_set, Pid, LockIsSet, HisKnown} (acting as proxy) +%% 3. Between lockers on connecting nodes (via proxy global_name_server) +%% {lock_set, Pid, LockIsSet, HisKnown} +%% loop until both lockers have LockIsSet =:= true, +%% then send to global_name_server {lock_is_set, Node, Tag, LockId} +%% 4. Connecting node's global_name_server informs other nodes in the same %% partition about hitherto unknown nodes in the other partition %% {new_nodes, Node, Ops, ListOfNamesExt, NewNodes, ExtraInfo} -%% 4. Between global_name_server and resolver +%% 5. Between global_name_server and resolver %% {resolve, NameList, Node} to resolver %% {exchange_ops, Node, Tag, Ops, Resolved} from resolver -%% 5. sync protocol, between global_name_servers in different partitions +%% 6. sync protocol, between global_name_servers in different partitions %% {in_sync, Node, IsKnown} %% sent by each node to all new nodes (Node becomes known to them) %%----------------------------------------------------------------- @@ -820,6 +831,11 @@ handle_cast({async_del_lock, _ResourceId, _Pid}, S) -> %% R14A nodes and later do not send async_del_lock messages. {noreply, S}; +handle_cast({lock_set, _Pid, _Set, _HisKnown} = Message, S) -> + #state{the_locker = Locker} = S, + Locker ! Message, + {noreply, S}; + handle_cast(Request, S) -> error_logger:warning_msg("The global_name_server " "received an unexpected message:\n" @@ -1488,7 +1504,13 @@ delete_global_name(_Name, _Pid) -> %% Version 1: used by unpatched R7. %% Version 2: the messages exchanged between the lockers include the known %% nodes (see OTP-3576). -%%----------------------------------------------------------------- + +%% As of version 6 of global, lockers (on different nodes) do not +%% communicate directly with each other. Instead messages are sent via +%% the server. This is in order to avoid race conditions where a +%% {lock_set, ...} message is received after (before) a nodedown +%% (nodeup). +%% ----------------------------------------------------------------- -define(locker_vsn, 2). @@ -1603,7 +1625,7 @@ the_locker_message({lock_set, Pid, true, _HisKnown}, S) -> {true, MyTag, HisVsn} -> LockId = locker_lock_id(Pid, HisVsn), {IsLockSet, S1} = lock_nodes_safely(LockId, [], S), - Pid ! {lock_set, self(), IsLockSet, S1#multi.known}, + send_lock_set(S1, IsLockSet, Pid, HisVsn), Known2 = [node() | S1#multi.known], ?trace({the_locker, spontaneous, {known2, Known2}, {node,Node}, {is_lock_set,IsLockSet}}), @@ -1630,7 +1652,7 @@ the_locker_message({lock_set, Pid, true, _HisKnown}, S) -> end; false -> ?trace({the_locker, not_there, {node,Node}}), - Pid ! {lock_set, self(), false, S#multi.known}, + send_lock_set(S, false, Pid, _HisVsn=5), loop_the_locker(S) end; the_locker_message({add_to_known, Nodes}, S) -> @@ -1672,8 +1694,7 @@ select_node(S) -> case IsLockSet of true -> Known1 = Us ++ S2#multi.known, - ?trace({sending_lock_set, self(), {his,HisTheLocker}}), - HisTheLocker ! {lock_set, self(), true, S2#multi.known}, + send_lock_set(S2, true, HisTheLocker, HisVsn), S3 = lock_is_set(S2, Him, MyTag, Known1, LockId), loop_the_locker(S3); false -> @@ -1681,7 +1702,18 @@ select_node(S) -> end end. -%% Version 5: Both sides use the same requester id. Thereby the nodes +send_lock_set(S, IsLockSet, HisTheLocker, Vsn) -> + ?trace({sending_lock_set, self(), {his,HisTheLocker}}), + Message = {lock_set, self(), IsLockSet, S#multi.known}, + if + Vsn < 6 -> + HisTheLocker ! Message, + ok; + true -> + gen_server:cast({global_name_server, node(HisTheLocker)}, Message) + end. + +%% Version 5-6: Both sides use the same requester id. Thereby the nodes %% common to both sides are locked by both locker processes. This %% means that the lock is still there when the 'new_nodes' message is %% received even if the other side has deleted the lock. diff --git a/lib/kernel/test/global_SUITE.erl b/lib/kernel/test/global_SUITE.erl index 5bff9cc292..3837a44c64 100644 --- a/lib/kernel/test/global_SUITE.erl +++ b/lib/kernel/test/global_SUITE.erl @@ -391,6 +393,7 @@ write_high_level_trace(Nodes, Config) -> Node <- Nodes], Dir = proplists:get_value(priv_dir, Config), DataFile = filename:join([Dir, lists:concat(["global_", ?testcase])]), + io:format("High-level trace on ~p\n", [DataFile]), file:write_file(DataFile, term_to_binary({high_level_trace, When, Data})). lock_global2(Id, Parent) -> @@ -2762,11 +2765,11 @@ many_nodes(Config) when is_list(Config) -> Osname =:= openbsd; Osname =:= darwin -> N_nodes = quite_a_few_nodes(32), - {node_rel(1, N_nodes, this), N_nodes}; + {node_rel(1, N_nodes), N_nodes}; {unix, _} -> - {node_rel(1, 32, this), 32}; + {node_rel(1, 32), 32}; _ -> - {node_rel(1, 32, this), 32} + {node_rel(1, 32), 32} end, Cps = [begin {ok, Cp} = start_node_rel(Name, Rel, Config), Cp end || {Name,Rel} <- Rels], @@ -2814,8 +2817,19 @@ many_nodes(Config) when is_list(Config) -> io:format("~s~n", [Return]), {comment, Return}. -node_rel(From, To, Rel) -> - [{lists:concat([cp, N]), Rel} || N <- lists:seq(From, To)]. +node_rel(From, To) -> + NodeNumbers = lists:seq(From, To), + Release = erlang:system_info(otp_release), + LastRelease = integer_to_list(list_to_integer(Release) - 1) ++ "_latest", + Last = case test_server:is_release_available(LastRelease) of + true -> list_to_atom(LastRelease); + false -> this + end, + [{lists:concat([cp, N]), + case N rem 2 of + 0 -> this; + 1 -> Last + end} || N <- NodeNumbers]. isolated_node(File, GoFile, Nodes, Config) -> Ns = lists:sort(Nodes), @@ -4061,7 +4075,7 @@ init_condition(Config) -> io:format("globally registered names: ~p~n", [global:registered_names()]), io:format("nodes: ~p~n", [nodes()]), io:format("known: ~p~n", [get_known(node()) -- [node()]]), - io:format("Info ~p~n", [setelement(11, global:info(), trace)]), + io:format("Info ~p~n", [setelement(10, global:info(), trace)]), _ = [io:format("~s: ~p~n", [TN, ets:tab2list(T)]) || {TN, T} <- [{"Global Names (ETS)", global_names}, {"Global Names Ext (ETS)", global_names_ext}, -- 2.26.2
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor