File 0210-Make-beam_jump-idempotent.patch of Package erlang

From 46ef798daf9f3190de90f84ce3e5fe3bad93c8c6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Fri, 17 Jan 2020 15:06:01 +0100
Subject: [PATCH 2/2] Make beam_jump idempotent

When compiling from a .S file, sometimes the share optimization sub
pass of beam_jump would do an unsafe optimization, producing code that
beam_validator would not consider safe.

Rewrite the share optimization pass to enusure it produces the same
result even if the beam_jump pass is run more than once.
---
 lib/compiler/src/beam_jump.erl | 194 ++++++++++++++++++++++++++++-------------
 1 file changed, 131 insertions(+), 63 deletions(-)

diff --git a/lib/compiler/src/beam_jump.erl b/lib/compiler/src/beam_jump.erl
index e6e245419e..61738e4435 100644
--- a/lib/compiler/src/beam_jump.erl
+++ b/lib/compiler/src/beam_jump.erl
@@ -317,55 +317,84 @@ insert_labels([], Lc, Acc) ->
 %%% (1) We try to share the code for identical code segments by replacing all
 %%% occurrences except the last with jumps to the last occurrence.
 %%%
+%%% We must not share code that raises an exception from outside a
+%%% try/catch block with code inside a try/catch block and vice versa,
+%%% because beam_validator will probably flag it as unsafe
+%%% (ambiguous_catch_try_state). The same goes for a plain catch.
+%%%
 
 share(Is0) ->
     Is1 = eliminate_fallthroughs(Is0, []),
     Is2 = find_fixpoint(fun(Is) ->
-                                share_1(Is, #{}, #{}, [], [])
+                                share_1(Is)
                         end, Is1),
     reverse(Is2).
 
-share_1([{label,L}=Lbl|Is], Dict0, Lbls0, [_|_]=Seq, Acc) ->
-    case maps:find(Seq, Dict0) of
-        error ->
-            Dict = case is_shareable(Seq) of
-                       true ->
-                           maps:put(Seq, L, Dict0);
-                       false ->
-                           Dict0
-                   end,
-            share_1(Is, Dict, Lbls0, [], [[Lbl|Seq]|Acc]);
-        {ok,Label} ->
-            Lbls = maps:put(L, Label, Lbls0),
-            share_1(Is, Dict0, Lbls, [], [[Lbl,{jump,{f,Label}}]|Acc])
+share_1(Is) ->
+    Safe = classify_labels(Is),
+    share_1(Is, Safe, #{}, #{}, [], []).
+
+%% Note that we examine the instructions in reverse execution order.
+share_1([{label,L}=Lbl|Is], Safe, Dict0, Lbls0, [_|_]=Seq, Acc) ->
+    case Dict0 of
+        #{Seq := Label} ->
+            %% This sequence of instructions has been seen previously. Find out
+            %% whether it would be safe to jump the label for previous occurrence.
+            case is_safely_shareable(L, Label, Seq, Safe) of
+                true ->
+                    %% Safe, because either the sequence never raises an exception
+                    %% or the jump to the label will not pass a try/catch or catch
+                    %% boundary.
+                    Lbls = Lbls0#{L => Label},
+                    share_1(Is, Safe, Dict0, Lbls, [],
+                            [[Lbl,{jump,{f,Label}}]|Acc]);
+                false ->
+                    %% Not safe, because the sequence can raise an exception
+                    %% and the jump would pass the boundary going in
+                    %% or out of a try/catch or catch block.
+                    share_1(Is, Safe, Dict0, Lbls0, [], [[Lbl|Seq]|Acc])
+            end;
+        #{} ->
+            %% This is first time we have seen this sequence of instructions.
+            case is_shareable(Seq) of
+                true ->
+                    Dict = Dict0#{Seq => L},
+                    share_1(Is, Safe, Dict, Lbls0, [], [[Lbl|Seq]|Acc]);
+                false ->
+                    %% The sequence begins with an inappropriate instruction.
+                    share_1(Is, Safe, Dict0, Lbls0, [], [[Lbl|Seq]|Acc])
+            end
     end;
-share_1([{func_info,_,_,_}|_]=Is0, _, Lbls, [], Acc0) when Lbls =/= #{} ->
-    lists:foldl(fun(Is, Acc) ->
-        beam_utils:replace_labels(Is, Acc, Lbls, fun(Old) -> Old end)
-    end, Is0, Acc0);
-share_1([{func_info,_,_,_}|_]=Is, _, Lbls, [], Acc) when Lbls =:= #{} ->
-    lists:foldl(fun lists:reverse/2, Is, Acc);
-share_1([{'catch',_,_}=I|Is], Dict0, Lbls0, Seq, Acc) ->
-    {Dict,Lbls} = clean_non_sharable(Dict0, Lbls0),
-    share_1(Is, Dict, Lbls, [I|Seq], Acc);
-share_1([{'try',_,_}=I|Is], Dict0, Lbls0, Seq, Acc) ->
-    {Dict,Lbls} = clean_non_sharable(Dict0, Lbls0),
-    share_1(Is, Dict, Lbls, [I|Seq], Acc);
-share_1([{try_case,_}=I|Is], Dict0, Lbls0, Seq, Acc) ->
-    {Dict,Lbls} = clean_non_sharable(Dict0, Lbls0),
-    share_1(Is, Dict, Lbls, [I|Seq], Acc);
-share_1([{catch_end,_}=I|Is], Dict0, Lbls0, Seq, Acc) ->
-    {Dict,Lbls} = clean_non_sharable(Dict0, Lbls0),
-    share_1(Is, Dict, Lbls, [I|Seq], Acc);
-share_1([{jump,{f,To}}=I,{label,L}=Lbl|Is], Dict0, Lbls0, _Seq, Acc) ->
-    Lbls = maps:put(L, To, Lbls0),
-    share_1(Is, Dict0, Lbls, [], [[Lbl,I]|Acc]);
-share_1([I|Is], Dict, Lbls, Seq, Acc) ->
+share_1([{func_info,_,_,_}|_]=Is0, _Safe, _, Lbls, [], Acc0) ->
+    %% Replace jumps to jumps with a jump to the final destination
+    %% (jump threading). This optimization is done in the main
+    %% optimization pass of this module, but we do it here too because
+    %% it can give more opportunities for sharing code.
+    F = case Lbls =:= #{} of
+            true ->
+                fun lists:reverse/2;
+            false ->
+                fun(Is, Acc) ->
+                        beam_utils:replace_labels(Is, Acc, Lbls,
+                                                  fun(Old) -> Old end)
+                end
+        end,
+    foldl(F, Is0, Acc0);
+share_1([{'catch',_,_}=I|Is], Safe, Dict, _Lbls0, Seq, Acc) ->
+    %% Disable the jump threading optimization because it may be unsafe.
+    share_1(Is, Safe, Dict, #{}, [I|Seq], Acc);
+share_1([{'try',_,_}=I|Is], Safe, Dict, _Lbls, Seq, Acc) ->
+    %% Disable the jump threading optimization because it may be unsafe.
+    share_1(Is, Safe, Dict, #{}, [I|Seq], Acc);
+share_1([{jump,{f,To}}=I,{label,From}=Lbl|Is], Safe, Dict0, Lbls0, _Seq, Acc) ->
+    Lbls = Lbls0#{From => To},
+    share_1(Is, Safe, Dict0, Lbls, [], [[Lbl,I]|Acc]);
+share_1([I|Is], Safe, Dict, Lbls, Seq, Acc) ->
     case is_unreachable_after(I) of
 	false ->
-	    share_1(Is, Dict, Lbls, [I|Seq], Acc);
+	    share_1(Is, Safe, Dict, Lbls, [I|Seq], Acc);
 	true ->
-	    share_1(Is, Dict, Lbls, [I], Acc)
+	    share_1(Is, Safe, Dict, Lbls, [I], Acc)
     end.
 
 is_shareable([{'catch',_,_}|_]) -> false;
@@ -375,34 +404,73 @@ is_shareable([{try_case,_}|_]) -> false;
 is_shareable([{try_end,_}|_]) -> false;
 is_shareable(_) -> true.
 
-clean_non_sharable(Dict0, Lbls0) ->
-    %% We are passing in or out of a 'catch' or 'try' block. Remove
-    %% sequences that should not be shared over the boundaries of the
-    %% block. Since the end of the sequence must match, the only
-    %% possible match between a sequence outside and a sequence inside
-    %% the 'catch'/'try' block is a sequence that ends with an
-    %% instruction that causes an exception. Any sequence that causes
-    %% an exception must contain a line/1 instruction.
-    Dict1 = maps:to_list(Dict0),
-    Lbls1 = maps:to_list(Lbls0),
-    {Dict2,Lbls2} = foldl(fun({K, V}, {Dict,Lbls}) ->
-                                  case sharable_with_try(K) of
-                                      true ->
-                                          {[{K,V}|Dict],lists:keydelete(V, 2, Lbls)};
-                                      false ->
-                                          {Dict,Lbls}
-                                  end
-                          end, {[],Lbls1}, Dict1),
-    {maps:from_list(Dict2),maps:from_list(Lbls2)}.
-
-sharable_with_try([{line,_}|_]) ->
-    %% This sequence may cause an exception and may potentially
+%% There are identical code sequences Seq at labels Lbl1 and Lbl2. Is it
+%% safe to replace the sequence at label Lbl1 with a jump to Lbl2?
+
+is_safely_shareable(Lbl1, Lbl2, Seq, Safe) ->
+    case no_exception(Seq) of
+        true ->
+            %% Safe, because the sequence Seq can't raise an exception.
+            true;
+        false ->
+            %% Safe if both labels are either ouside try/catch or inside
+            %% the same part of the same try/catch or catch block.
+            case Safe of
+                #{Lbl1 := Scope, Lbl2 := Scope} -> true;
+                #{} -> false
+            end
+    end.
+
+no_exception([{line,_}|_]) ->
+    %% This sequence may raise an exception and may potentially
     %% match a sequence on the other side of the 'catch'/'try' block
     %% boundary.
     false;
-sharable_with_try([_|Is]) ->
-    sharable_with_try(Is);
-sharable_with_try([]) -> true.
+no_exception([_|Is]) ->
+    no_exception(Is);
+no_exception([]) -> true.
+
+%%
+%% Classify labels according to where the instructions that branch to
+%% the labels are located. Each label is assigned a scope identifer.
+%% If two labels have different scope identfiers, sharing a sequence
+%% that raises an exception between the labels may not be safe, because
+%% one label is inside a try/catch, and the other label is outside.
+%%
+%% Note that we don't care where the labels themselves are located,
+%% only from where the branches to them are located. This is essential
+%% to ensure that beam_jump is idempotent, ensuring that beam_jump
+%% will not do any unsafe optimizations when when compiling from a .S
+%% file. The move/1 optimization pass below (2) will move instruction
+%% sequences that end in an exception raising instruction to the end
+%% of the function. Thus instruction sequences initially being in
+%% different scopes could be placed next to each other.
+%%
+
+classify_labels(Is) ->
+    classify_labels(Is, 0, #{}).
+
+classify_labels([{'catch',_,_}|Is], Scope, Safe) ->
+    classify_labels(Is, Scope+1, Safe);
+classify_labels([{catch_end,_}|Is], Scope, Safe) ->
+    classify_labels(Is, Scope+1, Safe);
+classify_labels([{'try',_,_}|Is], Scope, Safe) ->
+    classify_labels(Is, Scope+1, Safe);
+classify_labels([{'try_end',_}|Is], Scope, Safe) ->
+    classify_labels(Is, Scope+1, Safe);
+classify_labels([{'try_case',_}|Is], Scope, Safe) ->
+    classify_labels(Is, Scope+1, Safe);
+classify_labels([I|Is], Scope, Safe0) ->
+    Labels = instr_labels(I),
+    Safe = foldl(fun(L, A) ->
+                         case A of
+                             #{L := Scope} -> A;
+                             #{L := _} -> maps:remove(L, A);
+                             #{} -> A#{L => Scope}
+                         end
+                 end, Safe0, Labels),
+    classify_labels(Is, Scope, Safe);
+classify_labels([], _Scope, Safe) -> Safe.
 
 %% Eliminate all fallthroughs. Return the result reversed.
 
-- 
2.16.4

openSUSE Build Service is sponsored by