File 1005-Mitigate-slow-compilation-for-nested-record-access-i.patch of Package erlang

From 9e391fdbb021db44ce6e75f31553e9e07be04017 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Thu, 29 Feb 2024 07:31:47 +0100
Subject: [PATCH] Mitigate slow compilation for nested record access in guards

The code for nested record access is hugely expanded by the
`erl_expand_records` pass, which can cause the `beam_ssa_opt:cse/1`
pass to become really slow. To a lesser extent, the `beam_ssa_bool`
pass can also get slower.

This commit tries to mitigate the issue like so:

* The `beam_ssa_bool` and `beam_ssa_opt:cse/1` passes will now
  evaluate guard BIFs with literal arguments. That will save
  compilation time by quickly discarding unreachable blocks.

* In `beam_ssa_opt:cse/1`, the handling of the maps that keep track of
  expressions previously has been optimized.

Closes #8203
---
 lib/compiler/src/beam_ssa.erl      |  41 +++++++++++
 lib/compiler/src/beam_ssa_bool.erl |  29 +++++++-
 lib/compiler/src/beam_ssa_opt.erl  | 107 +++++++++++++++++++++--------
 3 files changed, 147 insertions(+), 30 deletions(-)

diff --git a/lib/compiler/src/beam_ssa.erl b/lib/compiler/src/beam_ssa.erl
index c862574bb9..f9ce67442f 100644
--- a/lib/compiler/src/beam_ssa.erl
+++ b/lib/compiler/src/beam_ssa.erl
@@ -26,6 +26,7 @@
          clobbers_xregs/1,def/2,def_unused/3,
          definitions/2,
          dominators/2,dominators_from_predecessors/2,common_dominators/3,
+         eval_instr/1,
          flatmapfold_instrs/4,
          fold_blocks/4,
          fold_instrs/4,
@@ -505,6 +506,46 @@ common_dominators(Ls, Dom, Numbering) when is_map(Dom) ->
     Doms = [map_get(L, Dom) || L <- Ls],
     dom_intersection(Doms, Numbering).
 
+
+%% eval_instr(Instr) -> #b_literal{} | any | failed.
+%%  Attempt to evaluate a BIF instruction. Returns a `#b_literal{}`
+%%  record if evaluation succeeded, `failed` if an exception was
+%%  raised, or `any` if the arguments were not literals or the BIF is
+%%  not pure.
+
+-spec eval_instr(b_set()) -> b_literal() | 'any' | 'failed'.
+
+eval_instr(#b_set{op={bif,Bif},args=Args}) ->
+    LitArgs = case Args of
+                  [#b_literal{val=Arg1}] ->
+                      [Arg1];
+                  [#b_literal{val=Arg1},#b_literal{val=Arg2}] ->
+                      [Arg1,Arg2];
+                  [#b_literal{val=Arg1},#b_literal{val=Arg2},#b_literal{val=Arg3}] ->
+                      [Arg1,Arg2,Arg3];
+                  _ ->
+                      none
+              end,
+    case LitArgs of
+        none ->
+            any;
+        _ ->
+            Arity = length(LitArgs),
+            case erl_bifs:is_pure(erlang, Bif, Arity) of
+                true ->
+                    try apply(erlang, Bif, LitArgs) of
+                        Result ->
+                            #b_literal{val=Result}
+                    catch error:_->
+                            failed
+                    end;
+                false ->
+                    any
+            end
+    end;
+eval_instr(_) ->
+    any.
+
 -spec fold_instrs(Fun, Labels, Acc0, Blocks) -> any() when
       Fun :: fun((b_set()|terminator(), any()) -> any()),
       Labels :: [label()],
diff --git a/lib/compiler/src/beam_ssa_bool.erl b/lib/compiler/src/beam_ssa_bool.erl
index 803928f79b..ec14674b34 100644
--- a/lib/compiler/src/beam_ssa_bool.erl
+++ b/lib/compiler/src/beam_ssa_bool.erl
@@ -393,8 +393,33 @@ pre_opt_is([#b_set{dst=Dst,args=Args0}=I0|Is], Reached, Sub0, Acc) ->
                     pre_opt_is(Is, Reached, Sub, Acc)
             end;
         false ->
-            pre_opt_is(Is, Reached, Sub0, [I|Acc])
-        end;
+            case beam_ssa:eval_instr(I) of
+                any ->
+                    pre_opt_is(Is, Reached, Sub0, [I|Acc]);
+                failed ->
+                    case Is of
+                        [#b_set{op={succeeded,guard},dst=SuccDst,args=[Dst]}] ->
+                            %% In a guard. The failure reason doesn't
+                            %% matter, so we can discard this
+                            %% instruction and the `succeeded`
+                            %% instruction. Since the success branch
+                            %% will never be taken, it usually means
+                            %% that one or more blocks can be
+                            %% discarded as well, saving some
+                            %% compilation time.
+                            Sub = Sub0#{SuccDst => #b_literal{val=false}},
+                            {reverse(Acc),Sub};
+                        _ ->
+                            %% In a body. We must preserve the exact
+                            %% failure reason, which is most easily
+                            %% done by keeping the instruction.
+                            pre_opt_is(Is, Reached, Sub0, [I|Acc])
+                    end;
+                #b_literal{}=Lit ->
+                    Sub = Sub0#{Dst => Lit},
+                    pre_opt_is(Is, Reached, Sub, Acc)
+            end
+    end;
 pre_opt_is([], _Reached, Sub, Acc) ->
     {reverse(Acc),Sub}.
 
diff --git a/lib/compiler/src/beam_ssa_opt.erl b/lib/compiler/src/beam_ssa_opt.erl
index e9c2c4892b..b3b458b185 100644
--- a/lib/compiler/src/beam_ssa_opt.erl
+++ b/lib/compiler/src/beam_ssa_opt.erl
@@ -1042,13 +1042,19 @@ ssa_opt_cse({#opt_st{ssa=Linear}=St, FuncDb}) ->
     M = #{0 => #{}, ?EXCEPTION_BLOCK => #{}},
     {St#opt_st{ssa=cse(Linear, #{}, M)}, FuncDb}.
 
-cse([{L,#b_blk{is=Is0,last=Last0}=Blk}|Bs], Sub0, M0) ->
-    Es0 = map_get(L, M0),
-    {Is1,Es,Sub} = cse_is(Is0, Es0, Sub0, []),
-    Last = sub(Last0, Sub),
-    M = cse_successors(Is1, Blk, Es, M0),
-    Is = reverse(Is1),
-    [{L,Blk#b_blk{is=Is,last=Last}}|cse(Bs, Sub, M)];
+cse([{L,#b_blk{is=Is0,last=Last0}=Blk0}|Bs], Sub0, M0) ->
+    case M0 of
+        #{L := Es0} ->
+            {Is1,Es,Sub} = cse_is(Is0, Es0, Sub0, []),
+            Last = sub(Last0, Sub),
+            Blk = Blk0#b_blk{last=Last},
+            M = cse_successors(Is1, Blk, Es, M0),
+            Is = reverse(Is1),
+            [{L,Blk#b_blk{is=Is,last=Last}}|cse(Bs, Sub, M)];
+        #{} ->
+            %% This block is never reached.
+            cse(Bs, Sub0, M0)
+    end;
 cse([], _, _) -> [].
 
 cse_successors([#b_set{op={succeeded,_},args=[Src]},Bif|_], Blk, EsSucc, M0) ->
@@ -1057,9 +1063,8 @@ cse_successors([#b_set{op={succeeded,_},args=[Src]},Bif|_], Blk, EsSucc, M0) ->
             %% The previous instruction only has a valid value at the success branch.
             %% We must remove the substitution for Src from the failure branch.
             #b_blk{last=#b_br{succ=Succ,fail=Fail}} = Blk,
-            M = cse_successors_1([Succ], EsSucc, M0),
-            EsFail = maps:filter(fun(_, Val) -> Val =/= Src end, EsSucc),
-            cse_successors_1([Fail], EsFail, M);
+            M1 = cse_successors_1([Succ], EsSucc, M0),
+            cse_successor_fail(Fail, Src, EsSucc, M1);
         false ->
             %% There can't be any replacement for Src in EsSucc. No need for
             %% any special handling.
@@ -1082,14 +1087,34 @@ cse_successors_1([L|Ls], Es0, M) ->
     end;
 cse_successors_1([], _, M) -> M.
 
+cse_successor_fail(Fail, Src, Es0, M) ->
+    case M of
+        #{Fail := Es1} when map_size(Es1) =:= 0 ->
+            M;
+        #{Fail := Es1} ->
+            Es = maps:filter(fun(Var, Val) ->
+                                 is_map_key(Var, Es1) andalso Val =/= Src
+                             end, Es0),
+            M#{Fail := Es};
+        #{} ->
+            Es = maps:filter(fun(_, Val) -> Val =/= Src end, Es0),
+            M#{Fail => Es}
+    end.
+
 %% Calculate the intersection of the two maps. Both keys and values
 %% must match.
 cse_intersection(M1, M2) ->
+    MapSize1 = map_size(M1),
+    MapSize2 = map_size(M2),
     if
-        map_size(M1) < map_size(M2) ->
+        MapSize1 < MapSize2 ->
             cse_intersection_1(maps:to_list(M1), M2, M1);
+        MapSize1 > MapSize2 ->
+            cse_intersection_1(maps:to_list(M2), M1, M2);
+        M1 =:= M2 ->
+            M2;
         true ->
-            cse_intersection_1(maps:to_list(M2), M1, M2)
+            cse_intersection_1(maps:to_list(M1), M2, M1)
     end.
 
 cse_intersection_1([{Key,Value}|KVs], M, Result) ->
@@ -1138,6 +1163,34 @@ cse_is([#b_set{op=put_map,dst=Dst,args=[_Kind,Map|_]}=I0|Is],
     end;
 cse_is([#b_set{dst=Dst}=I0|Is], Es0, Sub0, Acc) ->
     I = sub(I0, Sub0),
+    case beam_ssa:eval_instr(I) of
+        #b_literal{}=Value ->
+            Sub = Sub0#{Dst => Value},
+            cse_is(Is, Es0, Sub, Acc);
+        failed ->
+            case Is of
+                [#b_set{op={succeeded,guard},dst=SuccDst,args=[Dst]}] ->
+                    %% In a guard. The failure reason doesn't matter,
+                    %% so we can discard this instruction and the
+                    %% `succeeded` instruction. Since the success
+                    %% branch will never be taken, it usually means
+                    %% that one or more blocks can be discarded as
+                    %% well, saving some compilation time.
+                    Sub = Sub0#{SuccDst => #b_literal{val=false}},
+                    {Acc,Es0,Sub};
+                _ ->
+                    %% In a body. We must preserve the exact failure
+                    %% reason, which is most easily done by keeping the
+                    %% instruction.
+                    cse_instr(I, Is, Es0, Sub0, Acc)
+            end;
+        any ->
+            cse_instr(I, Is, Es0, Sub0, Acc)
+    end;
+cse_is([], Es, Sub, Acc) ->
+    {Acc,Es,Sub}.
+
+cse_instr(#b_set{dst=Dst}=I, Is, Es0, Sub0, Acc) ->
     case beam_ssa:clobbers_xregs(I) of
         true ->
             %% Retaining the expressions map across calls and other
@@ -1153,18 +1206,17 @@ cse_is([#b_set{dst=Dst}=I0|Is], Es0, Sub0, Acc) ->
                     cse_is(Is, Es0, Sub0, [I|Acc]);
                 {ok,ExprKey} ->
                     case Es0 of
-                        #{ExprKey:=Src} ->
-                            Sub = Sub0#{Dst=>Src},
+                        #{ExprKey := Src} ->
+                            %% Reuse the result of the previous expression.
+                            Sub = Sub0#{Dst => Src},
                             cse_is(Is, Es0, Sub, Acc);
                         #{} ->
-                            Es1 = Es0#{ExprKey=>Dst},
+                            Es1 = Es0#{ExprKey => Dst},
                             Es = cse_add_inferred_exprs(I, Es1),
                             cse_is(Is, Es, Sub0, [I|Acc])
                     end
             end
-    end;
-cse_is([], Es, Sub, Acc) ->
-    {Acc,Es,Sub}.
+    end.
 
 cse_add_inferred_exprs(#b_set{op=put_list,dst=List,args=[Hd,Tl]}, Es) ->
     Es#{{get_hd,[List]} => Hd,
@@ -1174,16 +1226,6 @@ cse_add_inferred_exprs(#b_set{op=put_tuple,dst=Tuple,args=[E1,E2|_]}, Es) ->
     %% worthwhile (at least not in the sample used by scripts/diffable).
     Es#{{get_tuple_element,[Tuple,#b_literal{val=0}]} => E1,
         {get_tuple_element,[Tuple,#b_literal{val=1}]} => E2};
-cse_add_inferred_exprs(#b_set{op={bif,element},dst=E,
-                              args=[#b_literal{val=N},Tuple]}, Es)
-  when is_integer(N) ->
-    Es#{{get_tuple_element,[Tuple,#b_literal{val=N-1}]} => E};
-cse_add_inferred_exprs(#b_set{op={bif,hd},dst=Hd,args=[List]}, Es) ->
-    Es#{{get_hd,[List]} => Hd};
-cse_add_inferred_exprs(#b_set{op={bif,tl},dst=Tl,args=[List]}, Es) ->
-    Es#{{get_tl,[List]} => Tl};
-cse_add_inferred_exprs(#b_set{op={bif,map_get},dst=Value,args=[Key,Map]}, Es) ->
-    Es#{{get_map_element,[Map,Key]} => Value};
 cse_add_inferred_exprs(#b_set{op=put_map,dst=Map,args=[_,_|Args]}=I, Es0) ->
     Es = cse_add_map_get(Args, Map, Es0),
     Es#{Map => I};
@@ -1194,6 +1236,15 @@ cse_add_map_get([Key,Value|T], Map, Es0) ->
     cse_add_map_get(T, Map, Es);
 cse_add_map_get([], _, Es) -> Es.
 
+cse_expr(#b_set{op={bif,hd},args=[List]}) ->
+    {ok,{get_hd,[List]}};
+cse_expr(#b_set{op={bif,tl},args=[List]}) ->
+    {ok,{get_tl,[List]}};
+cse_expr(#b_set{op={bif,element},args=[#b_literal{val=Index},Tuple]})
+  when is_integer(Index) ->
+    {ok,{get_tuple_element,[Tuple,#b_literal{val=Index-1}]}};
+cse_expr(#b_set{op={bif,map_get},args=[Key,Map]}) ->
+    {ok,{get_map_element,[Map,Key]}};
 cse_expr(#b_set{op=Op,args=Args}=I) ->
     case cse_suitable(I) of
         true -> {ok,{Op,Args}};
-- 
2.35.3
Places

File 1005-Mitigate-slow-compilation-for-nested-record-access-i.patch of Package erlang

Places