File 1005-Mitigate-slow-compilation-for-nested-record-access-i.patch of Package erlang
From 9e391fdbb021db44ce6e75f31553e9e07be04017 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Thu, 29 Feb 2024 07:31:47 +0100
Subject: [PATCH] Mitigate slow compilation for nested record access in guards
The code for nested record access is hugely expanded by the
`erl_expand_records` pass, which can cause the `beam_ssa_opt:cse/1`
pass to become really slow. To a lesser extent, the `beam_ssa_bool`
pass can also get slower.
This commit tries to mitigate the issue like so:
* The `beam_ssa_bool` and `beam_ssa_opt:cse/1` passes will now
evaluate guard BIFs with literal arguments. That will save
compilation time by quickly discarding unreachable blocks.
* In `beam_ssa_opt:cse/1`, the handling of the maps that keep track of
expressions previously has been optimized.
Closes #8203
---
lib/compiler/src/beam_ssa.erl | 41 +++++++++++
lib/compiler/src/beam_ssa_bool.erl | 29 +++++++-
lib/compiler/src/beam_ssa_opt.erl | 107 +++++++++++++++++++++--------
3 files changed, 147 insertions(+), 30 deletions(-)
diff --git a/lib/compiler/src/beam_ssa.erl b/lib/compiler/src/beam_ssa.erl
index c862574bb9..f9ce67442f 100644
--- a/lib/compiler/src/beam_ssa.erl
+++ b/lib/compiler/src/beam_ssa.erl
@@ -26,6 +26,7 @@
clobbers_xregs/1,def/2,def_unused/3,
definitions/2,
dominators/2,dominators_from_predecessors/2,common_dominators/3,
+ eval_instr/1,
flatmapfold_instrs/4,
fold_blocks/4,
fold_instrs/4,
@@ -505,6 +506,46 @@ common_dominators(Ls, Dom, Numbering) when is_map(Dom) ->
Doms = [map_get(L, Dom) || L <- Ls],
dom_intersection(Doms, Numbering).
+
+%% eval_instr(Instr) -> #b_literal{} | any | failed.
+%% Attempt to evaluate a BIF instruction. Returns a `#b_literal{}`
+%% record if evaluation succeeded, `failed` if an exception was
+%% raised, or `any` if the arguments were not literals or the BIF is
+%% not pure.
+
+-spec eval_instr(b_set()) -> b_literal() | 'any' | 'failed'.
+
+eval_instr(#b_set{op={bif,Bif},args=Args}) ->
+ LitArgs = case Args of
+ [#b_literal{val=Arg1}] ->
+ [Arg1];
+ [#b_literal{val=Arg1},#b_literal{val=Arg2}] ->
+ [Arg1,Arg2];
+ [#b_literal{val=Arg1},#b_literal{val=Arg2},#b_literal{val=Arg3}] ->
+ [Arg1,Arg2,Arg3];
+ _ ->
+ none
+ end,
+ case LitArgs of
+ none ->
+ any;
+ _ ->
+ Arity = length(LitArgs),
+ case erl_bifs:is_pure(erlang, Bif, Arity) of
+ true ->
+ try apply(erlang, Bif, LitArgs) of
+ Result ->
+ #b_literal{val=Result}
+ catch error:_->
+ failed
+ end;
+ false ->
+ any
+ end
+ end;
+eval_instr(_) ->
+ any.
+
-spec fold_instrs(Fun, Labels, Acc0, Blocks) -> any() when
Fun :: fun((b_set()|terminator(), any()) -> any()),
Labels :: [label()],
diff --git a/lib/compiler/src/beam_ssa_bool.erl b/lib/compiler/src/beam_ssa_bool.erl
index 803928f79b..ec14674b34 100644
--- a/lib/compiler/src/beam_ssa_bool.erl
+++ b/lib/compiler/src/beam_ssa_bool.erl
@@ -393,8 +393,33 @@ pre_opt_is([#b_set{dst=Dst,args=Args0}=I0|Is], Reached, Sub0, Acc) ->
pre_opt_is(Is, Reached, Sub, Acc)
end;
false ->
- pre_opt_is(Is, Reached, Sub0, [I|Acc])
- end;
+ case beam_ssa:eval_instr(I) of
+ any ->
+ pre_opt_is(Is, Reached, Sub0, [I|Acc]);
+ failed ->
+ case Is of
+ [#b_set{op={succeeded,guard},dst=SuccDst,args=[Dst]}] ->
+ %% In a guard. The failure reason doesn't
+ %% matter, so we can discard this
+ %% instruction and the `succeeded`
+ %% instruction. Since the success branch
+ %% will never be taken, it usually means
+ %% that one or more blocks can be
+ %% discarded as well, saving some
+ %% compilation time.
+ Sub = Sub0#{SuccDst => #b_literal{val=false}},
+ {reverse(Acc),Sub};
+ _ ->
+ %% In a body. We must preserve the exact
+ %% failure reason, which is most easily
+ %% done by keeping the instruction.
+ pre_opt_is(Is, Reached, Sub0, [I|Acc])
+ end;
+ #b_literal{}=Lit ->
+ Sub = Sub0#{Dst => Lit},
+ pre_opt_is(Is, Reached, Sub, Acc)
+ end
+ end;
pre_opt_is([], _Reached, Sub, Acc) ->
{reverse(Acc),Sub}.
diff --git a/lib/compiler/src/beam_ssa_opt.erl b/lib/compiler/src/beam_ssa_opt.erl
index e9c2c4892b..b3b458b185 100644
--- a/lib/compiler/src/beam_ssa_opt.erl
+++ b/lib/compiler/src/beam_ssa_opt.erl
@@ -1042,13 +1042,19 @@ ssa_opt_cse({#opt_st{ssa=Linear}=St, FuncDb}) ->
M = #{0 => #{}, ?EXCEPTION_BLOCK => #{}},
{St#opt_st{ssa=cse(Linear, #{}, M)}, FuncDb}.
-cse([{L,#b_blk{is=Is0,last=Last0}=Blk}|Bs], Sub0, M0) ->
- Es0 = map_get(L, M0),
- {Is1,Es,Sub} = cse_is(Is0, Es0, Sub0, []),
- Last = sub(Last0, Sub),
- M = cse_successors(Is1, Blk, Es, M0),
- Is = reverse(Is1),
- [{L,Blk#b_blk{is=Is,last=Last}}|cse(Bs, Sub, M)];
+cse([{L,#b_blk{is=Is0,last=Last0}=Blk0}|Bs], Sub0, M0) ->
+ case M0 of
+ #{L := Es0} ->
+ {Is1,Es,Sub} = cse_is(Is0, Es0, Sub0, []),
+ Last = sub(Last0, Sub),
+ Blk = Blk0#b_blk{last=Last},
+ M = cse_successors(Is1, Blk, Es, M0),
+ Is = reverse(Is1),
+ [{L,Blk#b_blk{is=Is,last=Last}}|cse(Bs, Sub, M)];
+ #{} ->
+ %% This block is never reached.
+ cse(Bs, Sub0, M0)
+ end;
cse([], _, _) -> [].
cse_successors([#b_set{op={succeeded,_},args=[Src]},Bif|_], Blk, EsSucc, M0) ->
@@ -1057,9 +1063,8 @@ cse_successors([#b_set{op={succeeded,_},args=[Src]},Bif|_], Blk, EsSucc, M0) ->
%% The previous instruction only has a valid value at the success branch.
%% We must remove the substitution for Src from the failure branch.
#b_blk{last=#b_br{succ=Succ,fail=Fail}} = Blk,
- M = cse_successors_1([Succ], EsSucc, M0),
- EsFail = maps:filter(fun(_, Val) -> Val =/= Src end, EsSucc),
- cse_successors_1([Fail], EsFail, M);
+ M1 = cse_successors_1([Succ], EsSucc, M0),
+ cse_successor_fail(Fail, Src, EsSucc, M1);
false ->
%% There can't be any replacement for Src in EsSucc. No need for
%% any special handling.
@@ -1082,14 +1087,34 @@ cse_successors_1([L|Ls], Es0, M) ->
end;
cse_successors_1([], _, M) -> M.
+cse_successor_fail(Fail, Src, Es0, M) ->
+ case M of
+ #{Fail := Es1} when map_size(Es1) =:= 0 ->
+ M;
+ #{Fail := Es1} ->
+ Es = maps:filter(fun(Var, Val) ->
+ is_map_key(Var, Es1) andalso Val =/= Src
+ end, Es0),
+ M#{Fail := Es};
+ #{} ->
+ Es = maps:filter(fun(_, Val) -> Val =/= Src end, Es0),
+ M#{Fail => Es}
+ end.
+
%% Calculate the intersection of the two maps. Both keys and values
%% must match.
cse_intersection(M1, M2) ->
+ MapSize1 = map_size(M1),
+ MapSize2 = map_size(M2),
if
- map_size(M1) < map_size(M2) ->
+ MapSize1 < MapSize2 ->
cse_intersection_1(maps:to_list(M1), M2, M1);
+ MapSize1 > MapSize2 ->
+ cse_intersection_1(maps:to_list(M2), M1, M2);
+ M1 =:= M2 ->
+ M2;
true ->
- cse_intersection_1(maps:to_list(M2), M1, M2)
+ cse_intersection_1(maps:to_list(M1), M2, M1)
end.
cse_intersection_1([{Key,Value}|KVs], M, Result) ->
@@ -1138,6 +1163,34 @@ cse_is([#b_set{op=put_map,dst=Dst,args=[_Kind,Map|_]}=I0|Is],
end;
cse_is([#b_set{dst=Dst}=I0|Is], Es0, Sub0, Acc) ->
I = sub(I0, Sub0),
+ case beam_ssa:eval_instr(I) of
+ #b_literal{}=Value ->
+ Sub = Sub0#{Dst => Value},
+ cse_is(Is, Es0, Sub, Acc);
+ failed ->
+ case Is of
+ [#b_set{op={succeeded,guard},dst=SuccDst,args=[Dst]}] ->
+ %% In a guard. The failure reason doesn't matter,
+ %% so we can discard this instruction and the
+ %% `succeeded` instruction. Since the success
+ %% branch will never be taken, it usually means
+ %% that one or more blocks can be discarded as
+ %% well, saving some compilation time.
+ Sub = Sub0#{SuccDst => #b_literal{val=false}},
+ {Acc,Es0,Sub};
+ _ ->
+ %% In a body. We must preserve the exact failure
+ %% reason, which is most easily done by keeping the
+ %% instruction.
+ cse_instr(I, Is, Es0, Sub0, Acc)
+ end;
+ any ->
+ cse_instr(I, Is, Es0, Sub0, Acc)
+ end;
+cse_is([], Es, Sub, Acc) ->
+ {Acc,Es,Sub}.
+
+cse_instr(#b_set{dst=Dst}=I, Is, Es0, Sub0, Acc) ->
case beam_ssa:clobbers_xregs(I) of
true ->
%% Retaining the expressions map across calls and other
@@ -1153,18 +1206,17 @@ cse_is([#b_set{dst=Dst}=I0|Is], Es0, Sub0, Acc) ->
cse_is(Is, Es0, Sub0, [I|Acc]);
{ok,ExprKey} ->
case Es0 of
- #{ExprKey:=Src} ->
- Sub = Sub0#{Dst=>Src},
+ #{ExprKey := Src} ->
+ %% Reuse the result of the previous expression.
+ Sub = Sub0#{Dst => Src},
cse_is(Is, Es0, Sub, Acc);
#{} ->
- Es1 = Es0#{ExprKey=>Dst},
+ Es1 = Es0#{ExprKey => Dst},
Es = cse_add_inferred_exprs(I, Es1),
cse_is(Is, Es, Sub0, [I|Acc])
end
end
- end;
-cse_is([], Es, Sub, Acc) ->
- {Acc,Es,Sub}.
+ end.
cse_add_inferred_exprs(#b_set{op=put_list,dst=List,args=[Hd,Tl]}, Es) ->
Es#{{get_hd,[List]} => Hd,
@@ -1174,16 +1226,6 @@ cse_add_inferred_exprs(#b_set{op=put_tuple,dst=Tuple,args=[E1,E2|_]}, Es) ->
%% worthwhile (at least not in the sample used by scripts/diffable).
Es#{{get_tuple_element,[Tuple,#b_literal{val=0}]} => E1,
{get_tuple_element,[Tuple,#b_literal{val=1}]} => E2};
-cse_add_inferred_exprs(#b_set{op={bif,element},dst=E,
- args=[#b_literal{val=N},Tuple]}, Es)
- when is_integer(N) ->
- Es#{{get_tuple_element,[Tuple,#b_literal{val=N-1}]} => E};
-cse_add_inferred_exprs(#b_set{op={bif,hd},dst=Hd,args=[List]}, Es) ->
- Es#{{get_hd,[List]} => Hd};
-cse_add_inferred_exprs(#b_set{op={bif,tl},dst=Tl,args=[List]}, Es) ->
- Es#{{get_tl,[List]} => Tl};
-cse_add_inferred_exprs(#b_set{op={bif,map_get},dst=Value,args=[Key,Map]}, Es) ->
- Es#{{get_map_element,[Map,Key]} => Value};
cse_add_inferred_exprs(#b_set{op=put_map,dst=Map,args=[_,_|Args]}=I, Es0) ->
Es = cse_add_map_get(Args, Map, Es0),
Es#{Map => I};
@@ -1194,6 +1236,15 @@ cse_add_map_get([Key,Value|T], Map, Es0) ->
cse_add_map_get(T, Map, Es);
cse_add_map_get([], _, Es) -> Es.
+cse_expr(#b_set{op={bif,hd},args=[List]}) ->
+ {ok,{get_hd,[List]}};
+cse_expr(#b_set{op={bif,tl},args=[List]}) ->
+ {ok,{get_tl,[List]}};
+cse_expr(#b_set{op={bif,element},args=[#b_literal{val=Index},Tuple]})
+ when is_integer(Index) ->
+ {ok,{get_tuple_element,[Tuple,#b_literal{val=Index-1}]}};
+cse_expr(#b_set{op={bif,map_get},args=[Key,Map]}) ->
+ {ok,{get_map_element,[Map,Key]}};
cse_expr(#b_set{op=Op,args=Args}=I) ->
case cse_suitable(I) of
true -> {ok,{Op,Args}};
--
2.35.3