File 2451-Explicitly-pass-in-rpo-as-arguments.patch of Package erlang
From 0ce3fb91c92d0f2989bdcb273f75854d2e4eb5b3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Valim?= <jose.valim@dashbit.co>
Date: Fri, 30 Oct 2020 11:57:35 +0100
Subject: [PATCH] Explicitly pass in rpo as arguments
Many functions in beam_ssa would receive labels
and compute the rpo from labels. This led to
multiple calls to rpo, which can be inefficient
in some scenarios.
This commit changes these functions to receive
the rpo as argument and shares the RPO in the most
trivial cases found so far. For example, the
`beam_ssa_bool` pass has been made 15-20% faster
when compiling some large modules.
beam_ssa:fold_po/3 and beam_ssa:fold_po/4 have been
removed as they are not used anywhere.
---
lib/compiler/src/beam_ssa.erl | 176 ++++++++--------------
lib/compiler/src/beam_ssa_bool.erl | 26 ++--
lib/compiler/src/beam_ssa_bsm.erl | 30 ++--
lib/compiler/src/beam_ssa_opt.erl | 12 +-
lib/compiler/src/beam_ssa_pre_codegen.erl | 33 ++--
lib/compiler/src/beam_ssa_recv.erl | 3 +-
6 files changed, 126 insertions(+), 154 deletions(-)
diff --git a/lib/compiler/src/beam_ssa.erl b/lib/compiler/src/beam_ssa.erl
index 2a02eb3cd0..d5fe4b7d9c 100644
--- a/lib/compiler/src/beam_ssa.erl
+++ b/lib/compiler/src/beam_ssa.erl
@@ -24,14 +24,14 @@
between/4,
clobbers_xregs/1,def/2,def_unused/3,
definitions/1,
- dominators/1,dominators/2,common_dominators/3,
- flatmapfold_instrs_rpo/4,
- fold_po/3,fold_po/4,fold_rpo/3,fold_rpo/4,
- fold_instrs_rpo/4,
+ dominators/2,dominators_from_predecessors/2,common_dominators/3,
+ flatmapfold_instrs/4,
+ fold_blocks/4,
+ fold_instrs/4,
is_loop_header/1,
linearize/1,
- mapfold_blocks_rpo/4,
- mapfold_instrs_rpo/4,
+ mapfold_blocks/4,
+ mapfold_instrs/4,
merge_blocks/1,
normalize/1,
no_side_effect/1,
@@ -41,7 +41,7 @@
split_blocks/3,
successors/1,successors/2,
trim_unreachable/1,
- used/1,uses/1,uses/2]).
+ used/1,uses/2]).
-export_type([b_module/0,b_function/0,b_blk/0,b_set/0,
b_ret/0,b_br/0,b_switch/0,terminator/0,
@@ -361,7 +361,7 @@ def_unused(Ls, Unused, Blocks) ->
Preds = cerl_sets:from_list(Top),
def_unused_1(Blks, Preds, [], Unused).
-%% dominators(BlockMap) -> {Dominators,Numbering}.
+%% dominators(Labels, BlockMap) -> {Dominators,Numbering}.
%% Calculate the dominator tree, returning a map where each entry
%% in the map is a list that gives the path from that block to
%% the top of the dominator tree. (Note that the suffixes of the
@@ -374,19 +374,19 @@ def_unused(Ls, Unused, Blocks) ->
%% Cooper, Keith D.; Harvey, Timothy J; Kennedy, Ken (2001).
%% A Simple, Fast Dominance Algorithm.
--spec dominators(Blocks) -> Result when
+-spec dominators(Labels, Blocks) -> Result when
+ Labels :: [label()],
Blocks :: block_map(),
Result :: {dominator_map(), numbering_map()}.
-dominators(Blocks) ->
+dominators(Labels, Blocks) ->
Preds = predecessors(Blocks),
- dominators(Blocks, Preds).
+ dominators_from_predecessors(Labels, Preds).
--spec dominators(Blocks, Preds) -> Result when
- Blocks :: block_map(),
+-spec dominators_from_predecessors(Labels, Preds) -> Result when
+ Labels :: [label()],
Preds :: predecessor_map(),
Result :: {dominator_map(), numbering_map()}.
-dominators(Blocks, Preds) ->
- Top0 = rpo(Blocks),
+dominators_from_predecessors(Top0, Preds) ->
Df = maps:from_list(number(Top0, 0)),
[{0,[]}|Top] = [{L,map_get(L, Preds)} || L <- Top0],
@@ -404,115 +404,72 @@ common_dominators(Ls, Dom, Numbering) ->
Doms = [map_get(L, Dom) || L <- Ls],
dom_intersection(Doms, Numbering).
--spec fold_instrs_rpo(Fun, From, Acc0, Blocks) -> any() when
+-spec fold_instrs(Fun, Labels, Acc0, Blocks) -> any() when
Fun :: fun((b_blk()|terminator(), any()) -> any()),
- From :: [label()],
+ Labels :: [label()],
Acc0 :: any(),
Blocks :: block_map().
-fold_instrs_rpo(Fun, From, Acc0, Blocks) ->
- Top = rpo(From, Blocks),
- fold_instrs_rpo_1(Top, Fun, Blocks, Acc0).
+fold_instrs(Fun, Labels, Acc0, Blocks) ->
+ fold_instrs_1(Labels, Fun, Blocks, Acc0).
+
+%% mapfold_blocks(Fun, [Label], Acc, BlockMap) -> {BlockMap,Acc}.
+%% Like mapfold_instrs but at the block level to support lookahead
+%% and scope-dependent transformations.
-%% Like mapfold_instrs_rpo but at the block level to support lookahead and
-%% scope-dependent transformations.
--spec mapfold_blocks_rpo(Fun, From, Acc, Blocks) -> Result when
+-spec mapfold_blocks(Fun, Labels, Acc, Blocks) -> Result when
Fun :: fun((label(), b_blk(), any()) -> {b_blk(), any()}),
- From :: [label()],
+ Labels :: [label()],
Acc :: any(),
Blocks :: block_map(),
Result :: {block_map(), any()}.
-mapfold_blocks_rpo(Fun, From, Acc, Blocks) ->
- Successors = rpo(From, Blocks),
+mapfold_blocks(Fun, Labels, Acc, Blocks) ->
foldl(fun(Lbl, A) ->
- mapfold_blocks_rpo_1(Fun, Lbl, A)
- end, {Blocks, Acc}, Successors).
+ mapfold_blocks_1(Fun, Lbl, A)
+ end, {Blocks, Acc}, Labels).
-mapfold_blocks_rpo_1(Fun, Lbl, {Blocks0, Acc0}) ->
+mapfold_blocks_1(Fun, Lbl, {Blocks0, Acc0}) ->
Block0 = map_get(Lbl, Blocks0),
{Block, Acc} = Fun(Lbl, Block0, Acc0),
Blocks = Blocks0#{Lbl:=Block},
{Blocks, Acc}.
--spec mapfold_instrs_rpo(Fun, From, Acc0, Blocks0) -> {Blocks,Acc} when
+-spec mapfold_instrs(Fun, Labels, Acc0, Blocks0) -> {Blocks,Acc} when
Fun :: fun((b_blk()|terminator(), any()) -> any()),
- From :: [label()],
+ Labels :: [label()],
Acc0 :: any(),
Acc :: any(),
Blocks0 :: block_map(),
Blocks :: block_map().
-mapfold_instrs_rpo(Fun, From, Acc0, Blocks) ->
- Top = rpo(From, Blocks),
- mapfold_instrs_rpo_1(Top, Fun, Blocks, Acc0).
+mapfold_instrs(Fun, Labels, Acc0, Blocks) ->
+ mapfold_instrs_1(Labels, Fun, Blocks, Acc0).
--spec flatmapfold_instrs_rpo(Fun, From, Acc0, Blocks0) -> {Blocks,Acc} when
+-spec flatmapfold_instrs(Fun, Labels, Acc0, Blocks0) -> {Blocks,Acc} when
Fun :: fun((b_blk()|terminator(), any()) -> any()),
- From :: [label()],
+ Labels :: [label()],
Acc0 :: any(),
Acc :: any(),
Blocks0 :: block_map(),
Blocks :: block_map().
-flatmapfold_instrs_rpo(Fun, From, Acc0, Blocks) ->
- Top = rpo(From, Blocks),
- flatmapfold_instrs_rpo_1(Top, Fun, Blocks, Acc0).
+flatmapfold_instrs(Fun, Labels, Acc0, Blocks) ->
+ flatmapfold_instrs_1(Labels, Fun, Blocks, Acc0).
-type fold_fun() :: fun((label(), b_blk(), any()) -> any()).
-%% fold_rpo(Fun, [Label], Acc0, Blocks) -> Acc.
-%% Fold over all blocks a reverse postorder traversal of the block
-%% graph; that is, first visit a block, then visit its successors.
-
--spec fold_rpo(Fun, Acc0, Blocks) -> any() when
- Fun :: fold_fun(),
- Acc0 :: any(),
- Blocks :: #{label():=b_blk()}.
-
-fold_rpo(Fun, Acc0, Blocks) ->
- fold_rpo(Fun, [0], Acc0, Blocks).
-
-%% fold_rpo(Fun, [Label], Acc0, Blocks) -> Acc. Fold over all blocks
-%% reachable from a given set of labels in a reverse postorder
-%% traversal of the block graph; that is, first visit a block, then
-%% visit its successors.
+%% fold_blocks(Fun, [Label], Acc0, Blocks) -> Acc. Fold over all blocks
+%% from a given set of labels in a reverse postorder traversal of the
+%% block graph; that is, first visit a block, then visit its successors.
--spec fold_rpo(Fun, Labels, Acc0, Blocks) -> any() when
+-spec fold_blocks(Fun, Labels, Acc0, Blocks) -> any() when
Fun :: fold_fun(),
Labels :: [label()],
Acc0 :: any(),
Blocks :: #{label():=b_blk()}.
-fold_rpo(Fun, From, Acc0, Blocks) ->
- Top = rpo(From, Blocks),
- fold_rpo_1(Top, Fun, Blocks, Acc0).
-
-%% fold_po(Fun, Acc0, Blocks) -> Acc.
-%% Fold over all blocks in a postorder traversal of the block graph;
-%% that is, first visit all successors of block, then the block
-%% itself.
-
--spec fold_po(Fun, Acc0, Blocks) -> any() when
- Fun :: fold_fun(),
- Acc0 :: any(),
- Blocks :: #{label():=b_blk()}.
-
-%% fold_po(Fun, From, Acc0, Blocks) -> Acc.
-%% Fold over the blocks reachable from the block numbers given
-%% by From in a postorder traversal of the block graph.
-
-fold_po(Fun, Acc0, Blocks) ->
- fold_po(Fun, [0], Acc0, Blocks).
-
--spec fold_po(Fun, Labels, Acc0, Blocks) -> any() when
- Fun :: fold_fun(),
- Labels :: [label()],
- Acc0 :: any(),
- Blocks :: block_map().
-
-fold_po(Fun, From, Acc0, Blocks) ->
- Top = reverse(rpo(From, Blocks)),
- fold_rpo_1(Top, Fun, Blocks, Acc0).
+fold_blocks(Fun, Labels, Acc0, Blocks) ->
+ fold_blocks_1(Labels, Fun, Blocks, Acc0).
%% linearize(Blocks) -> [{BlockLabel,#b_blk{}}].
%% Linearize the intermediate representation of the code.
@@ -610,7 +567,7 @@ rename_vars(Rename, From, Blocks) when is_map(Rename)->
Count :: beam_ssa:label().
split_blocks(P, Blocks, Count) ->
- Ls = beam_ssa:rpo(Blocks),
+ Ls = rpo(Blocks),
split_blocks_1(Ls, P, Blocks, Count).
-spec trim_unreachable(SSA0) -> SSA when
@@ -646,21 +603,22 @@ used(_) -> [].
-spec definitions(Blocks :: block_map()) -> definition_map().
definitions(Blocks) ->
- fold_instrs_rpo(fun(#b_set{ dst = Var }=I, Acc) ->
+ Top = rpo(Blocks),
+ fold_instrs(fun(#b_set{ dst = Var }=I, Acc) ->
Acc#{Var => I};
(_Terminator, Acc) ->
Acc
- end, [0], #{}, Blocks).
-
--spec uses(Blocks :: block_map()) -> usage_map().
-uses(Blocks) ->
- uses([0], Blocks).
+ end, Top, #{}, Blocks).
--spec uses(From, Blocks) -> usage_map() when
- From :: [label()],
+%% uses(Labels, BlockMap) -> UsageMap
+%% Traverse the blocks given by labels and builds a usage map
+%% with variables as keys and a list of labels-instructions
+%% tuples as values.
+-spec uses(Labels, Blocks) -> usage_map() when
+ Labels :: [label()],
Blocks :: block_map().
-uses(From, Blocks) ->
- fold_rpo(fun fold_uses_block/3, From, #{}, Blocks).
+uses(Labels, Blocks) ->
+ fold_blocks(fun fold_uses_block/3, Labels, #{}, Blocks).
fold_uses_block(Lbl, #b_blk{is=Is,last=Last}, UseMap0) ->
F = fun(I, UseMap) ->
@@ -765,37 +723,37 @@ number([L|Ls], N) ->
[{L,N}|number(Ls, N+1)];
number([], _) -> [].
-fold_rpo_1([L|Ls], Fun, Blocks, Acc0) ->
+fold_blocks_1([L|Ls], Fun, Blocks, Acc0) ->
Block = map_get(L, Blocks),
Acc = Fun(L, Block, Acc0),
- fold_rpo_1(Ls, Fun, Blocks, Acc);
-fold_rpo_1([], _, _, Acc) -> Acc.
+ fold_blocks_1(Ls, Fun, Blocks, Acc);
+fold_blocks_1([], _, _, Acc) -> Acc.
-fold_instrs_rpo_1([L|Ls], Fun, Blocks, Acc0) ->
+fold_instrs_1([L|Ls], Fun, Blocks, Acc0) ->
#b_blk{is=Is,last=Last} = map_get(L, Blocks),
Acc1 = foldl(Fun, Acc0, Is),
Acc = Fun(Last, Acc1),
- fold_instrs_rpo_1(Ls, Fun, Blocks, Acc);
-fold_instrs_rpo_1([], _, _, Acc) -> Acc.
+ fold_instrs_1(Ls, Fun, Blocks, Acc);
+fold_instrs_1([], _, _, Acc) -> Acc.
-mapfold_instrs_rpo_1([L|Ls], Fun, Blocks0, Acc0) ->
+mapfold_instrs_1([L|Ls], Fun, Blocks0, Acc0) ->
#b_blk{is=Is0,last=Last0} = Block0 = map_get(L, Blocks0),
{Is,Acc1} = mapfoldl(Fun, Acc0, Is0),
{Last,Acc} = Fun(Last0, Acc1),
Block = Block0#b_blk{is=Is,last=Last},
Blocks = Blocks0#{L:=Block},
- mapfold_instrs_rpo_1(Ls, Fun, Blocks, Acc);
-mapfold_instrs_rpo_1([], _, Blocks, Acc) ->
+ mapfold_instrs_1(Ls, Fun, Blocks, Acc);
+mapfold_instrs_1([], _, Blocks, Acc) ->
{Blocks,Acc}.
-flatmapfold_instrs_rpo_1([L|Ls], Fun, Blocks0, Acc0) ->
+flatmapfold_instrs_1([L|Ls], Fun, Blocks0, Acc0) ->
#b_blk{is=Is0,last=Last0} = Block0 = map_get(L, Blocks0),
{Is,Acc1} = flatmapfoldl(Fun, Acc0, Is0),
{[Last],Acc} = Fun(Last0, Acc1),
Block = Block0#b_blk{is=Is,last=Last},
Blocks = Blocks0#{L:=Block},
- flatmapfold_instrs_rpo_1(Ls, Fun, Blocks, Acc);
-flatmapfold_instrs_rpo_1([], _, Blocks, Acc) ->
+ flatmapfold_instrs_1(Ls, Fun, Blocks, Acc);
+flatmapfold_instrs_1([], _, Blocks, Acc) ->
{Blocks,Acc}.
linearize_1([L|Ls], Blocks, Seen0, Acc0) ->
diff --git a/lib/compiler/src/beam_ssa_bool.erl b/lib/compiler/src/beam_ssa_bool.erl
index 208f27a891..e0f54dcf28 100644
--- a/lib/compiler/src/beam_ssa_bool.erl
+++ b/lib/compiler/src/beam_ssa_bool.erl
@@ -99,16 +99,16 @@
%%
%% Attempts have been made to simplify this pass and replace it with
%% simpler transforms in the hope of avoiding much of the work
-%% performed by bool_opt/2. Targeting boolean expressions in guards
+%% performed by bool_opt/3. Targeting boolean expressions in guards
%% and rewriting them along the patterns shown in the examples above
%% can achieve the same results in many cases, but does not by any
-%% means reach the level of quality achieved by bool_opt/2.
+%% means reach the level of quality achieved by bool_opt/3.
%%
%% An analysis of the instances where the simpler transforms fail to
-%% reach parity with bool_opt/2 indicates that the information they
+%% reach parity with bool_opt/3 indicates that the information they
%% lack in order to improve their result would require more or less
%% the same control flow graph analysis and simplification as
-%% bool_opt/2 already does.
+%% bool_opt/3 already does.
%%
%% This optimization pass must be first to be run after conversion
%% to SSA code, both for correctness and effectiveness reasons.
@@ -148,10 +148,11 @@ opt_function(#b_function{bs=Blocks0,cnt=Count0}=F) ->
DefVars = interesting_defs(Blocks1),
if
map_size(DefVars) > 1 ->
- Dom = beam_ssa:dominators(Blocks1),
- Uses = beam_ssa:uses(Blocks1),
+ RPO = beam_ssa:rpo(Blocks1),
+ Dom = beam_ssa:dominators(RPO, Blocks1),
+ Uses = beam_ssa:uses(RPO, Blocks1),
St0 = #st{defs=DefVars,count=Count1,dom=Dom,uses=Uses},
- {Blocks2,St} = bool_opt(Blocks1, St0),
+ {Blocks2,St} = bool_opt(RPO, Blocks1, St0),
Count = St#st.count,
%% When merging blocks, phi nodes must have the same
@@ -194,7 +195,7 @@ opt_function(#b_function{bs=Blocks0,cnt=Count0}=F) ->
{'true_or_any',beam_ssa:label()} |
'=:='.
--type pre_sub_map() :: #{'uses' => {'uses',beam_ssa:block_map() | list()},
+-type pre_sub_map() :: #{'uses' => {'uses',[beam_ssa:label()],beam_ssa:block_map() | list()},
var() => pre_sub_val()}.
pre_opt(Blocks, Count) ->
@@ -202,7 +203,7 @@ pre_opt(Blocks, Count) ->
%% Collect information to help the pre_opt pass to optimize
%% `switch` instructions.
- Sub0 = #{uses => {uses,Blocks}},
+ Sub0 = #{uses => {uses,Top,Blocks}},
Sub1 = get_phi_info(Top, Blocks, Sub0),
Sub = maps:remove(uses, Sub1),
@@ -280,8 +281,8 @@ get_phi_info_single_use(Var, Sub) ->
#{Var:=[_]} -> true;
#{Var:=[_|_]} -> false
end,Sub};
- {uses,Blocks} ->
- Uses = beam_ssa:uses(Blocks),
+ {uses,Top,Blocks} ->
+ Uses = beam_ssa:uses(Top, Blocks),
get_phi_info_single_use(Var, Sub#{uses => Uses})
end.
@@ -565,9 +566,6 @@ interesting_defs_is([], _L, Acc) -> Acc.
%%% interior '=:=' instruction we will visit the blocks in postorder.
%%%
-bool_opt(Blocks, St) ->
- bool_opt(beam_ssa:rpo(Blocks), Blocks, St).
-
bool_opt([L|Ls], Blocks0, St0) ->
{Blocks,St1} = bool_opt(Ls, Blocks0, St0),
case Blocks of
diff --git a/lib/compiler/src/beam_ssa_bsm.erl b/lib/compiler/src/beam_ssa_bsm.erl
index f33b6bc6f2..9984c3586f 100644
--- a/lib/compiler/src/beam_ssa_bsm.erl
+++ b/lib/compiler/src/beam_ssa_bsm.erl
@@ -303,11 +303,12 @@ get_fa(#b_function{ anno = Anno }) ->
promotions = #{} :: promotion_map() }).
alias_matched_binaries(Blocks0, Counter, AliasMap) when AliasMap =/= #{} ->
- {Dominators, _} = beam_ssa:dominators(Blocks0),
+ RPO = beam_ssa:rpo(Blocks0),
+ {Dominators, _} = beam_ssa:dominators(RPO, Blocks0),
State0 = #amb{ dominators = Dominators,
match_aliases = AliasMap,
cnt = Counter },
- {Blocks, State} = beam_ssa:mapfold_blocks_rpo(fun amb_1/3, [0], State0,
+ {Blocks, State} = beam_ssa:mapfold_blocks(fun amb_1/3, RPO, State0,
Blocks0),
{amb_insert_promotions(Blocks, State), State#amb.cnt};
alias_matched_binaries(Blocks, Counter, _AliasMap) ->
@@ -449,13 +450,15 @@ combine_matches({Fs0, ModInfo}) ->
combine_matches(#b_function{bs=Blocks0,cnt=Counter0}=F, ModInfo) ->
case funcinfo_get(F, has_bsm_ops, ModInfo) of
true ->
- {Dominators, _} = beam_ssa:dominators(Blocks0),
+ RPO = beam_ssa:rpo(Blocks0),
+ {Dominators, _} = beam_ssa:dominators(RPO, Blocks0),
{Blocks1, State} =
- beam_ssa:mapfold_blocks_rpo(
+ beam_ssa:mapfold_blocks(
fun(Lbl, #b_blk{is=Is0}=Block0, State0) ->
{Is, State} = cm_1(Is0, [], Lbl, State0),
{Block0#b_blk{is=Is}, State}
- end, [0],
+ end,
+ RPO,
#cm{ definitions = beam_ssa:definitions(Blocks0),
dominators = Dominators,
blocks = Blocks0 },
@@ -669,7 +672,8 @@ aca_handle_convergence(Src, State0, Last0, Blocks0) ->
ordsets:from_list(SuccPath),
ordsets:from_list(FailPath)),
- case maps:is_key(Src, beam_ssa:uses(ConvergedPaths, Blocks0)) of
+ ConvergedLabels = beam_ssa:rpo(ConvergedPaths, Blocks0),
+ case maps:is_key(Src, beam_ssa:uses(ConvergedLabels, Blocks0)) of
true ->
case shortest(SuccPath, FailPath) of
left ->
@@ -792,7 +796,7 @@ aca_cs_arg(Arg, VRs) ->
%% contexts to us.
allow_context_passthrough({Fs, ModInfo0}) ->
- FsUses = [{F, beam_ssa:uses(F#b_function.bs)} || F <- Fs],
+ FsUses = [{F, beam_ssa:uses(beam_ssa:rpo(Bs), Bs)} || #b_function{bs=Bs}=F <- Fs],
ModInfo = acp_forward_params(FsUses, ModInfo0),
{Fs, ModInfo}.
@@ -851,8 +855,9 @@ skip_outgoing_tail_extraction(#b_function{bs=Blocks0}=F, ModInfo) ->
State0 = #sote{ definitions = beam_ssa:definitions(Blocks0),
mod_info = ModInfo },
- {Blocks1, State} = beam_ssa:mapfold_instrs_rpo(
- fun sote_rewrite_calls/2, [0], State0, Blocks0),
+ RPO = beam_ssa:rpo(Blocks0),
+ {Blocks1, State} = beam_ssa:mapfold_instrs(
+ fun sote_rewrite_calls/2, RPO, State0, Blocks0),
{Blocks, Counter} = alias_matched_binaries(Blocks1,
F#b_function.cnt,
@@ -918,12 +923,13 @@ annotate_context_parameters(F, ModInfo) ->
collect_opt_info(Fs) ->
foldl(fun(#b_function{bs=Blocks}=F, Acc0) ->
- UseMap = beam_ssa:uses(Blocks),
+ RPO = beam_ssa:rpo(Blocks),
+ UseMap = beam_ssa:uses(RPO, Blocks),
Where = beam_ssa:get_anno(location, F, []),
- beam_ssa:fold_instrs_rpo(
+ beam_ssa:fold_instrs(
fun(I, Acc) ->
collect_opt_info_1(I, Where, UseMap, Acc)
- end, [0], Acc0, Blocks)
+ end, RPO, Acc0, Blocks)
end, [], Fs).
collect_opt_info_1(#b_set{op=Op,anno=Anno,dst=Dst}=I, Where, UseMap, Acc0) ->
diff --git a/lib/compiler/src/beam_ssa_opt.erl b/lib/compiler/src/beam_ssa_opt.erl
index 3b150510dd..e08f60d7b3 100644
--- a/lib/compiler/src/beam_ssa_opt.erl
+++ b/lib/compiler/src/beam_ssa_opt.erl
@@ -356,9 +356,10 @@ fdb_fs([#b_function{ args=Args,bs=Bs }=F | Fs], Exports, FuncDb0) ->
arg_types=ArgTypes }}
end,
- FuncDb = beam_ssa:fold_rpo(fun(_L, #b_blk{is=Is}, FuncDb) ->
+ RPO = beam_ssa:rpo(Bs),
+ FuncDb = beam_ssa:fold_blocks(fun(_L, #b_blk{is=Is}, FuncDb) ->
fdb_is(Is, Id, FuncDb)
- end, FuncDb1, Bs),
+ end, RPO, FuncDb1, Bs),
fdb_fs(Fs, Exports, FuncDb);
fdb_fs([], _Exports, FuncDb) ->
@@ -2183,7 +2184,9 @@ replace_last([_], Repl) -> [Repl];
replace_last([I|Is], Repl) -> [I|replace_last(Is, Repl)].
opt_ne_single_use(Var, {uses,Linear}) ->
- Uses = beam_ssa:uses(maps:from_list(Linear)),
+ Blocks = maps:from_list(Linear),
+ RPO = beam_ssa:rpo(Blocks),
+ Uses = beam_ssa:uses(RPO, Blocks),
opt_ne_single_use(Var, Uses);
opt_ne_single_use(Var, Uses) when is_map(Uses) ->
{case Uses of
@@ -2271,8 +2274,9 @@ do_ssa_opt_sink(Defs, #opt_st{ssa=Linear}=St) ->
%% Calculate dominators.
Blocks0 = maps:from_list(Linear),
+ RPO = beam_ssa:rpo(Blocks0),
Preds = beam_ssa:predecessors(Blocks0),
- {Dom, Numbering} = beam_ssa:dominators(Blocks0, Preds),
+ {Dom, Numbering} = beam_ssa:dominators_from_predecessors(RPO, Preds),
%% It is not safe to move get_tuple_element instructions to blocks
%% that begin with certain instructions. It is also unsafe to move
diff --git a/lib/compiler/src/beam_ssa_pre_codegen.erl b/lib/compiler/src/beam_ssa_pre_codegen.erl
index a99216eb93..57fbc33558 100644
--- a/lib/compiler/src/beam_ssa_pre_codegen.erl
+++ b/lib/compiler/src/beam_ssa_pre_codegen.erl
@@ -196,7 +196,8 @@ add_extra_annos(F, Annos) ->
assert_no_critical_edges(#st{ssa=Blocks}=St) ->
F = fun assert_no_ces/3,
- beam_ssa:fold_rpo(F, Blocks, Blocks),
+ RPO = beam_ssa:rpo(Blocks),
+ beam_ssa:fold_blocks(F, RPO, Blocks, Blocks),
St.
assert_no_ces(_, #b_blk{is=[#b_set{op=phi,args=[_,_]=Phis}|_]}, Blocks) ->
@@ -226,7 +227,8 @@ fix_bs(#st{ssa=Blocks,cnt=Count0,use_bsm3=UseBSM3}=St) ->
(_, A) ->
A
end,
- case beam_ssa:fold_instrs_rpo(F, [0], [],Blocks) of
+ RPO = beam_ssa:rpo(Blocks),
+ case beam_ssa:fold_instrs(F, RPO, [], Blocks) of
[] ->
%% No binary matching in this function.
St;
@@ -1132,7 +1134,8 @@ fix_tuples(#st{ssa=Blocks0,cnt=Count0}=St) ->
#b_set{dst=Ignore,op=put_tuple_elements,args=Args}],C};
(I, C) -> {[I],C}
end,
- {Blocks,Count} = beam_ssa:flatmapfold_instrs_rpo(F, [0], Count0, Blocks0),
+ RPO = beam_ssa:rpo(Blocks0),
+ {Blocks,Count} = beam_ssa:flatmapfold_instrs(F, RPO, Count0, Blocks0),
St#st{ssa=Blocks,cnt=Count}.
%%%
@@ -1275,8 +1278,8 @@ is_single_use(V, Uses) ->
%% a stack frame or set up a stack frame with a different size.
place_frames(#st{ssa=Blocks}=St) ->
- {Doms,_} = beam_ssa:dominators(Blocks),
Ls = beam_ssa:rpo(Blocks),
+ {Doms,_} = beam_ssa:dominators(Ls, Blocks),
Tried = gb_sets:empty(),
Frames0 = [],
{Frames,_} = place_frames_1(Ls, Blocks, Doms, Tried, Frames0),
@@ -1661,7 +1664,8 @@ find_loop_exit([_,_|_]=RmBlocks, Blocks) ->
%% we always find a common block if there is one (shared by at
%% least two clauses), we must analyze the path from all
%% remove_message blocks.
- {Dominators,_} = beam_ssa:dominators(Blocks),
+ RPO = beam_ssa:rpo(Blocks),
+ {Dominators,_} = beam_ssa:dominators(RPO, Blocks),
RmSet = cerl_sets:from_list(RmBlocks),
Rpo = beam_ssa:rpo(RmBlocks, Blocks),
find_loop_exit_1(Rpo, RmSet, Dominators, Blocks);
@@ -2504,30 +2508,31 @@ turn_yregs_1(Def, FrameSize, Regs) ->
reserve_regs(#st{args=Args,ssa=Blocks,intervals=Intervals,res=Res0}=St) ->
%% Reserve x0, x1, and so on for the function arguments.
Res1 = reserve_arg_regs(Args, 0, Res0),
+ RPO = beam_ssa:rpo(Blocks),
%% Reserve Z registers (dummy registers) for instructions with no
%% return values (e.g. remove_message) or pseudo-return values
%% (e.g. landingpad).
- Res2 = reserve_zregs(Blocks, Intervals, Res1),
+ Res2 = reserve_zregs(RPO, Blocks, Intervals, Res1),
%% Reserve float registers.
- Res3 = reserve_fregs(Blocks, Res2),
+ Res3 = reserve_fregs(RPO, Blocks, Res2),
%% Reserve all remaining unreserved variables as X registers.
Res = maps:from_list(Res3),
- St#st{res=reserve_xregs(Blocks, Res)}.
+ St#st{res=reserve_xregs(RPO, Blocks, Res)}.
reserve_arg_regs([#b_var{}=Arg|Is], N, Acc) ->
reserve_arg_regs(Is, N+1, [{Arg,{x,N}}|Acc]);
reserve_arg_regs([], _, Acc) -> Acc.
-reserve_zregs(Blocks, Intervals, Res) ->
+reserve_zregs(RPO, Blocks, Intervals, Res) ->
ShortLived0 = [V || {V,[{Start,End}]} <- Intervals, Start+2 =:= End],
ShortLived = cerl_sets:from_list(ShortLived0),
F = fun(_, #b_blk{is=Is,last=Last}, A) ->
reserve_zreg(Is, Last, ShortLived, A)
end,
- beam_ssa:fold_rpo(F, [0], Res, Blocks).
+ beam_ssa:fold_blocks(F, RPO, Res, Blocks).
reserve_zreg([#b_set{op={bif,tuple_size},dst=Dst},
#b_set{op={bif,'=:='},args=[Dst,Val],dst=Bool}],
@@ -2593,11 +2598,11 @@ reserve_test_zreg(#b_var{}=V, ShortLived, A) ->
false -> A
end.
-reserve_fregs(Blocks, Res) ->
+reserve_fregs(RPO, Blocks, Res) ->
F = fun(_, #b_blk{is=Is}, A) ->
reserve_freg(Is, A)
end,
- beam_ssa:fold_rpo(F, [0], Res, Blocks).
+ beam_ssa:fold_blocks(F, RPO, Res, Blocks).
reserve_freg([#b_set{op={float,Op},dst=V}|Is], Res) ->
case Op of
@@ -2623,8 +2628,8 @@ reserve_freg([], Res) -> Res.
%% All remaining variables are reserved as X registers. Linear scan
%% will allocate the lowest free X register for the variable.
-reserve_xregs(Blocks, Res) ->
- Ls = reverse(beam_ssa:rpo(Blocks)),
+reserve_xregs(RPO, Blocks, Res) ->
+ Ls = reverse(RPO),
reserve_xregs(Ls, Blocks, #{}, Res).
reserve_xregs([L|Ls], Blocks, XsMap0, Res0) ->
diff --git a/lib/compiler/src/beam_ssa_recv.erl b/lib/compiler/src/beam_ssa_recv.erl
index 117526ecfc..756a38adde 100644
--- a/lib/compiler/src/beam_ssa_recv.erl
+++ b/lib/compiler/src/beam_ssa_recv.erl
@@ -191,7 +191,8 @@ ref_in_tuple(Tuple, Blocks) ->
when Tup =:= Tuple -> {yes,Ref};
(_, A) -> A
end,
- beam_ssa:fold_instrs_rpo(F, [0], no, Blocks).
+ RPO = beam_ssa:rpo(Blocks),
+ beam_ssa:fold_instrs(F, RPO, no, Blocks).
opt_ref_used(L, Ref, Blocks) ->
Vs = #{Ref=>ref,ref=>Ref,ref_matched=>false},
--
2.26.2