File 3391-Extend-CSE-to-eliminate-list-and-tuple-matching.patch of Package erlang
From 2f7dc2b740e006912126f0e5fab7d4f08a05e89a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Thu, 4 Mar 2021 19:51:00 +0100
Subject: [PATCH] Extend CSE to eliminate list and tuple matching
In generated code and in code involving inlining/macros, a newly
constructed list, tuple, or map could be matched in the same function.
Here is an example of generated code:
%% : sqrsum dup * swap dup * + ;
sqrsum(SP_0) ->
[X1|_] = SP_0,
SP_1 = [X1|SP_0], % dup
[X2,X3|SP_2] = SP_1,
SP_3 = [(X2*X3)|SP_2], % *
[X4,X5|SP_4] = SP_3,
SP_5 = [X5,X4|SP_4], % swap
[X6|_] = SP_5,
SP_6 = [X6|SP_5], % dup
[X7,X8|SP_7] = SP_6,
SP_8 = [(X7*X8)|SP_7], % *
[X9,X10|SP_9] = SP_8,
[(X9+X10)|SP_9]. % +
Here is the main part of the BEAM code for the function:
{test,is_nonempty_list,{f,5},[{x,0}]}.
{get_list,{x,0},{x,1},{x,0}}.
{gc_bif,'*',{f,0},2,[{x,1},{x,1}],{x,1}}.
{test,is_nonempty_list,{f,4},[{x,0}]}.
{test_heap,4,2}.
{get_list,{x,0},{x,2},{x,0}}.
{put_list,{x,1},{x,0},{x,0}}.
{put_list,{x,2},{x,0},{x,0}}.
{get_list,{x,0},{x,1},{x,0}}.
{gc_bif,'*',{f,0},2,[{x,1},{x,1}],{x,1}}.
{test,is_nonempty_list,{f,3},[{x,0}]}.
{get_list,{x,0},{x,2},{x,0}}.
{gc_bif,'+',{f,0},3,[{x,1},{x,2}],{x,1}}.
{test_heap,2,2}.
{put_list,{x,1},{x,0},{x,0}}.
return.
We can see that there are redundant list construction and
list matching instructions. This commit extends the
existing Common Sub Expression elimination optimization
so that the redundant instructions are eliminated:
{test,is_nonempty_list,{f,4},[{x,0}]}.
{get_list,{x,0},{x,1},{x,0}}.
{gc_bif,'*',{f,0},2,[{x,1},{x,1}],{x,1}}.
{test,is_nonempty_list,{f,3},[{x,0}]}.
{get_list,{x,0},{x,2},{x,0}}.
{gc_bif,'*',{f,0},3,[{x,2},{x,2}],{x,2}}.
{gc_bif,'+',{f,0},3,[{x,2},{x,1}],{x,1}}.
{test_heap,2,2}.
{put_list,{x,1},{x,0},{x,0}}.
return.
While at it, also optimize the intersection of expression maps.
---
lib/compiler/src/beam_ssa_opt.erl | 62 ++++++++++++++++++++++++++-----
1 file changed, 52 insertions(+), 10 deletions(-)
diff --git a/lib/compiler/src/beam_ssa_opt.erl b/lib/compiler/src/beam_ssa_opt.erl
index 1369445e5b..3ffa8ecc0a 100644
--- a/lib/compiler/src/beam_ssa_opt.erl
+++ b/lib/compiler/src/beam_ssa_opt.erl
@@ -898,7 +898,7 @@ is_tagged_tuple_4([], _, _) -> no.
%%%
ssa_opt_cse({#opt_st{ssa=Linear}=St, FuncDb}) ->
- M = #{0=>#{}},
+ M = #{0 => #{}, ?EXCEPTION_BLOCK => #{}},
{St#opt_st{ssa=cse(Linear, #{}, M)}, FuncDb}.
cse([{L,#b_blk{is=Is0,last=Last0}=Blk}|Bs], Sub0, M0) ->
@@ -934,20 +934,32 @@ cse_successors_1([L|Ls], Es0, M) ->
%% since the intersection will be empty.
cse_successors_1(Ls, Es0, M);
#{L:=Es1} ->
- %% Calculate the intersection of the two maps.
- %% Both keys and values must match.
- Es = maps:filter(fun(Key, Value) ->
- case Es1 of
- #{Key:=Value} -> true;
- #{} -> false
- end
- end, Es0),
+ Es = cse_intersection(Es0, Es1),
cse_successors_1(Ls, Es0, M#{L:=Es});
#{} ->
cse_successors_1(Ls, Es0, M#{L=>Es0})
end;
cse_successors_1([], _, M) -> M.
+%% Calculate the intersection of the two maps. Both keys and values
+%% must match.
+cse_intersection(M1, M2) ->
+ if
+ map_size(M1) < map_size(M2) ->
+ cse_intersection_1(maps:to_list(M1), M2, M1);
+ true ->
+ cse_intersection_1(maps:to_list(M2), M1, M2)
+ end.
+
+cse_intersection_1([{Key,Value}|KVs], M, Result) ->
+ case M of
+ #{Key := Value} ->
+ cse_intersection_1(KVs, M, Result);
+ #{} ->
+ cse_intersection_1(KVs, M, maps:remove(Key, Result))
+ end;
+cse_intersection_1([], _, Result) -> Result.
+
cse_is([#b_set{op={succeeded,_},dst=Bool,args=[Src]}=I0|Is], Es, Sub0, Acc) ->
I = sub(I0, Sub0),
case I of
@@ -980,7 +992,8 @@ cse_is([#b_set{dst=Dst}=I0|Is], Es0, Sub0, Acc) ->
Sub = Sub0#{Dst=>Src},
cse_is(Is, Es0, Sub, Acc);
#{} ->
- Es = Es0#{ExprKey=>Dst},
+ Es1 = Es0#{ExprKey=>Dst},
+ Es = cse_add_inferred_exprs(I, Es1),
cse_is(Is, Es, Sub0, [I|Acc])
end
end
@@ -988,6 +1001,33 @@ cse_is([#b_set{dst=Dst}=I0|Is], Es0, Sub0, Acc) ->
cse_is([], Es, Sub, Acc) ->
{Acc,Es,Sub}.
+cse_add_inferred_exprs(#b_set{op=put_list,dst=List,args=[Hd,Tl]}, Es) ->
+ Es#{{get_hd,[List]} => Hd,
+ {get_tl,[List]} => Tl};
+cse_add_inferred_exprs(#b_set{op=put_tuple,dst=Tuple,args=[E1,E2|_]}, Es) ->
+ %% Adding tuple elements beyond the first two does not seem to be
+ %% worthwhile (at least not in the sample used by scripts/diffable).
+ Es#{{get_tuple_element,[Tuple,#b_literal{val=0}]} => E1,
+ {get_tuple_element,[Tuple,#b_literal{val=1}]} => E2};
+cse_add_inferred_exprs(#b_set{op={bif,element},dst=E,
+ args=[#b_literal{val=N},Tuple]}, Es)
+ when is_integer(N) ->
+ Es#{{get_tuple_element,[Tuple,#b_literal{val=N-1}]} => E};
+cse_add_inferred_exprs(#b_set{op={bif,hd},dst=Hd,args=[List]}, Es) ->
+ Es#{{get_hd,[List]} => Hd};
+cse_add_inferred_exprs(#b_set{op={bif,tl},dst=Tl,args=[List]}, Es) ->
+ Es#{{get_tl,[List]} => Tl};
+cse_add_inferred_exprs(#b_set{op={bif,map_get},dst=Value,args=[Key,Map]}, Es) ->
+ Es#{{get_map_element,[Map,Key]} => Value};
+cse_add_inferred_exprs(#b_set{op=put_map,dst=Map,args=Args}, Es) ->
+ cse_add_map_get(Args, Map, Es);
+cse_add_inferred_exprs(_, Es) -> Es.
+
+cse_add_map_get([Key,Value|T], Map, Es0) ->
+ Es = Es0#{{get_map_element,[Map,Key]} => Value},
+ cse_add_map_get(T, Map, Es);
+cse_add_map_get([], _, Es) -> Es.
+
cse_expr(#b_set{op=Op,args=Args}=I) ->
case cse_suitable(I) of
true -> {ok,{Op,Args}};
@@ -999,6 +1039,8 @@ cse_suitable(#b_set{op=get_tl}) -> true;
cse_suitable(#b_set{op=put_list}) -> true;
cse_suitable(#b_set{op=get_tuple_element}) -> true;
cse_suitable(#b_set{op=put_tuple}) -> true;
+cse_suitable(#b_set{op=get_map_element}) -> true;
+cse_suitable(#b_set{op=put_map}) -> true;
cse_suitable(#b_set{op={bif,tuple_size}}) ->
%% Doing CSE for tuple_size/1 can prevent the
%% creation of test_arity and select_tuple_arity
--
2.26.2