File 3391-Extend-CSE-to-eliminate-list-and-tuple-matching.patch of Package erlang

From 2f7dc2b740e006912126f0e5fab7d4f08a05e89a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Thu, 4 Mar 2021 19:51:00 +0100
Subject: [PATCH] Extend CSE to eliminate list and tuple matching

In generated code and in code involving inlining/macros, a newly
constructed list, tuple, or map could be matched in the same function.

Here is an example of generated code:

    %% : sqrsum  dup *  swap  dup *  + ;
    sqrsum(SP_0) ->
        [X1|_] = SP_0,
        SP_1 = [X1|SP_0],         % dup

        [X2,X3|SP_2] = SP_1,
        SP_3 = [(X2*X3)|SP_2],    % *

        [X4,X5|SP_4] = SP_3,
        SP_5 = [X5,X4|SP_4],      % swap

        [X6|_] = SP_5,
        SP_6 = [X6|SP_5],         % dup

        [X7,X8|SP_7] = SP_6,
        SP_8 = [(X7*X8)|SP_7],    % *

        [X9,X10|SP_9] = SP_8,
        [(X9+X10)|SP_9].          % +

Here is the main part of the BEAM code for the function:

    {test,is_nonempty_list,{f,5},[{x,0}]}.
    {get_list,{x,0},{x,1},{x,0}}.
    {gc_bif,'*',{f,0},2,[{x,1},{x,1}],{x,1}}.
    {test,is_nonempty_list,{f,4},[{x,0}]}.
    {test_heap,4,2}.
    {get_list,{x,0},{x,2},{x,0}}.
    {put_list,{x,1},{x,0},{x,0}}.
    {put_list,{x,2},{x,0},{x,0}}.
    {get_list,{x,0},{x,1},{x,0}}.
    {gc_bif,'*',{f,0},2,[{x,1},{x,1}],{x,1}}.
    {test,is_nonempty_list,{f,3},[{x,0}]}.
    {get_list,{x,0},{x,2},{x,0}}.
    {gc_bif,'+',{f,0},3,[{x,1},{x,2}],{x,1}}.
    {test_heap,2,2}.
    {put_list,{x,1},{x,0},{x,0}}.
    return.

We can see that there are redundant list construction and
list matching instructions. This commit extends the
existing Common Sub Expression elimination optimization
so that the redundant instructions are eliminated:

    {test,is_nonempty_list,{f,4},[{x,0}]}.
    {get_list,{x,0},{x,1},{x,0}}.
    {gc_bif,'*',{f,0},2,[{x,1},{x,1}],{x,1}}.
    {test,is_nonempty_list,{f,3},[{x,0}]}.
    {get_list,{x,0},{x,2},{x,0}}.
    {gc_bif,'*',{f,0},3,[{x,2},{x,2}],{x,2}}.
    {gc_bif,'+',{f,0},3,[{x,2},{x,1}],{x,1}}.
    {test_heap,2,2}.
    {put_list,{x,1},{x,0},{x,0}}.
    return.

While at it, also optimize the intersection of expression maps.
---
 lib/compiler/src/beam_ssa_opt.erl | 62 ++++++++++++++++++++++++++-----
 1 file changed, 52 insertions(+), 10 deletions(-)

diff --git a/lib/compiler/src/beam_ssa_opt.erl b/lib/compiler/src/beam_ssa_opt.erl
index 1369445e5b..3ffa8ecc0a 100644
--- a/lib/compiler/src/beam_ssa_opt.erl
+++ b/lib/compiler/src/beam_ssa_opt.erl
@@ -898,7 +898,7 @@ is_tagged_tuple_4([], _, _) -> no.
 %%%
 
 ssa_opt_cse({#opt_st{ssa=Linear}=St, FuncDb}) ->
-    M = #{0=>#{}},
+    M = #{0 => #{}, ?EXCEPTION_BLOCK => #{}},
     {St#opt_st{ssa=cse(Linear, #{}, M)}, FuncDb}.
 
 cse([{L,#b_blk{is=Is0,last=Last0}=Blk}|Bs], Sub0, M0) ->
@@ -934,20 +934,32 @@ cse_successors_1([L|Ls], Es0, M) ->
             %% since the intersection will be empty.
             cse_successors_1(Ls, Es0, M);
         #{L:=Es1} ->
-            %% Calculate the intersection of the two maps.
-            %% Both keys and values must match.
-            Es = maps:filter(fun(Key, Value) ->
-                                     case Es1 of
-                                         #{Key:=Value} -> true;
-                                         #{} -> false
-                                     end
-                             end, Es0),
+            Es = cse_intersection(Es0, Es1),
             cse_successors_1(Ls, Es0, M#{L:=Es});
         #{} ->
             cse_successors_1(Ls, Es0, M#{L=>Es0})
     end;
 cse_successors_1([], _, M) -> M.
 
+%% Calculate the intersection of the two maps. Both keys and values
+%% must match.
+cse_intersection(M1, M2) ->
+    if
+        map_size(M1) < map_size(M2) ->
+            cse_intersection_1(maps:to_list(M1), M2, M1);
+        true ->
+            cse_intersection_1(maps:to_list(M2), M1, M2)
+    end.
+
+cse_intersection_1([{Key,Value}|KVs], M, Result) ->
+    case M of
+        #{Key := Value} ->
+            cse_intersection_1(KVs, M, Result);
+        #{} ->
+            cse_intersection_1(KVs, M, maps:remove(Key, Result))
+    end;
+cse_intersection_1([], _, Result) -> Result.
+
 cse_is([#b_set{op={succeeded,_},dst=Bool,args=[Src]}=I0|Is], Es, Sub0, Acc) ->
     I = sub(I0, Sub0),
     case I of
@@ -980,7 +992,8 @@ cse_is([#b_set{dst=Dst}=I0|Is], Es0, Sub0, Acc) ->
                             Sub = Sub0#{Dst=>Src},
                             cse_is(Is, Es0, Sub, Acc);
                         #{} ->
-                            Es = Es0#{ExprKey=>Dst},
+                            Es1 = Es0#{ExprKey=>Dst},
+                            Es = cse_add_inferred_exprs(I, Es1),
                             cse_is(Is, Es, Sub0, [I|Acc])
                     end
             end
@@ -988,6 +1001,33 @@ cse_is([#b_set{dst=Dst}=I0|Is], Es0, Sub0, Acc) ->
 cse_is([], Es, Sub, Acc) ->
     {Acc,Es,Sub}.
 
+cse_add_inferred_exprs(#b_set{op=put_list,dst=List,args=[Hd,Tl]}, Es) ->
+    Es#{{get_hd,[List]} => Hd,
+        {get_tl,[List]} => Tl};
+cse_add_inferred_exprs(#b_set{op=put_tuple,dst=Tuple,args=[E1,E2|_]}, Es) ->
+    %% Adding tuple elements beyond the first two does not seem to be
+    %% worthwhile (at least not in the sample used by scripts/diffable).
+    Es#{{get_tuple_element,[Tuple,#b_literal{val=0}]} => E1,
+        {get_tuple_element,[Tuple,#b_literal{val=1}]} => E2};
+cse_add_inferred_exprs(#b_set{op={bif,element},dst=E,
+                              args=[#b_literal{val=N},Tuple]}, Es)
+  when is_integer(N) ->
+    Es#{{get_tuple_element,[Tuple,#b_literal{val=N-1}]} => E};
+cse_add_inferred_exprs(#b_set{op={bif,hd},dst=Hd,args=[List]}, Es) ->
+    Es#{{get_hd,[List]} => Hd};
+cse_add_inferred_exprs(#b_set{op={bif,tl},dst=Tl,args=[List]}, Es) ->
+    Es#{{get_tl,[List]} => Tl};
+cse_add_inferred_exprs(#b_set{op={bif,map_get},dst=Value,args=[Key,Map]}, Es) ->
+    Es#{{get_map_element,[Map,Key]} => Value};
+cse_add_inferred_exprs(#b_set{op=put_map,dst=Map,args=Args}, Es) ->
+    cse_add_map_get(Args, Map, Es);
+cse_add_inferred_exprs(_, Es) -> Es.
+
+cse_add_map_get([Key,Value|T], Map, Es0) ->
+    Es = Es0#{{get_map_element,[Map,Key]} => Value},
+    cse_add_map_get(T, Map, Es);
+cse_add_map_get([], _, Es) -> Es.
+
 cse_expr(#b_set{op=Op,args=Args}=I) ->
     case cse_suitable(I) of
         true -> {ok,{Op,Args}};
@@ -999,6 +1039,8 @@ cse_suitable(#b_set{op=get_tl}) -> true;
 cse_suitable(#b_set{op=put_list}) -> true;
 cse_suitable(#b_set{op=get_tuple_element}) -> true;
 cse_suitable(#b_set{op=put_tuple}) -> true;
+cse_suitable(#b_set{op=get_map_element}) -> true;
+cse_suitable(#b_set{op=put_map}) -> true;
 cse_suitable(#b_set{op={bif,tuple_size}}) ->
     %% Doing CSE for tuple_size/1 can prevent the
     %% creation of test_arity and select_tuple_arity
-- 
2.26.2

openSUSE Build Service is sponsored by