File 1861-Improve-register-allocation-for-try-.-catch-construc.patch of Package erlang

From 8b814a7419db1f710f2a853f25a51269ce16133d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Mon, 27 Jan 2025 15:09:04 +0100
Subject: [PATCH] Improve register allocation for try ... catch constructs

Consider this function:

    do_try(F) ->
        try
            F()
        catch
            C:R:Stk ->
                {'EXIT',C,R,Stk}
        end.

The compiler in Erlang/OTP 27 generates the following code for the
body of the function:

        {allocate,3,1}.    % Allocate three Y registers.
        {init_yregs,{list,[{y,0},{y,1}]}}.
        {'try',{y,2},{f,3}}.
        {call_fun,0}.
        {try_end,{y,2}}.
        {deallocate,3}.
        return.
      {label,3}.
        {try_case,{y,2}}.
        %% {y,2} is free, but is not being reused.
        {move,{x,1},{y,0}}.
        {move,{x,0},{y,1}}.
        {move,{x,2},{x,0}}.
        build_stacktrace.
        {test_heap,5,1}.
        {put_tuple2,{x,0},{list,[{atom,'EXIT'},{y,1},{y,0},{x,0}]}}.
        {deallocate,3}.
        return.

Note that a stackframe with three Y registers will be allocated. It
would be sufficient to allocate two Y registers if `{y,2}` could be
reused following the `{try_case,{y,2}}` instruction.

This commit extends an existing optimization to ensure that `{y,2}`
can be reused, resulting in the following code:

        {allocate,2,1}.   % Allocate two Y registers.
        {init_yregs,{list,[{y,0}]}}.
        {'try',{y,1},{f,3}}.
        {line,[{location,"t.erl",7}]}.
        {call_fun,0}.
        {try_end,{y,1}}.
        {deallocate,2}.
        return.
      {label,3}.
        {try_case,{y,1}}.
        {move,{x,0},{y,0}}.
        {move,{x,1},{y,1}}.   % Reusing {y,1}.
        {move,{x,2},{x,0}}.
        build_stacktrace.
        {test_heap,5,1}.
        {put_tuple2,{x,0},{list,[{atom,'EXIT'},{y,0},{y,1},{x,0}]}}.
        {deallocate,2}.
        return.

This optimization improves the code in more than 200 modules out of
the sample of about 1000 modules compiled by `./scripts/diffable`.
---
 lib/compiler/src/beam_ssa_pre_codegen.erl | 65 +++++++++++++++++++++--
 1 file changed, 62 insertions(+), 3 deletions(-)

diff --git a/lib/compiler/src/beam_ssa_pre_codegen.erl b/lib/compiler/src/beam_ssa_pre_codegen.erl
index a985b7c8c3..43aea9f949 100644
--- a/lib/compiler/src/beam_ssa_pre_codegen.erl
+++ b/lib/compiler/src/beam_ssa_pre_codegen.erl
@@ -1990,6 +1990,11 @@ copy_retval_is([#b_set{op=call,dst=#b_var{}=Dst}=I0|Is], RC, Yregs,
         false ->
             copy_retval_is(Is, RC, Yregs, none, Count1, [I1|Acc])
     end;
+copy_retval_is([#b_set{op=landingpad,args=[#b_literal{val='try'}|_]=Args0}=I0|Is],
+               _RC, Yregs, Copy, Count, Acc0) ->
+    I = I0#b_set{args=copy_sub_args(Args0, Copy)},
+    Acc = [I|acc_copy(Acc0, Copy)],
+    copy_landingpad(Is, Yregs, Count, Acc, []);
 copy_retval_is([#b_set{args=Args0}=I0|Is], RC, Yregs, Copy, Count, Acc) ->
     I = I0#b_set{args=copy_sub_args(Args0, Copy)},
     case beam_ssa:clobbers_xregs(I) of
@@ -2008,6 +2013,53 @@ copy_retval_is([], RC, _, Copy, Count, Acc) ->
             {reverse(Acc, [Copy]),Count}
     end.
 
+%% Consider this function:
+%%
+%%     do_try(F) ->
+%%        try F()
+%%        catch
+%%            C:R:Stk ->
+%%                {'EXIT',C,R,Stk}
+%%        end.
+%%
+%% That would result in the following SSA code for the `catch` clause:
+%%
+%%     z0/_16 = landingpad `'try'`, y2/_14
+%%     y1/_4 = extract z0/_16, `0`
+%%     y0/_3 = extract z0/_16, `1`
+%%     x0/_2 = extract z0/_16, `2`
+%%     z0/_17 = kill_try_tag y2/_14
+%%     x0/Stk = build_stacktrace x0/_2
+%%
+%% Note that three Y registers are required. That can be reduced to
+%% two Y registers if we rewrite the code like so:
+%%
+%%      x0/_37 = extract z0/_16, `0`
+%%      x1/_38 = extract z0/_16, `1`
+%%      x2/_2 = extract z0/_16, `2`
+%%      z0/_17 = kill_try_tag y1/_14
+%%      y1/_3 = copy x1/_38
+%%      y0/_4 = copy x0/_37
+%%
+
+copy_landingpad([I0|Is], Yregs, Count0, Acc0, Copies0) ->
+    case I0 of
+        #b_set{dst=Dst,op=extract} ->
+            case sets:is_element(Dst, Yregs) of
+                true ->
+                    {NewDst,Count} = new_var(Count0),
+                    Copies = [#b_set{op=copy,dst=Dst,args=[NewDst]}|Copies0],
+                    I = I0#b_set{dst=NewDst},
+                    Acc = [I|Acc0],
+                    copy_landingpad(Is, Yregs, Count, Acc, Copies);
+                false ->
+                    Acc = [I0|Acc0],
+                    copy_landingpad(Is, Yregs, Count0, Acc, Copies0)
+            end;
+        #b_set{op=kill_try_tag} ->
+            {reverse(Acc0, [I0|Copies0 ++ Is]),Count0}
+    end.
+
 %%
 %% Consider this code:
 %%
@@ -2770,15 +2822,22 @@ reserve_xregs_is([gc|Is], Res, Xs0, Used) ->
     Xs = res_xregs_prune(Xs0, Used, Res),
     reserve_xregs_is(Is, Res, Xs, Used);
 reserve_xregs_is([#b_set{op=Op,dst=Dst,args=Args}=I|Is], Res0, Xs0, Used0) ->
-    Res = reserve_xreg(Dst, Xs0, Res0),
+    Res1 = reserve_xreg(Dst, Xs0, Res0),
     Used1 = ordsets:union(Used0, beam_ssa:used(I)),
     Used = ordsets:del_element(Dst, Used1),
     case Op of
         call ->
             Xs = reserve_call_args(tl(Args)),
-            reserve_xregs_is(Is, Res, Xs, Used);
+            reserve_xregs_is(Is, Res1, Xs, Used);
+        extract ->
+            %% Avoid potential register shuffling by pinning the
+            %% destination variable to the X register where the
+            %% runtime system will place it.
+            [_,#b_literal{val=Reg}] = Args,
+            Res = Res1#{Dst => {x,Reg}},
+            reserve_xregs_is(Is, Res, Xs0, Used);
         _ ->
-            reserve_xregs_is(Is, Res, Xs0, Used)
+            reserve_xregs_is(Is, Res1, Xs0, Used)
     end;
 reserve_xregs_is([], Res, Xs, _Used) ->
     {Res,Xs}.
-- 
2.43.0

openSUSE Build Service is sponsored by