File 2941-Reduce-register-shuffling.patch of Package erlang
From 3643376f3f789f2f4920af43978d13d400d746cf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Tue, 16 Feb 2021 03:53:22 +0100
Subject: [PATCH] Reduce register shuffling
Consider this code:
send_deadline_cont(SockRef, Bin, Cont, Deadline, HasWritten) ->
SelectHandle = make_ref(),
Sent = prim_socket:send(SockRef, Bin, Cont, SelectHandle),
{SockRef, Bin, SelectHandle, Deadline, HasWritten, Sent}.
Looking closer at the BEAM code between the two calls we see four
`move` instructions followed by three `swap` instructions:
{call_ext,0,{extfunc,erlang,make_ref,0}}.
{move,{y,2},{x,3}}.
{move,{y,3},{x,2}}.
{move,{x,0},{y,2}}.
{move,{y,4},{x,1}}.
{swap,{x,2},{x,1}}.
{swap,{x,3},{x,2}}.
{swap,{x,0},{x,3}}.
{call_ext,4,{extfunc,prim_socket,send,4}}.
It turns out that the sequence of `move` and `swap` instructions is
longer than it needs to be, because the initial two `move`
instructions move the terms in two of the Y registers to the wrong X
registers. Two of the `swap` instructions are needed merely to move
the terms to the right registers.
This commit updates optimizations in `beam_ssa_pre_codegen` and
`beam_block` to eliminate the extra instructions:
{call_ext,0,{extfunc,erlang,make_ref,0}}.
{move,{y,2},{x,2}}.
{move,{y,3},{x,1}}.
{move,{y,4},{x,3}}.
{move,{x,0},{y,2}}.
{swap,{x,0},{x,3}}.
{call_ext,4,{extfunc,prim_socket,send,4}}.
By counting the number of inserted and deleted lines in the diff between
a run of the `diffable` before this change and a run after this change,
we can see that this commit results in 1099 fewer instructions in total.
---
lib/compiler/src/beam_block.erl | 51 ++++++++++--
lib/compiler/src/beam_ssa_pre_codegen.erl | 95 ++++++++++++++++-------
2 files changed, 112 insertions(+), 34 deletions(-)
diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl
index 6bc334963f..e8d024b20b 100644
--- a/lib/compiler/src/beam_block.erl
+++ b/lib/compiler/src/beam_block.erl
@@ -22,7 +22,7 @@
-module(beam_block).
-export([module/2]).
--import(lists, [keysort/2,reverse/1,splitwith/2]).
+-import(lists, [keysort/2,reverse/1,reverse/2,splitwith/2]).
-spec module(beam_utils:module_code(), [compile:option()]) ->
{'ok',beam_utils:module_code()}.
@@ -51,20 +51,55 @@ function({function,Name,Arity,CLabel,Is0}) ->
%%% for the moves generated by a sequence of SSA instructions.
%%% Therefore, this optimization is needed.
%%%
+%%% We'll need to handle non-consecutive sequences of moves, such
+%%% as the following instruction sequence:
+%%%
+%%% move y2, x2
+%%% move x0, y2
+%%% move y1, x1
+%%% init_yregs [y1]
+%%% move x2, x0
+%%%
+%%% The first two `move` instructions and the last `move` instruction
+%%% should be combined to a `swap` instruction:
+%%%
+%%% swap y2, x0
+%%% move y1, x1
+%%% init_yregs [y1]
+%%%
+%%% (Provided that x2 is killed in the code that follows.)
+%%%
-swap_opt([{move,Reg1,{x,X}=Temp}=Move1,
- {move,Reg2,Reg1}=Move2,
- {move,Temp,Reg2}=Move3|Is]) when Reg1 =/= Temp ->
- case is_unused(X, Is) of
- true ->
+swap_opt([{move,Reg1,{x,_}=Temp}=Move1,
+ {move,Reg2,Reg1}=Move2|Is0]) when Reg1 =/= Temp ->
+ case swap_opt_end(Is0, Temp, Reg2, []) of
+ {yes,Is} ->
[{swap,Reg1,Reg2}|swap_opt(Is)];
- false ->
- [Move1|swap_opt([Move2,Move3|Is])]
+ no ->
+ [Move1|swap_opt([Move2|Is0])]
end;
swap_opt([I|Is]) ->
[I|swap_opt(Is)];
swap_opt([]) -> [].
+swap_opt_end([{move,S,D}=I|Is], Temp, Dst, Acc) ->
+ case {S,D} of
+ {Temp,Dst} ->
+ {x,X} = Temp,
+ case is_unused(X, Is) of
+ true -> {yes,reverse(Acc, Is)};
+ false -> no
+ end;
+ {Temp,_} -> no;
+ {Dst,_} -> no;
+ {_,Temp} -> no;
+ {_,Dst} -> no;
+ {_,_} -> swap_opt_end(Is, Temp, Dst, [I|Acc])
+ end;
+swap_opt_end([{init_yregs,_}=I|Is], Temp, Dst, Acc) ->
+ swap_opt_end(Is, Temp, Dst, [I|Acc]);
+swap_opt_end(_, _, _, _) -> no.
+
is_unused(X, [{call,A,_}|_]) when A =< X -> true;
is_unused(X, [{call_ext,A,_}|_]) when A =< X -> true;
is_unused(X, [{make_fun2,_,_,_,A}|_]) when A =< X -> true;
diff --git a/lib/compiler/src/beam_ssa_pre_codegen.erl b/lib/compiler/src/beam_ssa_pre_codegen.erl
index 2337ec2b19..6ff4e3001f 100644
--- a/lib/compiler/src/beam_ssa_pre_codegen.erl
+++ b/lib/compiler/src/beam_ssa_pre_codegen.erl
@@ -2064,46 +2064,89 @@ copy_retval_is([], RC, _, Copy, Count, Acc) ->
%%
%% Consider this code:
%%
-%% Var = ...
+%% P = ...
+%% Q = ...
%% ...
-%% A1 = call foo/0
-%% A = copy A1
-%% B = call bar/1, Var
+%% A = call foo/0
+%% A1 = copy A
+%% B = call bar/2, P, Q
%%
-%% If the Var variable is no longer used after this code, its Y register
-%% can't be reused for A. To allow the Y register to be reused
-%% we will need to insert 'copy' instructions for arguments that are
-%% in Y registers:
+%% If the P or Q variables are no longer used after this code, one of
+%% their Y registers can't be reused for A. To allow one of the Y registers to
+%% be reused we will need to insert 'copy' instructions for arguments
+%% that are in Y registers:
%%
-%% Var = ...
+%% P = ...
+%% Q = ...
%% ...
%% A1 = call foo/0
-%% Var1 = copy Var
+%% Q1 = copy Q
+%% P1 = copy P
+%% A = copy A1
+%% B = call bar/2, P1, Q1
+%%
+%% Note that copies of the arguments are done in reverse order to help the
+%% reserve_xregs/3 function place the copies into the X registers they will
+%% need to be in.
+%%
+%% For this example, P1 needs to be in x0 and Q1 needs to be in x1. If we
+%% would copy the arguments in order the registers would be assigned like
+%% this:
+%%
+%% x0/A1 = call foo/0
+%% x1/P1 = copy P
+%% x2/Q1 = copy Q
+%% A = copy A1
+%% B = call bar/2, P1, Q1
+%%
+%% That is, both P1 and Q1 would be misplaced and would have to be
+%% moved to their correct registers before the call. However, with the
+%% copies in reverse order and with a little help from
+%% reserve_xregs/3, at least the Q1 variable can be can be placed in
+%% the correct register:
+%%
+%% x0/A1 = call foo/0
+%% x1/Q1 = copy Q
+%% x2/P1 = copy P
%% A = copy A1
-%% B = call bar/1, Var1
+%% B = call bar/2, P1, Q1
+%%
+%% In general, all but the first argument can be placed in their correct registers.
%%
place_retval_copy(I, _Yregs, none, Count, Acc) ->
+ %% There is no copy of a previous return value, so there is nothing
+ %% to gain by copying the function arguments.
{I,Count,Acc};
-place_retval_copy(#b_set{args=[F|Args0]}=I, Yregs, Copy, Count0, Acc0) ->
- #b_set{dst=Avoid} = Copy,
- {Args,Acc1,Count} = copy_func_args(Args0, Yregs, Avoid, Acc0, [], Count0),
- Acc = [Copy|Acc1],
- {I#b_set{args=[F|Args]},Count,Acc}.
-
-copy_func_args([#b_var{name=AName}=A|As], Yregs, Avoid, CopyAcc, Acc, Count0) ->
+place_retval_copy(#b_set{args=[F|Args0]}=I0, Yregs0, RetCopy, Count0, Acc0) ->
+ %% Copy function arguments, but make sure that we don't make an extra
+ %% copy of the previous return value.
+ #b_set{dst=Avoid} = RetCopy,
+ Yregs = sets:del_element(Avoid, Yregs0),
+ {Args,Acc1,Count} = copy_func_args(Args0, Yregs, Acc0, Count0),
+ I = I0#b_set{args=[F|Args]},
+
+ %% Place the copy instruction for the previous return value after the
+ %% copy instruction for the arguments.
+ Acc = [RetCopy|Acc1],
+ {I,Count,Acc}.
+
+copy_func_args(Args, Yregs, Acc, Count) ->
+ copy_func_args_1(reverse(Args), Yregs, Acc, [], Count).
+
+copy_func_args_1([#b_var{name=AName}=A|As], Yregs, InstrAcc, ArgAcc, Count0) ->
case sets:is_element(A, Yregs) of
- true when A =/= Avoid ->
+ true ->
{NewVar,Count} = new_var(AName, Count0),
Copy = #b_set{op=copy,dst=NewVar,args=[A]},
- copy_func_args(As, Yregs, Avoid, [Copy|CopyAcc], [NewVar|Acc], Count);
- _ ->
- copy_func_args(As, Yregs, Avoid, CopyAcc, [A|Acc], Count0)
+ copy_func_args_1(As, Yregs, [Copy|InstrAcc], [NewVar|ArgAcc], Count);
+ false ->
+ copy_func_args_1(As, Yregs, InstrAcc, [A|ArgAcc], Count0)
end;
-copy_func_args([A|As], Yregs, Avoid, CopyAcc, Acc, Count) ->
- copy_func_args(As, Yregs, Avoid, CopyAcc, [A|Acc], Count);
-copy_func_args([], _Yregs, _Avoid, CopyAcc, Acc, Count) ->
- {reverse(Acc),CopyAcc,Count}.
+copy_func_args_1([A|As], Yregs, InstrAcc, ArgAcc, Count) ->
+ copy_func_args_1(As, Yregs, InstrAcc, [A|ArgAcc], Count);
+copy_func_args_1([], _Yregs, InstrAcc, ArgAcc, Count) ->
+ {ArgAcc,InstrAcc,Count}.
acc_copy(Acc, none) -> Acc;
acc_copy(Acc, #b_set{}=Copy) -> [Copy|Acc].
--
2.26.2