Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
home:Ledest:erlang:23
erlang
2941-Reduce-register-shuffling.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File 2941-Reduce-register-shuffling.patch of Package erlang
From 3643376f3f789f2f4920af43978d13d400d746cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org> Date: Tue, 16 Feb 2021 03:53:22 +0100 Subject: [PATCH] Reduce register shuffling Consider this code: send_deadline_cont(SockRef, Bin, Cont, Deadline, HasWritten) -> SelectHandle = make_ref(), Sent = prim_socket:send(SockRef, Bin, Cont, SelectHandle), {SockRef, Bin, SelectHandle, Deadline, HasWritten, Sent}. Looking closer at the BEAM code between the two calls we see four `move` instructions followed by three `swap` instructions: {call_ext,0,{extfunc,erlang,make_ref,0}}. {move,{y,2},{x,3}}. {move,{y,3},{x,2}}. {move,{x,0},{y,2}}. {move,{y,4},{x,1}}. {swap,{x,2},{x,1}}. {swap,{x,3},{x,2}}. {swap,{x,0},{x,3}}. {call_ext,4,{extfunc,prim_socket,send,4}}. It turns out that the sequence of `move` and `swap` instructions is longer than it needs to be, because the initial two `move` instructions move the terms in two of the Y registers to the wrong X registers. Two of the `swap` instructions are needed merely to move the terms to the right registers. This commit updates optimizations in `beam_ssa_pre_codegen` and `beam_block` to eliminate the extra instructions: {call_ext,0,{extfunc,erlang,make_ref,0}}. {move,{y,2},{x,2}}. {move,{y,3},{x,1}}. {move,{y,4},{x,3}}. {move,{x,0},{y,2}}. {swap,{x,0},{x,3}}. {call_ext,4,{extfunc,prim_socket,send,4}}. By counting the number of inserted and deleted lines in the diff between a run of the `diffable` before this change and a run after this change, we can see that this commit results in 1099 fewer instructions in total. --- lib/compiler/src/beam_block.erl | 51 ++++++++++-- lib/compiler/src/beam_ssa_pre_codegen.erl | 95 ++++++++++++++++------- 2 files changed, 112 insertions(+), 34 deletions(-) diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl index 6bc334963f..e8d024b20b 100644 --- a/lib/compiler/src/beam_block.erl +++ b/lib/compiler/src/beam_block.erl @@ -22,7 +22,7 @@ -module(beam_block). -export([module/2]). --import(lists, [keysort/2,reverse/1,splitwith/2]). +-import(lists, [keysort/2,reverse/1,reverse/2,splitwith/2]). -spec module(beam_utils:module_code(), [compile:option()]) -> {'ok',beam_utils:module_code()}. @@ -51,20 +51,55 @@ function({function,Name,Arity,CLabel,Is0}) -> %%% for the moves generated by a sequence of SSA instructions. %%% Therefore, this optimization is needed. %%% +%%% We'll need to handle non-consecutive sequences of moves, such +%%% as the following instruction sequence: +%%% +%%% move y2, x2 +%%% move x0, y2 +%%% move y1, x1 +%%% init_yregs [y1] +%%% move x2, x0 +%%% +%%% The first two `move` instructions and the last `move` instruction +%%% should be combined to a `swap` instruction: +%%% +%%% swap y2, x0 +%%% move y1, x1 +%%% init_yregs [y1] +%%% +%%% (Provided that x2 is killed in the code that follows.) +%%% -swap_opt([{move,Reg1,{x,X}=Temp}=Move1, - {move,Reg2,Reg1}=Move2, - {move,Temp,Reg2}=Move3|Is]) when Reg1 =/= Temp -> - case is_unused(X, Is) of - true -> +swap_opt([{move,Reg1,{x,_}=Temp}=Move1, + {move,Reg2,Reg1}=Move2|Is0]) when Reg1 =/= Temp -> + case swap_opt_end(Is0, Temp, Reg2, []) of + {yes,Is} -> [{swap,Reg1,Reg2}|swap_opt(Is)]; - false -> - [Move1|swap_opt([Move2,Move3|Is])] + no -> + [Move1|swap_opt([Move2|Is0])] end; swap_opt([I|Is]) -> [I|swap_opt(Is)]; swap_opt([]) -> []. +swap_opt_end([{move,S,D}=I|Is], Temp, Dst, Acc) -> + case {S,D} of + {Temp,Dst} -> + {x,X} = Temp, + case is_unused(X, Is) of + true -> {yes,reverse(Acc, Is)}; + false -> no + end; + {Temp,_} -> no; + {Dst,_} -> no; + {_,Temp} -> no; + {_,Dst} -> no; + {_,_} -> swap_opt_end(Is, Temp, Dst, [I|Acc]) + end; +swap_opt_end([{init_yregs,_}=I|Is], Temp, Dst, Acc) -> + swap_opt_end(Is, Temp, Dst, [I|Acc]); +swap_opt_end(_, _, _, _) -> no. + is_unused(X, [{call,A,_}|_]) when A =< X -> true; is_unused(X, [{call_ext,A,_}|_]) when A =< X -> true; is_unused(X, [{make_fun2,_,_,_,A}|_]) when A =< X -> true; diff --git a/lib/compiler/src/beam_ssa_pre_codegen.erl b/lib/compiler/src/beam_ssa_pre_codegen.erl index 2337ec2b19..6ff4e3001f 100644 --- a/lib/compiler/src/beam_ssa_pre_codegen.erl +++ b/lib/compiler/src/beam_ssa_pre_codegen.erl @@ -2064,46 +2064,89 @@ copy_retval_is([], RC, _, Copy, Count, Acc) -> %% %% Consider this code: %% -%% Var = ... +%% P = ... +%% Q = ... %% ... -%% A1 = call foo/0 -%% A = copy A1 -%% B = call bar/1, Var +%% A = call foo/0 +%% A1 = copy A +%% B = call bar/2, P, Q %% -%% If the Var variable is no longer used after this code, its Y register -%% can't be reused for A. To allow the Y register to be reused -%% we will need to insert 'copy' instructions for arguments that are -%% in Y registers: +%% If the P or Q variables are no longer used after this code, one of +%% their Y registers can't be reused for A. To allow one of the Y registers to +%% be reused we will need to insert 'copy' instructions for arguments +%% that are in Y registers: %% -%% Var = ... +%% P = ... +%% Q = ... %% ... %% A1 = call foo/0 -%% Var1 = copy Var +%% Q1 = copy Q +%% P1 = copy P +%% A = copy A1 +%% B = call bar/2, P1, Q1 +%% +%% Note that copies of the arguments are done in reverse order to help the +%% reserve_xregs/3 function place the copies into the X registers they will +%% need to be in. +%% +%% For this example, P1 needs to be in x0 and Q1 needs to be in x1. If we +%% would copy the arguments in order the registers would be assigned like +%% this: +%% +%% x0/A1 = call foo/0 +%% x1/P1 = copy P +%% x2/Q1 = copy Q +%% A = copy A1 +%% B = call bar/2, P1, Q1 +%% +%% That is, both P1 and Q1 would be misplaced and would have to be +%% moved to their correct registers before the call. However, with the +%% copies in reverse order and with a little help from +%% reserve_xregs/3, at least the Q1 variable can be can be placed in +%% the correct register: +%% +%% x0/A1 = call foo/0 +%% x1/Q1 = copy Q +%% x2/P1 = copy P %% A = copy A1 -%% B = call bar/1, Var1 +%% B = call bar/2, P1, Q1 +%% +%% In general, all but the first argument can be placed in their correct registers. %% place_retval_copy(I, _Yregs, none, Count, Acc) -> + %% There is no copy of a previous return value, so there is nothing + %% to gain by copying the function arguments. {I,Count,Acc}; -place_retval_copy(#b_set{args=[F|Args0]}=I, Yregs, Copy, Count0, Acc0) -> - #b_set{dst=Avoid} = Copy, - {Args,Acc1,Count} = copy_func_args(Args0, Yregs, Avoid, Acc0, [], Count0), - Acc = [Copy|Acc1], - {I#b_set{args=[F|Args]},Count,Acc}. - -copy_func_args([#b_var{name=AName}=A|As], Yregs, Avoid, CopyAcc, Acc, Count0) -> +place_retval_copy(#b_set{args=[F|Args0]}=I0, Yregs0, RetCopy, Count0, Acc0) -> + %% Copy function arguments, but make sure that we don't make an extra + %% copy of the previous return value. + #b_set{dst=Avoid} = RetCopy, + Yregs = sets:del_element(Avoid, Yregs0), + {Args,Acc1,Count} = copy_func_args(Args0, Yregs, Acc0, Count0), + I = I0#b_set{args=[F|Args]}, + + %% Place the copy instruction for the previous return value after the + %% copy instruction for the arguments. + Acc = [RetCopy|Acc1], + {I,Count,Acc}. + +copy_func_args(Args, Yregs, Acc, Count) -> + copy_func_args_1(reverse(Args), Yregs, Acc, [], Count). + +copy_func_args_1([#b_var{name=AName}=A|As], Yregs, InstrAcc, ArgAcc, Count0) -> case sets:is_element(A, Yregs) of - true when A =/= Avoid -> + true -> {NewVar,Count} = new_var(AName, Count0), Copy = #b_set{op=copy,dst=NewVar,args=[A]}, - copy_func_args(As, Yregs, Avoid, [Copy|CopyAcc], [NewVar|Acc], Count); - _ -> - copy_func_args(As, Yregs, Avoid, CopyAcc, [A|Acc], Count0) + copy_func_args_1(As, Yregs, [Copy|InstrAcc], [NewVar|ArgAcc], Count); + false -> + copy_func_args_1(As, Yregs, InstrAcc, [A|ArgAcc], Count0) end; -copy_func_args([A|As], Yregs, Avoid, CopyAcc, Acc, Count) -> - copy_func_args(As, Yregs, Avoid, CopyAcc, [A|Acc], Count); -copy_func_args([], _Yregs, _Avoid, CopyAcc, Acc, Count) -> - {reverse(Acc),CopyAcc,Count}. +copy_func_args_1([A|As], Yregs, InstrAcc, ArgAcc, Count) -> + copy_func_args_1(As, Yregs, InstrAcc, [A|ArgAcc], Count); +copy_func_args_1([], _Yregs, InstrAcc, ArgAcc, Count) -> + {ArgAcc,InstrAcc,Count}. acc_copy(Acc, none) -> Acc; acc_copy(Acc, #b_set{}=Copy) -> [Copy|Acc]. -- 2.26.2
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor