Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
home:Ledest:erlang:26
erlang
1104-Optimize-bsr-for-small-operands.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File 1104-Optimize-bsr-for-small-operands.patch of Package erlang
From 55813dd0a871bf1aa2619e12e98fb8dc4315c97a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org> Date: Wed, 2 Aug 2023 07:05:02 +0200 Subject: [PATCH 4/7] Optimize bsr for small operands Inline the code for right shift a small operand any number steps. We used to call a helper routine when the shift count exceeded the number of bits in a small. --- erts/emulator/beam/jit/arm/instr_arith.cpp | 42 ++++++++-- erts/emulator/beam/jit/x86/instr_arith.cpp | 29 ++++++- erts/emulator/test/small_SUITE.erl | 91 +++++++++++++++++++++- 3 files changed, 149 insertions(+), 13 deletions(-) diff --git a/erts/emulator/beam/jit/arm/instr_arith.cpp b/erts/emulator/beam/jit/arm/instr_arith.cpp index a14ad5cbaf..dbdb05b86a 100644 --- a/erts/emulator/beam/jit/arm/instr_arith.cpp +++ b/erts/emulator/beam/jit/arm/instr_arith.cpp @@ -1485,34 +1485,62 @@ void BeamModuleAssembler::emit_i_bsr(const ArgLabel &Fail, if (RHS.isSmall()) { Sint shift = RHS.as<ArgSmall>().getSigned(); - if (shift >= 0 && shift < SMALL_BITS - 1) { + if (shift >= 0) { + arm::Gp small_tag = TMP1; if (always_small(LHS)) { comment("skipped test for small left operand because it is " "always small"); need_generic = false; + mov_imm(small_tag, _TAG_IMMED1_SMALL); } else if (always_one_of<BeamTypeId::Number>(LHS)) { comment("simplified test for small operand since it is a " "number"); emit_is_not_boxed(generic, lhs.reg); + mov_imm(small_tag, _TAG_IMMED1_SMALL); } else { - a.and_(TMP1, lhs.reg, imm(_TAG_IMMED1_MASK)); - a.cmp(TMP1, imm(_TAG_IMMED1_SMALL)); + a.and_(small_tag, lhs.reg, imm(_TAG_IMMED1_MASK)); + a.cmp(small_tag, imm(_TAG_IMMED1_SMALL)); a.b_ne(generic); } /* We don't need to clear the mask after shifting because * _TAG_IMMED1_SMALL will set all the bits anyway. */ ERTS_CT_ASSERT(_TAG_IMMED1_MASK == _TAG_IMMED1_SMALL); - a.asr(TMP1, lhs.reg, imm(shift)); - a.orr(dst.reg, TMP1, imm(_TAG_IMMED1_SMALL)); + shift = std::min<Sint>(shift, 63); + a.orr(dst.reg, small_tag, lhs.reg, arm::asr(shift)); if (need_generic) { a.b(next); } } else { - /* Constant shift is negative or too big to fit the `asr` - * instruction; fall back to the generic path. */ + /* Constant shift is negative; fall back to the generic + * path. */ } + } else { + auto rhs = load_source(RHS, ARG3); + + /* Ensure that both operands are small and that the shift + * count is positive. */ + ERTS_CT_ASSERT(_TAG_IMMED1_SMALL == _TAG_IMMED1_MASK); + a.ands(TMP1, rhs.reg, imm((1ull << 63) | _TAG_IMMED1_MASK)); + a.and_(TMP1, lhs.reg, TMP1); + a.ccmp(TMP1, + imm(_TAG_IMMED1_SMALL), + imm(NZCV::kNone), + arm::CondCode::kPL); + a.b_ne(generic); + + /* Calculate shift count. */ + a.asr(TMP1, rhs.reg, imm(_TAG_IMMED1_SIZE)); + mov_imm(TMP2, 63); + a.cmp(TMP1, TMP2); + a.csel(TMP1, TMP1, TMP2, imm(arm::CondCode::kLE)); + + /* Shift right. */ + ERTS_CT_ASSERT(_TAG_IMMED1_MASK == _TAG_IMMED1_SMALL); + a.asr(dst.reg, lhs.reg, TMP1); + a.orr(dst.reg, dst.reg, imm(_TAG_IMMED1_SMALL)); + a.b(next); } a.bind(generic); diff --git a/erts/emulator/beam/jit/x86/instr_arith.cpp b/erts/emulator/beam/jit/x86/instr_arith.cpp index 56c4eb06d3..35976cc048 100644 --- a/erts/emulator/beam/jit/x86/instr_arith.cpp +++ b/erts/emulator/beam/jit/x86/instr_arith.cpp @@ -1512,13 +1512,14 @@ void BeamModuleAssembler::emit_i_bsr(const ArgSource &LHS, const ArgRegister &Dst) { Label generic = a.newLabel(), next = a.newLabel(); bool need_generic = true; + bool need_register_load = true; mov_arg(ARG2, LHS); if (RHS.isSmall()) { Sint shift = RHS.as<ArgSmall>().getSigned(); - if (shift >= 0 && shift < SMALL_BITS - 1) { + if (shift >= 0) { if (always_small(LHS)) { comment("skipped test for small left operand because it is " "always small"); @@ -1532,6 +1533,7 @@ void BeamModuleAssembler::emit_i_bsr(const ArgSource &LHS, /* We don't need to clear the mask after shifting because * _TAG_IMMED1_SMALL will set all the bits anyway. */ ERTS_CT_ASSERT(_TAG_IMMED1_MASK == _TAG_IMMED1_SMALL); + shift = std::min<Sint>(shift, 63); a.sar(RET, imm(shift)); a.or_(RET, imm(_TAG_IMMED1_SMALL)); @@ -1539,14 +1541,33 @@ void BeamModuleAssembler::emit_i_bsr(const ArgSource &LHS, a.short_().jmp(next); } } else { - /* Constant shift is negative or too big to fit the `sar` - * instruction, fall back to the generic path. */ + /* Constant shift is negative; fall back to the generic + * path. */ } + } else if (hasCpuFeature(CpuFeatures::X86::kBMI2)) { + mov_arg(RET, RHS); + need_register_load = false; + + emit_are_both_small(generic, LHS, ARG2, RHS, RET); + + a.mov(ARG1, RET); + a.sar(ARG1, imm(_TAG_IMMED1_SIZE)); + a.js(generic); + + mov_imm(RET, 63); + a.cmp(ARG1, RET); + a.cmova(ARG1, RET); + + a.sarx(RET, ARG2, ARG1); + a.or_(RET, imm(_TAG_IMMED1_SMALL)); + a.short_().jmp(next); } a.bind(generic); if (need_generic) { - mov_arg(RET, RHS); + if (need_register_load) { + mov_arg(RET, RHS); + } if (Fail.get() != 0) { safe_fragment_call(ga->get_i_bsr_guard_shared()); diff --git a/erts/emulator/test/small_SUITE.erl b/erts/emulator/test/small_SUITE.erl index bb4d69e355..aa732c1e24 100644 --- a/erts/emulator/test/small_SUITE.erl +++ b/erts/emulator/test/small_SUITE.erl @@ -25,7 +25,7 @@ -export([edge_cases/1, addition/1, subtraction/1, negation/1, multiplication/1, mul_add/1, division/1, - test_bitwise/1, test_bsl/1, + test_bitwise/1, test_bsl/1, test_bsr/1, element/1, range_optimization/1]). -export([mul_add/0, division/0]). @@ -43,7 +43,7 @@ groups() -> [{p, [parallel], [edge_cases, addition, subtraction, negation, multiplication, mul_add, division, - test_bitwise, test_bsl, + test_bitwise, test_bsl, test_bsr, element, range_optimization]}]. @@ -1219,6 +1219,93 @@ test_bsl([{Name,{N,S}}|T], Mod) -> test_bsl([], _) -> ok. +test_bsr(_Config) -> + _ = rand:uniform(), %Seed generator + io:format("Seed: ~p", [rand:export_seed()]), + Mod = list_to_atom(lists:concat([?MODULE,"_",?FUNCTION_NAME])), + Pairs = bsr_gen_pairs(), + Fs0 = gen_func_names(Pairs, 0), + Fs = [gen_bsr_function(F) || F <- Fs0], + Tree = ?Q(["-module('@Mod@').", + "-compile([export_all,nowarn_export_all]).", + "id(I) -> I."]) ++ Fs, + %% merl:print(Tree), + {ok,_Bin} = merl:compile_and_load(Tree, []), + test_bsr(Fs0, Mod), + unload(Mod), + ok. + +bsr_gen_pairs() -> + {_MinSmall, MaxSmall} = determine_small_limits(0), + SmallBits = num_bits(MaxSmall), + + {Powers,Shifts} = + if + SmallBits < 32 -> + {lists:seq(15, SmallBits+2), + lists:seq(0, 7) ++ lists:seq(24, 36)}; + true -> + {lists:seq(30, SmallBits+2), + lists:seq(0, 7) ++ lists:seq(56, 72)} + end, + + [{N,S} || + P <- Powers, + N <- [rand:uniform(1 bsl P), (1 bsl P)-1], + S <- Shifts]. + +gen_bsr_function({Name,{N,S}}) -> + Mask = (1 bsl num_bits(N)) - 1, + ?Q("'@Name@'(N0, fixed, More) -> + Res = N0 bsr _@S@, + if + More -> + N = N0 band _@Mask@, + Res = N0 bsr _@S@, + Res = N bsr _@S@; + true -> + Res + end; + '@Name@'(N0, S, More) -> + Res = id(N0 bsr S), + if + More -> + N = N0 band _@Mask@, + Res = id(N0 bsr S), + Res = id(N bsr S), + if + S >= 0 -> + Res = id(N bsr S); + true -> + Res + end; + true -> + Res + end. "). + +test_bsr([{Name,{N,S}}|T], Mod) -> + try + Res = N bsr S, + Res = Mod:Name(N, fixed, true), + Res = Mod:Name(N, S, true), + + NegRes = -N bsr S, + NegRes = Mod:Name(-N, fixed, false), + + NegRes = -N bsr S, + NegRes = Mod:Name(-N, S, false), + + BslRes = N bsr -S, + BslRes = Mod:Name(N, -S, false) + catch + C:R:Stk -> + io:format("~p failed. numbers: ~p ~p\n", [Name,N,S]), + erlang:raise(C, R, Stk) + end, + test_bsr(T, Mod); +test_bsr([], _) -> + ok. + element(_Config) -> %% Test element_1: Can't fail for integer arguments. zero = element_1(0), -- 2.35.3
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor