Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
home:Ledest:erlang:26
erlang
1155-Optimize-select_val-with-consecutive-ident...
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File 1155-Optimize-select_val-with-consecutive-identical-label.patch of Package erlang
From 57bc32d84cbce65a4e6103ac88c3c22ad9a059fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org> Date: Sat, 22 Jul 2023 08:27:50 +0200 Subject: [PATCH 25/25] Optimize select_val with consecutive identical labels --- erts/emulator/beam/jit/arm/beam_asm.hpp | 7 +- erts/emulator/beam/jit/arm/instr_select.cpp | 150 +++++++++++++------- erts/emulator/beam/jit/x86/beam_asm.hpp | 6 +- erts/emulator/beam/jit/x86/instr_select.cpp | 58 ++++---- erts/emulator/test/beam_SUITE.erl | 47 +++++- 5 files changed, 174 insertions(+), 94 deletions(-) diff --git a/erts/emulator/beam/jit/arm/beam_asm.hpp b/erts/emulator/beam/jit/arm/beam_asm.hpp index 11b796dfbf..7ab52fb5c0 100644 --- a/erts/emulator/beam/jit/arm/beam_asm.hpp +++ b/erts/emulator/beam/jit/arm/beam_asm.hpp @@ -1289,9 +1289,10 @@ protected: Label fail, const Span<ArgVal> &args); - bool emit_optimized_three_way_select(arm::Gp reg, - Label fail, - const Span<ArgVal> &args); + void emit_optimized_two_way_select(arm::Gp reg, + const ArgVal &value1, + const ArgVal &value2, + const ArgVal &label); #ifdef DEBUG void emit_tuple_assertion(const ArgSource &Src, arm::Gp tuple_reg); diff --git a/erts/emulator/beam/jit/arm/instr_select.cpp b/erts/emulator/beam/jit/arm/instr_select.cpp index 9037d63bf1..494c8e5233 100644 --- a/erts/emulator/beam/jit/arm/instr_select.cpp +++ b/erts/emulator/beam/jit/arm/instr_select.cpp @@ -198,17 +198,51 @@ void BeamModuleAssembler::emit_linear_search(arm::Gp comparand, const Span<ArgVal> &args) { int count = args.size() / 2; + ASSERT(count < 128); + check_pending_stubs(); + for (int i = 0; i < count; i++) { const ArgVal &value = args[i]; const ArgVal &label = args[i + count]; + int j; + int n = 1; - if ((i % 128) == 0) { - /* Checking veneers on the first element is intentional. */ - check_pending_stubs(); + for (j = i + 1; j < count && args[j + count] == label; j++) { + n++; } + if (n < 2) { + cmp_arg(comparand, value); + a.b_eq(resolve_beam_label(label, disp1MB)); + } else { + int in_range = 1; + + for (j = i + 1; j < n; j++) { + if (!(value.isWord() && value.as<ArgWord>().get() + in_range == + args[j].as<ArgWord>().get())) { + break; + } + in_range++; + } - cmp_arg(comparand, value); - a.b_eq(resolve_beam_label(label, disp1MB)); + if (in_range > 2) { + uint64_t first = value.as<ArgWord>().get(); + + if (first == 0) { + a.cmp(comparand, imm(in_range)); + } else { + sub(TMP6, comparand, first); + a.cmp(TMP6, imm(in_range)); + } + a.b_lo(resolve_beam_label(label, disp1MB)); + i += in_range - 1; + } else { + emit_optimized_two_way_select(comparand, + value, + args[i + 1], + label); + i++; + } + } } /* An invalid label means fallthrough to the next instruction. */ @@ -239,7 +273,7 @@ void BeamModuleAssembler::emit_i_select_tuple_arity(const ArgRegister &Src, } Label fail = rawLabels[Fail.get()]; - emit_linear_search(TMP1, fail, args); + emit_binsearch_nodes(TMP1, 0, args.size() / 2 - 1, fail, args); } void BeamModuleAssembler::emit_i_select_val_lins(const ArgSource &Src, @@ -284,16 +318,11 @@ void BeamModuleAssembler::emit_i_select_val_lins(const ArgSource &Src, auto shift = plan.second; if (base == 0 && shift == 0) { - if (!emit_optimized_three_way_select(src.reg, fail, args)) { - emit_linear_search(src.reg, fail, args); - } + emit_linear_search(src.reg, fail, args); } else { auto untagged = emit_select_untag(Src, args, src.reg, next, base, shift); - - if (!emit_optimized_three_way_select(ARG1, fail, untagged)) { - emit_linear_search(ARG1, fail, untagged); - } + emit_linear_search(ARG1, fail, untagged); } if (!Fail.isLabel()) { @@ -467,7 +496,7 @@ void BeamModuleAssembler::emit_i_jump_on_val(const ArgSource &Src, a.bind(data); if (embedInText) { for (const ArgVal &arg : args) { - ASSERT(arg.getType() == ArgVal::Label); + ASSERT(arg.getType() == ArgVal::Label); a.embedLabel(rawLabels[arg.as<ArgLabel>().get()]); } } @@ -478,52 +507,65 @@ void BeamModuleAssembler::emit_i_jump_on_val(const ArgSource &Src, } /* - * Attempt to optimize the case when a select_val has exactly two - * values which only differ by one bit and they both branch to the - * same label. + * Optimize the case when a select_val has exactly two values that + * both branch to the same label. * - * The optimization makes use of the observation that (V == X || V == - * Y) is equivalent to (V | (X ^ Y)) == (X | Y) when (X ^ Y) has only - * one bit set. + * If the values only differ by one bit, the optimization makes use of + * the observation that (V == X || V == Y) is equivalent to (V | (X ^ + * Y)) == (X | Y) when (X ^ Y) has only one bit set. + * + * If more than one bit differ, one conditional branch instruction can + * still be eliminated by using the CCMP instruction. * * Return true if the optimization was possible. */ -bool BeamModuleAssembler::emit_optimized_three_way_select( - arm::Gp reg, - Label fail, - const Span<ArgVal> &args) { - if (args.size() != 4 || (args[2] != args[3])) { - return false; - } - - uint64_t x = args[0].isImmed() ? args[0].as<ArgImmed>().get() - : args[0].as<ArgWord>().get(); - uint64_t y = args[1].isImmed() ? args[1].as<ArgImmed>().get() - : args[1].as<ArgWord>().get(); - uint64_t combined = x | y; +void BeamModuleAssembler::emit_optimized_two_way_select(arm::Gp reg, + const ArgVal &value1, + const ArgVal &value2, + const ArgVal &label) { + uint64_t x = value1.isImmed() ? value1.as<ArgImmed>().get() + : value1.as<ArgWord>().get(); + uint64_t y = value2.isImmed() ? value2.as<ArgImmed>().get() + : value2.as<ArgWord>().get(); uint64_t diff = x ^ y; - ArgWord val(combined); - - if ((diff & (diff - 1)) != 0) { - return false; - } - - comment("(Src == 0x%x || Src == 0x%x) <=> (Src | 0x%x) == 0x%x", - x, - y, - diff, - combined); + /* Be sure to use a register not used by any caller. */ + arm::Gp tmp = TMP6; - a.orr(TMP1, reg, imm(diff)); - cmp_arg(TMP1, val); - a.b_eq(resolve_beam_label(args[2], disp1MB)); - - /* An invalid label means fallthrough to the next instruction. */ - if (fail.isValid()) { - a.b(resolve_label(fail, disp128MB)); - mark_unreachable_check_pending_stubs(); + if (x + 1 == y) { + comment("(Src == %ld || Src == %ld) <=> (Src - %ld) < 2", x, y, x); + if (x == 0) { + a.cmp(reg, imm(2)); + } else { + sub(tmp, reg, x); + a.cmp(tmp, imm(2)); + } + a.b_lo(resolve_beam_label(label, disp1MB)); + } else if ((diff & (diff - 1)) == 0) { + uint64_t combined = x | y; + ArgWord val(combined); + + comment("(Src == 0x%x || Src == 0x%x) <=> (Src | 0x%x) == 0x%x", + x, + y, + diff, + combined); + + a.orr(tmp, reg, imm(diff)); + cmp_arg(tmp, val); + a.b_eq(resolve_beam_label(label, disp1MB)); + } else { + if (x < 32) { + cmp(reg, y); + a.ccmp(reg, imm(x), imm(NZCV::kEqual), imm(arm::CondCode::kNE)); + } else if (-y < 32) { + cmp(reg, x); + a.ccmn(reg, imm(-y), imm(NZCV::kEqual), imm(arm::CondCode::kNE)); + } else { + cmp(reg, x); + a.mov(tmp, y); + a.ccmp(reg, tmp, imm(NZCV::kEqual), imm(arm::CondCode::kNE)); + } + a.b_eq(resolve_beam_label(label, disp1MB)); } - - return true; } diff --git a/erts/emulator/beam/jit/x86/beam_asm.hpp b/erts/emulator/beam/jit/x86/beam_asm.hpp index 907209bb30..648a4bbc73 100644 --- a/erts/emulator/beam/jit/x86/beam_asm.hpp +++ b/erts/emulator/beam/jit/x86/beam_asm.hpp @@ -1364,8 +1364,10 @@ protected: const ArgVal &Fail, const Span<ArgVal> &args); - bool emit_optimized_three_way_select(const ArgVal &Fail, - const Span<ArgVal> &args); + bool emit_optimized_two_way_select(bool destructive, + const ArgVal &value1, + const ArgVal &value2, + const ArgVal &label); #ifdef DEBUG void emit_tuple_assertion(const ArgSource &Src, x86::Gp tuple_reg); diff --git a/erts/emulator/beam/jit/x86/instr_select.cpp b/erts/emulator/beam/jit/x86/instr_select.cpp index 529e1f4f01..fe1736fc7b 100644 --- a/erts/emulator/beam/jit/x86/instr_select.cpp +++ b/erts/emulator/beam/jit/x86/instr_select.cpp @@ -31,6 +31,16 @@ void BeamModuleAssembler::emit_linear_search(x86::Gp comparand, const ArgImmed &value = args[i]; const ArgLabel &label = args[i + count]; + if (i < count - 1 && label == args[i + count + 1]) { + if (emit_optimized_two_way_select(i == count - 2, + value, + args[i + 1], + label)) { + i++; + continue; + } + } + cmp_arg(comparand, value, ARG1); a.je(resolve_beam_label(label)); } @@ -86,10 +96,6 @@ void BeamModuleAssembler::emit_i_select_val_lins(const ArgSource &Src, mov_arg(ARG2, Src); - if (emit_optimized_three_way_select(Fail, args)) { - return; - } - emit_linear_search(ARG2, Fail, args); } @@ -157,9 +163,7 @@ void BeamModuleAssembler::emit_binsearch_nodes(size_t Left, args.begin() + Left + count, args.begin() + count + Left + remaining); - if (!emit_optimized_three_way_select(Fail, shrunk)) { - emit_linear_search(ARG2, Fail, shrunk); - } + emit_linear_search(ARG2, Fail, shrunk); return; } @@ -249,18 +253,17 @@ void BeamModuleAssembler::emit_i_jump_on_val(const ArgSource &Src, * one bit set. * * ARG2 contains the value. - * Return true if the optimization was possible, in - * which case ARG1 should be considered trashed. + * + * Return true if the optimization was possible, in which case ARG1 + * and RET should be considered trashed. If the destructive argument + * is true, ARG2 will also be trashed. */ -bool BeamModuleAssembler::emit_optimized_three_way_select( - const ArgVal &Fail, - const Span<ArgVal> &args) { - if (args.size() != 4 || (args[2] != args[3])) { - return false; - } - - uint64_t x = args[0].as<ArgImmed>().get(); - uint64_t y = args[1].as<ArgImmed>().get(); +bool BeamModuleAssembler::emit_optimized_two_way_select(bool destructive, + const ArgVal &value1, + const ArgVal &value2, + const ArgVal &label) { + uint64_t x = value1.as<ArgImmed>().get(); + uint64_t y = value2.as<ArgImmed>().get(); uint64_t combined = x | y; uint64_t diff = x ^ y; ArgVal val(ArgVal::Immediate, combined); @@ -273,22 +276,15 @@ bool BeamModuleAssembler::emit_optimized_three_way_select( diff, combined); - if (Support::isInt32((Sint)diff)) { + if (destructive && Support::isInt32((Sint)diff)) { a.or_(ARG2, imm(diff)); + cmp_arg(ARG2, val, RET); } else { - a.mov(ARG1, imm(diff)); - a.or_(ARG2, ARG1); - } - - cmp_arg(ARG2, val, ARG1); - a.je(resolve_beam_label(args[2])); - - if (Fail.isLabel()) { - a.jmp(resolve_beam_label(Fail)); - } else { - /* NIL means fallthrough to the next instruction. */ - ASSERT(Fail.isNil()); + mov_imm(RET, diff); + a.or_(RET, ARG2); + cmp_arg(RET, val, ARG1); } + a.je(resolve_beam_label(label)); return true; } diff --git a/erts/emulator/test/beam_SUITE.erl b/erts/emulator/test/beam_SUITE.erl index c38aa9a1ab..eff8c6b426 100644 --- a/erts/emulator/test/beam_SUITE.erl +++ b/erts/emulator/test/beam_SUITE.erl @@ -24,7 +24,7 @@ init_per_group/2,end_per_group/2, packed_registers/1, apply_last/1, apply_last_bif/1, heap_sizes/1, big_lists/1, fconv/1, - select_val/1, + select_val/1, select_tuple_arity/1, swap_temp_apply/1, beam_init_yregs/1]). -export([applied/2,swap_temp_applied/1]). @@ -36,9 +36,9 @@ suite() -> [{ct_hooks,[ts_install_cth]}]. all() -> [packed_registers, apply_last, apply_last_bif, - heap_sizes, big_lists, select_val, - swap_temp_apply, - beam_init_yregs]. + heap_sizes, big_lists, + select_val, select_tuple_arity, + swap_temp_apply, beam_init_yregs]. groups() -> []. @@ -397,6 +397,45 @@ make_func(Name, List) -> Cs = [?Q(["(_@I@) -> _@Body@"]) || {I,Body} <- List], erl_syntax:function(erl_syntax:atom(Name), Cs). +select_tuple_arity(_Config) -> + Mod = ?FUNCTION_NAME, + + {Vs,Cs} = make_tuple_tests(300, [], []), + Name = erl_syntax:atom(match_tuple), + F = erl_syntax:function(Name, Cs), + Code = ?Q(["-module('@Mod@').\n" + "-export([match_tuple/1]).\n"]) ++ [F], + + merl:compile_and_load(Code, []), + + %% %% Uncomment the following line to print the generated code. + %% merl:print(Code), + + verify_tuple_match(Vs, Mod), + + %% Clean up. + true = code:delete(Mod), + false = code:purge(Mod), + + ok. + +make_tuple_tests(0, Clauses, Acc) -> + {Acc,Clauses}; +make_tuple_tests(Size, Clauses, Acc) -> + V = erlang:phash2(Size), + Es = lists:duplicate(Size, erl_syntax:underscore()), + Tuple = erl_syntax:tuple(Es), + Value = erl_syntax:integer(V), + Clause = erl_syntax:clause([Tuple], [], [Value]), + make_tuple_tests(Size-1, [Clause|Clauses], [{Size,V}|Acc]). + +verify_tuple_match([{Size,Result}|T], Mod) -> + Tuple = erlang:make_tuple(Size, a), + Result = Mod:match_tuple(Tuple), + verify_tuple_match(T, Mod); +verify_tuple_match([], _) -> + ok. + swap_temp_apply(_Config) -> {swap_temp_applied,42} = do_swap_temp_apply(41), not_an_integer = do_swap_temp_apply(not_an_integer), -- 2.35.3
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor