File 1137-AArch64-Inline-more-code-for-element-2.patch of Package erlang
From f51aedea765e5ee44322e7e118d9e658b03a5baf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Sun, 17 Sep 2023 08:09:02 +0200
Subject: [PATCH 17/25] AArch64: Inline more code for element/2
Inline the code for `element/2` when the first argument (the position)
is not a literal but the second argument is known to be a tuple. That
typically requires 13 native instructions.
---
.../beam/jit/arm/instr_guard_bifs.cpp | 98 ++++++++++++++++---
erts/emulator/test/tuple_SUITE.erl | 23 +++++
2 files changed, 106 insertions(+), 15 deletions(-)
diff --git a/erts/emulator/beam/jit/arm/instr_guard_bifs.cpp b/erts/emulator/beam/jit/arm/instr_guard_bifs.cpp
index 3077c8df84..75985eb623 100644
--- a/erts/emulator/beam/jit/arm/instr_guard_bifs.cpp
+++ b/erts/emulator/beam/jit/arm/instr_guard_bifs.cpp
@@ -582,10 +582,6 @@ void BeamModuleAssembler::emit_bif_byte_size(const ArgLabel &Fail,
* the operation fails.
*/
void BeamGlobalAssembler::emit_bif_element_helper(Label fail) {
- a.and_(TMP1, ARG1, imm(_TAG_IMMED1_MASK));
- a.cmp(TMP1, imm(_TAG_IMMED1_SMALL));
- a.b_ne(fail);
-
/* Ensure that ARG2 contains a tuple. */
emit_is_boxed(fail, ARG2);
arm::Gp boxed_ptr = emit_ptr_val(TMP1, ARG2);
@@ -595,13 +591,15 @@ void BeamGlobalAssembler::emit_bif_element_helper(Label fail) {
a.tst(TMP2, imm(_TAG_HEADER_MASK));
a.b_ne(fail);
+ a.and_(TMP3, ARG1, imm(_TAG_IMMED1_MASK));
+ a.cmp(TMP3, imm(_TAG_IMMED1_SMALL));
+ a.ccmp(ARG1, make_small(0), imm(NZCV::kZF), imm(arm::CondCode::kEQ));
+ a.b_eq(fail);
+
/* Ensure that the position points within the tuple. */
- a.lsr(TMP2, TMP2, imm(_HEADER_ARITY_OFFS));
a.asr(TMP3, ARG1, imm(_TAG_IMMED1_SIZE));
- a.cmp(TMP3, imm(1));
- a.b_mi(fail);
- a.cmp(TMP2, TMP3);
- a.b_lo(fail);
+ a.cmp(TMP3, TMP2, arm::lsr(_HEADER_ARITY_OFFS));
+ a.b_hi(fail);
a.ldr(ARG1, arm::Mem(TMP1, TMP3, arm::lsl(3)));
a.ret(a64::x30);
@@ -719,29 +717,99 @@ void BeamModuleAssembler::emit_bif_element(const ArgLabel &Fail,
const_position = Pos.isSmall() && Pos.as<ArgSmall>().getSigned() > 0 &&
Pos.as<ArgSmall>().getSigned() <= (Sint)MAX_ARITYVAL;
- if (const_position && exact_type<BeamTypeId::Tuple>(Tuple)) {
- comment("simplified element/2 because arguments are known types");
+ if (const_position) {
+ if (exact_type<BeamTypeId::Tuple>(Tuple)) {
+ comment("simplified element/2 because arguments are known types");
+ } else {
+ comment("simplified element/2 because position is constant");
+ }
auto tuple = load_source(Tuple, ARG2);
auto dst = init_destination(Dst, ARG1);
Uint position = Pos.as<ArgSmall>().getUnsigned();
- arm::Gp boxed_ptr = emit_ptr_val(TMP1, tuple.reg);
+ arm::Gp boxed_ptr;
+ Label fail = a.newLabel();
+
+ if (exact_type<BeamTypeId::Tuple>(Tuple)) {
+ boxed_ptr = emit_ptr_val(TMP1, tuple.reg);
+ a.ldur(TMP2, emit_boxed_val(boxed_ptr));
+ ERTS_CT_ASSERT(make_arityval_zero() == 0);
+ cmp(TMP2, position << _HEADER_ARITY_OFFS);
+ } else {
+ if (Fail.get() != 0) {
+ emit_is_boxed(resolve_beam_label(Fail, dispUnknown),
+ Tuple,
+ tuple.reg);
+ } else {
+ emit_is_boxed(fail, Tuple, tuple.reg);
+ }
+ boxed_ptr = emit_ptr_val(TMP1, tuple.reg);
+ a.ldur(TMP2, emit_boxed_val(boxed_ptr));
+ mov_imm(TMP3, position << _HEADER_ARITY_OFFS);
+ ERTS_CT_ASSERT(make_arityval_zero() == 0);
+ a.tst(TMP2, imm(_TAG_HEADER_MASK));
+ a.ccmp(TMP2, TMP3, imm(NZCV::kNone), imm(arm::CondCode::kEQ));
+ }
- a.ldur(TMP2, emit_boxed_val(boxed_ptr));
- ERTS_CT_ASSERT(make_arityval_zero() == 0);
- cmp(TMP2, position << _HEADER_ARITY_OFFS);
if (Fail.get() != 0) {
a.b_lo(resolve_beam_label(Fail, disp1MB));
+ a.bind(fail);
} else {
Label good = a.newLabel();
+
a.b_hs(good);
+
+ a.bind(fail);
mov_arg(ARG1, Pos);
mov_var(ARG2, tuple);
fragment_call(ga->get_handle_element_error_shared());
+
a.bind(good);
}
+ /* Fetch the element. */
safe_ldur(dst.reg, emit_boxed_val(boxed_ptr, position << 3));
flush_var(dst);
+ } else if (exact_type<BeamTypeId::Tuple>(Tuple) && Fail.get() == 0) {
+ auto [pos, tuple] = load_sources(Pos, ARG1, Tuple, ARG2);
+ auto dst = init_destination(Dst, ARG1);
+ arm::Gp boxed_ptr = emit_ptr_val(TMP1, tuple.reg);
+ Label fail = a.newLabel();
+ Label good = a.newLabel();
+
+ lea(TMP1, emit_boxed_val(boxed_ptr));
+ a.ldr(TMP2, arm::Mem(TMP1));
+
+ if (always_one_of<BeamTypeId::Integer, BeamTypeId::AlwaysBoxed>(Pos)) {
+ ERTS_CT_ASSERT(_TAG_PRIMARY_MASK - TAG_PRIMARY_LIST ==
+ TAG_PRIMARY_BOXED);
+ a.tst(pos.reg, imm(TAG_PRIMARY_LIST));
+ a.ccmp(pos.reg,
+ make_small(0),
+ imm(NZCV::kZF),
+ imm(arm::CondCode::kNE));
+ } else {
+ a.and_(TMP3, pos.reg, imm(_TAG_IMMED1_MASK));
+ a.cmp(TMP3, imm(_TAG_IMMED1_SMALL));
+ a.ccmp(pos.reg,
+ make_small(0),
+ imm(NZCV::kZF),
+ imm(arm::CondCode::kEQ));
+ }
+ a.b_eq(fail);
+
+ /* Ensure that the position points within the tuple. */
+ a.asr(TMP3, pos.reg, imm(_TAG_IMMED1_SIZE));
+ a.cmp(TMP3, TMP2, arm::lsr(_HEADER_ARITY_OFFS));
+ a.b_ls(good);
+
+ a.bind(fail);
+ mov_arg(ARG1, Pos);
+ mov_var(ARG2, tuple);
+ fragment_call(ga->get_handle_element_error_shared());
+
+ a.bind(good);
+ a.ldr(dst.reg, arm::Mem(TMP1, TMP3, arm::lsl(3)));
+ flush_var(dst);
} else {
/* Too much code to inline. Call a helper fragment. */
mov_arg(ARG1, Pos);
diff --git a/erts/emulator/test/tuple_SUITE.erl b/erts/emulator/test/tuple_SUITE.erl
index e665c55003..648488a5ab 100644
--- a/erts/emulator/test/tuple_SUITE.erl
+++ b/erts/emulator/test/tuple_SUITE.erl
@@ -141,6 +141,7 @@ t_element(Config) when is_list(Config) ->
{'EXIT', {badarg, _}} = (catch element(1, id({}))),
{'EXIT', {badarg, _}} = (catch element(1, id([a,b]))),
{'EXIT', {badarg, _}} = (catch element(1, id(42))),
+ {'EXIT', {badarg, _}} = (catch element(false, id({a,b}))),
{'EXIT', {badarg, _}} = (catch element(id(1.5), id({a,b}))),
%% Make sure that the loader does not reject the module when
@@ -150,8 +151,30 @@ t_element(Config) when is_list(Config) ->
{'EXIT', {badarg, _}} = (catch element(1 bsl 32, id({a,b,c}))),
{'EXIT', {badarg, _}} = (catch element(1 bsl 64, id({a,b,c}))),
+ %% Test known tuple and unknown position.
+ true = is_tuple(Tuple),
+ {'EXIT', {badarg, _}} = catch element(id(false), Tuple),
+ {'EXIT', {badarg, _}} = catch element(id(-1), Tuple),
+ {'EXIT', {badarg, _}} = catch element(id(0), Tuple),
+ {'EXIT', {badarg, _}} = catch element(id(1 bsl 64), Tuple),
+
+ %% Test a known tuple and position that is a known integer.
+ {'EXIT', {badarg, _}} = catch element(known_integer(-1), Tuple),
+ {'EXIT', {badarg, _}} = catch element(known_integer(0), Tuple),
+ {'EXIT', {badarg, _}} = catch element(known_integer(1 bsl 64), Tuple),
+ {'EXIT', {badarg, _}} = catch element(known_integer(tuple_size(Tuple)+1), Tuple),
+
+ %% Test unknown tuple and unknown position.
+ {'EXIT', {badarg, _}} = catch element(id(false), id(Tuple)),
+ {'EXIT', {badarg, _}} = catch element(id(-1), id(Tuple)),
+ {'EXIT', {badarg, _}} = catch element(id(0), id(Tuple)),
+ {'EXIT', {badarg, _}} = catch element(id(1 bsl 64), id(Tuple)),
+
ok.
+known_integer(I) when is_integer(I) ->
+ I.
+
get_elements([Element|Rest], Tuple, Pos) ->
Element = element(Pos, Tuple),
get_elements(Rest, Tuple, Pos+1);
--
2.35.3