File 2103-jit-Optimize-relational-guard-tests.patch of Package erlang
From f8a9ccda6c4a69ac6cb029f82e473bf0ab14f533 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Sun, 1 May 2022 07:32:09 +0200
Subject: [PATCH 3/3] jit: Optimize relational guard tests
Optimize the generation of native code for common combinations of
relational operators in guards.
---
erts/emulator/beam/jit/arm/beam_asm.hpp | 21 +-
.../beam/jit/arm/beam_asm_global.hpp.pl | 2 +
erts/emulator/beam/jit/arm/instr_common.cpp | 329 +++++++++++++++++
erts/emulator/beam/jit/arm/ops.tab | 30 ++
erts/emulator/beam/jit/x86/beam_asm.hpp | 18 +
.../beam/jit/x86/beam_asm_global.hpp.pl | 2 +
erts/emulator/beam/jit/x86/instr_common.cpp | 336 ++++++++++++++++++
erts/emulator/beam/jit/x86/ops.tab | 30 ++
erts/emulator/test/op_SUITE.erl | 117 +++++-
9 files changed, 874 insertions(+), 11 deletions(-)
diff --git a/erts/emulator/beam/jit/arm/beam_asm.hpp b/erts/emulator/beam/jit/arm/beam_asm.hpp
index bf1fdc5c48..eb3baeefb2 100644
--- a/erts/emulator/beam/jit/arm/beam_asm.hpp
+++ b/erts/emulator/beam/jit/arm/beam_asm.hpp
@@ -739,6 +739,17 @@ protected:
}
}
+ void cmp(arm::Gp src, int64_t val) {
+ if (Support::isUInt12(val)) {
+ a.cmp(src, imm(val));
+ } else if (Support::isUInt12(-val)) {
+ a.cmn(src, imm(-val));
+ } else {
+ mov_imm(SUPER_TMP, val);
+ a.cmp(src, SUPER_TMP);
+ }
+ }
+
void ldur(arm::Gp reg, arm::Mem mem) {
safe_9bit_imm(a64::Inst::kIdLdur, reg, mem);
}
@@ -1589,14 +1600,8 @@ protected:
if (arg.isImmed() || arg.isWord()) {
Sint val = arg.isImmed() ? arg.as<ArgImmed>().get()
: arg.as<ArgWord>().get();
-
- if (Support::isUInt12(val)) {
- a.cmp(gp, imm(val));
- return;
- } else if (Support::isUInt12(-val)) {
- a.cmn(gp, imm(-val));
- return;
- }
+ cmp(gp, val);
+ return;
}
auto tmp = load_source(arg, SUPER_TMP);
diff --git a/erts/emulator/beam/jit/arm/beam_asm_global.hpp.pl b/erts/emulator/beam/jit/arm/beam_asm_global.hpp.pl
index f42d16e853..0099e6cc94 100644
--- a/erts/emulator/beam/jit/arm/beam_asm_global.hpp.pl
+++ b/erts/emulator/beam/jit/arm/beam_asm_global.hpp.pl
@@ -88,6 +88,8 @@ my @beam_global_funcs = qw(
int_div_rem_body_shared
int_div_rem_guard_shared
internal_hash_helper
+ is_in_range_shared
+ is_ge_lt_shared
minus_body_shared
new_map_shared
update_map_assoc_shared
diff --git a/erts/emulator/beam/jit/arm/instr_common.cpp b/erts/emulator/beam/jit/arm/instr_common.cpp
index 2a1d08a1a1..70097efdc4 100644
--- a/erts/emulator/beam/jit/arm/instr_common.cpp
+++ b/erts/emulator/beam/jit/arm/instr_common.cpp
@@ -1467,6 +1467,335 @@ void BeamModuleAssembler::emit_is_ge(const ArgLabel &Fail,
}
}
+/*
+ * ARG1 = Src
+ * ARG2 = Min
+ * ARG3 = Max
+ *
+ * Result is returned in the flags.
+ */
+void BeamGlobalAssembler::emit_is_in_range_shared() {
+ Label immediate = a.newLabel(), generic_compare = a.newLabel(),
+ float_done = a.newLabel(), done = a.newLabel();
+
+ /* Is the source a float? */
+ emit_is_boxed(immediate, ARG1);
+
+ arm::Gp boxed_ptr = emit_ptr_val(TMP1, ARG1);
+ a.ldur(TMP2, emit_boxed_val(boxed_ptr));
+
+ mov_imm(TMP3, HEADER_FLONUM);
+ a.cmp(TMP2, TMP3);
+ a.b_ne(generic_compare);
+
+ a.ldur(a64::d0, emit_boxed_val(boxed_ptr, sizeof(Eterm)));
+ a.asr(TMP1, ARG2, imm(_TAG_IMMED1_SIZE));
+ a.scvtf(a64::d1, TMP1);
+
+ a.fcmpe(a64::d0, a64::d1);
+ a.b_mi(float_done);
+
+ a.asr(TMP1, ARG3, imm(_TAG_IMMED1_SIZE));
+ a.scvtf(a64::d1, TMP1);
+ a.fcmpe(a64::d0, a64::d1);
+ a.b_gt(float_done);
+ a.tst(ZERO, ZERO);
+
+ a.bind(float_done);
+ a.ret(a64::x30);
+
+ a.bind(immediate);
+ {
+ /*
+ * Src is an immediate (such as ATOM) but not SMALL.
+ * That means that Src must be greater than the upper
+ * limit.
+ */
+ mov_imm(TMP1, 1);
+ a.cmp(TMP1, imm(0));
+ a.ret(a64::x30);
+ }
+
+ a.bind(generic_compare);
+ {
+ emit_enter_runtime_frame();
+ emit_enter_runtime();
+
+ a.stp(ARG1, ARG3, TMP_MEM1q);
+
+ comment("erts_cmp_compound(X, Y, 0, 0);");
+ mov_imm(ARG3, 0);
+ mov_imm(ARG4, 0);
+ runtime_call<4>(erts_cmp_compound);
+ a.tst(ARG1, ARG1);
+ a.b_mi(done);
+
+ a.ldp(ARG1, ARG2, TMP_MEM1q);
+
+ comment("erts_cmp_compound(X, Y, 0, 0);");
+ mov_imm(ARG3, 0);
+ mov_imm(ARG4, 0);
+ runtime_call<4>(erts_cmp_compound);
+ a.tst(ARG1, ARG1);
+
+ a.bind(done);
+ emit_leave_runtime();
+ emit_leave_runtime_frame();
+
+ a.ret(a64::x30);
+ }
+}
+
+/*
+ * 1121 occurrences in OTP at the time of writing.
+ */
+void BeamModuleAssembler::emit_is_in_range(ArgLabel const &Small,
+ ArgLabel const &Large,
+ ArgRegister const &Src,
+ ArgConstant const &Min,
+ ArgConstant const &Max) {
+ Label next = a.newLabel(), generic = a.newLabel();
+ bool need_generic = true;
+ auto src = load_source(Src, ARG1);
+
+ if (always_small(Src)) {
+ need_generic = false;
+ comment("skipped test for small operand since it always small");
+ } else if (always_one_of(Src, BEAM_TYPE_INTEGER | BEAM_TYPE_MASK_BOXED)) {
+ /* The only possible kind of immediate is a small and all
+ * other values are boxed, so we can test for smalls by
+ * testing boxed. */
+ comment("simplified small test since all other types are boxed");
+ ERTS_CT_ASSERT(_TAG_PRIMARY_MASK - TAG_PRIMARY_BOXED == (1 << 0));
+ if (Small == Large &&
+ always_one_of(Src, BEAM_TYPE_MASK_BOXED - BEAM_TYPE_FLOAT)) {
+ /* Src is never a float and the failure labels are
+ * equal. Therefore, since a bignum will never be within
+ * the range, we can fail immediately if Src is not a
+ * small. */
+ need_generic = false;
+ a.tbz(src.reg, imm(0), resolve_beam_label(Small, disp32K));
+ } else {
+ /* Src can be a float or the failures labels are distinct.
+ * We need to call the generic routine if Src is not a small. */
+ a.tbz(src.reg, imm(0), generic);
+ }
+ } else if (Small == Large) {
+ /* We can save one instruction if we incorporate the test for
+ * small into the range check. */
+ ERTS_CT_ASSERT(_TAG_IMMED1_SMALL == _TAG_IMMED1_MASK);
+ comment("simplified small & range tests since failure labels are "
+ "equal");
+ sub(TMP1, src.reg, Min.as<ArgImmed>().get());
+
+ /* Since we have subtracted the (tagged) lower bound, the
+ * tag bits of the difference is 0 if and only if Src is
+ * a small. Testing for a tag of 0 can be done in two
+ * instructions. */
+ a.tst(TMP1, imm(_TAG_IMMED1_MASK));
+ a.b_ne(generic);
+
+ /* Now do the range check. */
+ cmp(TMP1, Max.as<ArgImmed>().get() - Min.as<ArgImmed>().get());
+ a.b_hi(resolve_beam_label(Small, disp1MB));
+
+ /* Bypass the test code. */
+ goto test_done;
+ } else {
+ /* We have no applicable type information and the failure
+ * labels are distinct. Emit the standard test for small
+ * and call the generic routine if Src is not a small. */
+ a.and_(TMP1, src.reg, imm(_TAG_IMMED1_MASK));
+ a.cmp(TMP1, imm(_TAG_IMMED1_SMALL));
+ a.b_ne(generic);
+ }
+
+ /* We have now established that the operand is small. */
+ if (Small == Large) {
+ comment("simplified range test since failure labels are equal");
+ sub(TMP1, src.reg, Min.as<ArgImmed>().get());
+ cmp(TMP1, Max.as<ArgImmed>().get() - Min.as<ArgImmed>().get());
+ a.b_hi(resolve_beam_label(Small, disp1MB));
+ } else {
+ cmp(src.reg, Min.as<ArgImmed>().get());
+ a.b_lt(resolve_beam_label(Small, disp1MB));
+ cmp(src.reg, Max.as<ArgImmed>().get());
+ a.b_gt(resolve_beam_label(Large, disp1MB));
+ }
+
+test_done:
+ if (need_generic) {
+ a.b(next);
+ }
+
+ a.bind(generic);
+ if (!need_generic) {
+ comment("skipped generic comparison because it is not needed");
+ } else {
+ mov_var(ARG1, src);
+ mov_arg(ARG2, Min);
+ mov_arg(ARG3, Max);
+ fragment_call(ga->get_is_in_range_shared());
+ if (Small == Large) {
+ a.b_ne(resolve_beam_label(Small, disp1MB));
+ } else {
+ a.b_lt(resolve_beam_label(Small, disp1MB));
+ a.b_gt(resolve_beam_label(Large, disp1MB));
+ }
+ }
+
+ a.bind(next);
+}
+
+/*
+ * ARG1 = Src
+ * ARG2 = A
+ * ARG3 = B
+ *
+ * Result is returned in the flags.
+ */
+void BeamGlobalAssembler::emit_is_ge_lt_shared() {
+ Label done = a.newLabel();
+
+ emit_enter_runtime_frame();
+ emit_enter_runtime();
+
+ a.stp(ARG1, ARG3, TMP_MEM1q);
+
+ comment("erts_cmp_compound(Src, A, 0, 0);");
+ mov_imm(ARG3, 0);
+ mov_imm(ARG4, 0);
+ runtime_call<4>(erts_cmp_compound);
+ a.tst(ARG1, ARG1);
+ a.b_mi(done);
+
+ comment("erts_cmp_compound(B, Src, 0, 0);");
+ a.ldp(ARG2, ARG1, TMP_MEM1q);
+ mov_imm(ARG3, 0);
+ mov_imm(ARG4, 0);
+ runtime_call<4>(erts_cmp_compound);
+ a.cmp(ARG1, imm(0));
+
+ /* Make sure that ARG1 is -1, 0, or 1. */
+ a.cset(ARG1, imm(arm::CondCode::kNE));
+ a.csinv(ARG1, ARG1, ZERO, imm(arm::CondCode::kGE));
+
+ /* Prepare return value and flags. */
+ a.adds(ARG1, ARG1, imm(1));
+
+ /* We now have:
+ * ARG1 == 0 if B < SRC
+ * ARG1 > 0 if B => SRC
+ * and flags set accordingly. */
+
+ a.bind(done);
+ emit_leave_runtime();
+ emit_leave_runtime_frame();
+
+ a.ret(a64::x30);
+}
+
+/*
+ * The instruction sequence:
+ *
+ * is_ge Fail1 Src A
+ * is_lt Fail1 B Src
+ *
+ * is common (1841 occurrences in OTP at the time of writing).
+ *
+ * is_ge + is_lt is 18 instructions, while is_ge_lt is
+ * 14 instructions.
+ */
+void BeamModuleAssembler::emit_is_ge_lt(ArgLabel const &Fail1,
+ ArgLabel const &Fail2,
+ ArgRegister const &Src,
+ ArgConstant const &A,
+ ArgConstant const &B) {
+ Label generic = a.newLabel(), next = a.newLabel();
+ auto src = load_source(Src, ARG1);
+
+ mov_arg(ARG2, A);
+ mov_arg(ARG3, B);
+
+ a.and_(TMP1, src.reg, imm(_TAG_IMMED1_MASK));
+ a.cmp(TMP1, imm(_TAG_IMMED1_SMALL));
+ a.b_ne(generic);
+
+ a.cmp(src.reg, ARG2);
+ a.b_lt(resolve_beam_label(Fail1, disp1MB));
+ a.cmp(ARG3, src.reg);
+ a.b_ge(resolve_beam_label(Fail2, disp1MB));
+ a.b(next);
+
+ a.bind(generic);
+ mov_var(ARG1, src);
+ fragment_call(ga->get_is_ge_lt_shared());
+ a.b_lt(resolve_beam_label(Fail1, disp1MB));
+ a.b_gt(resolve_beam_label(Fail2, disp1MB));
+
+ a.bind(next);
+}
+
+/*
+ * 60 occurrences in OTP at the time of writing. Seems to be common in
+ * Elixir code.
+ *
+ * Currently not very frequent in OTP but very nice reduction in code
+ * size when it happens. We expect this combination of instructions
+ * to become more common in the future.
+ */
+void BeamModuleAssembler::emit_is_int_in_range(ArgLabel const &Fail,
+ ArgRegister const &Src,
+ ArgConstant const &Min,
+ ArgConstant const &Max) {
+ auto src = load_source(Src, ARG1);
+
+ sub(TMP1, src.reg, Min.as<ArgImmed>().get());
+
+ /* Since we have subtracted the (tagged) lower bound, the
+ * tag bits of the difference is 0 if and only if Src is
+ * a small. */
+ ERTS_CT_ASSERT(_TAG_IMMED1_SMALL == _TAG_IMMED1_MASK);
+ a.tst(TMP1, imm(_TAG_IMMED1_MASK));
+ a.b_ne(resolve_beam_label(Fail, disp1MB));
+ cmp(TMP1, Max.as<ArgImmed>().get() - Min.as<ArgImmed>().get());
+ a.b_hi(resolve_beam_label(Fail, disp1MB));
+}
+
+/*
+ * 428 occurrencs in OTP at the time of writing.
+ */
+void BeamModuleAssembler::emit_is_int_ge(ArgLabel const &Fail,
+ ArgRegister const &Src,
+ ArgConstant const &Min) {
+ auto src = load_source(Src, ARG1);
+ Label small = a.newLabel(), next = a.newLabel();
+
+ if (always_one_of(Src, BEAM_TYPE_INTEGER | BEAM_TYPE_MASK_ALWAYS_BOXED)) {
+ comment("simplified small test since all other types are boxed");
+ emit_is_boxed(small, Src, src.reg);
+ } else {
+ a.and_(TMP2, src.reg, imm(_TAG_IMMED1_MASK));
+ a.cmp(TMP2, imm(_TAG_IMMED1_SMALL));
+ a.b_eq(small);
+
+ emit_is_boxed(resolve_beam_label(Fail, dispUnknown), Src, TMP2);
+ }
+
+ arm::Gp boxed_ptr = emit_ptr_val(TMP1, src.reg);
+ a.ldur(TMP1, emit_boxed_val(boxed_ptr));
+ a.and_(TMP1, TMP1, imm(_TAG_HEADER_MASK));
+ a.cmp(TMP1, imm(_TAG_HEADER_POS_BIG));
+ a.b_ne(resolve_beam_label(Fail, disp1MB));
+ a.b(next);
+
+ a.bind(small);
+ cmp(src.reg, Min.as<ArgImmed>().get());
+ a.b_lt(resolve_beam_label(Fail, disp1MB));
+
+ a.bind(next);
+}
+
void BeamModuleAssembler::emit_badmatch(const ArgSource &Src) {
mov_arg(arm::Mem(c_p, offsetof(Process, fvalue)), Src);
emit_error(BADMATCH);
diff --git a/erts/emulator/beam/jit/arm/ops.tab b/erts/emulator/beam/jit/arm/ops.tab
index 862d46ebc0..9676577079 100644
--- a/erts/emulator/beam/jit/arm/ops.tab
+++ b/erts/emulator/beam/jit/arm/ops.tab
@@ -455,6 +455,36 @@ is_eq_exact f s s
is_ne_exact f s s
+is_integer NotInt N0 | is_ge Small N1=xy Min=i | is_ge Large Max=i N2=xy |
+ equal(N0, N1) | equal(N1, N2) |
+ equal(NotInt, Small) | equal(Small, Large) =>
+ is_int_in_range NotInt N0 Min Max
+
+is_integer NotInt N0 | is_ge Large Max=i N2=xy | is_ge Small N1=xy Min=i |
+ equal(N0, N1) | equal(N1, N2) |
+ equal(NotInt, Small) | equal(Small, Large) =>
+ is_int_in_range NotInt N0 Min Max
+
+is_integer NotInt N0 | is_ge Fail N1=xy Min=i |
+ equal(N0, N1) | equal(NotInt, Fail) =>
+ is_int_ge NotInt N0 Min
+
+is_int_in_range f S c c
+is_int_ge f S c
+
+is_ge Small N1=xy Min=i | is_ge Large Max=i N2=xy | equal(N1, N2) =>
+ is_in_range Small Large N1 Min Max
+
+is_ge Large Max=i N2=xy | is_ge Small N1=xy Min=i | equal(N1, N2) =>
+ is_in_range Small Large N2 Min Max
+
+is_in_range f f S c c
+
+is_ge Small N1=xy A=i | is_lt Large B=i N2=xy | equal(N1, N2) =>
+ is_ge_lt Small Large N1 A B
+
+is_ge_lt f f S c c
+
is_lt f s s
is_ge f s s
diff --git a/erts/emulator/beam/jit/x86/beam_asm.hpp b/erts/emulator/beam/jit/x86/beam_asm.hpp
index cd353714ef..da037aa235 100644
--- a/erts/emulator/beam/jit/x86/beam_asm.hpp
+++ b/erts/emulator/beam/jit/x86/beam_asm.hpp
@@ -1377,6 +1377,24 @@ protected:
}
}
+ void cmp(x86::Gp gp, int64_t val, const x86::Gp &spill) {
+ if (Support::isInt32(val)) {
+ a.cmp(gp, imm(val));
+ } else {
+ mov_imm(spill, val);
+ a.cmp(gp, spill);
+ }
+ }
+
+ void sub(x86::Gp gp, int64_t val, const x86::Gp &spill) {
+ if (Support::isInt32(val)) {
+ a.sub(gp, imm(val));
+ } else {
+ mov_imm(spill, val);
+ a.sub(gp, spill);
+ }
+ }
+
/* Note: May clear flags. */
void mov_arg(x86::Gp to, const ArgVal &from, const x86::Gp &spill) {
if (from.isBytePtr()) {
diff --git a/erts/emulator/beam/jit/x86/beam_asm_global.hpp.pl b/erts/emulator/beam/jit/x86/beam_asm_global.hpp.pl
index 9ea270c4f4..917a2815e5 100755
--- a/erts/emulator/beam/jit/x86/beam_asm_global.hpp.pl
+++ b/erts/emulator/beam/jit/x86/beam_asm_global.hpp.pl
@@ -79,6 +79,8 @@ my @beam_global_funcs = qw(
increment_body_shared
int_div_rem_body_shared
int_div_rem_guard_shared
+ is_in_range_shared
+ is_ge_lt_shared
internal_hash_helper
minus_body_shared
minus_guard_shared
diff --git a/erts/emulator/beam/jit/x86/instr_common.cpp b/erts/emulator/beam/jit/x86/instr_common.cpp
index a5cd54fb8f..bf26f5284f 100644
--- a/erts/emulator/beam/jit/x86/instr_common.cpp
+++ b/erts/emulator/beam/jit/x86/instr_common.cpp
@@ -1738,6 +1738,342 @@ void BeamModuleAssembler::emit_bif_is_ne_exact(const ArgRegister &LHS,
emit_bif_is_eq_ne_exact(LHS, RHS, Dst, am_true, am_false);
}
+/*
+ * ARG1 = Src
+ * ARG2 = Min
+ * ARG3 = Max
+ *
+ * Result is returned in the flags.
+ */
+void BeamGlobalAssembler::emit_is_in_range_shared() {
+ Label immediate = a.newLabel();
+ Label generic_compare = a.newLabel();
+ Label done = a.newLabel();
+
+ /* Is the source a float? */
+ emit_is_boxed(immediate, ARG1);
+
+ x86::Gp boxed_ptr = emit_ptr_val(ARG4, ARG1);
+ a.cmp(emit_boxed_val(boxed_ptr), imm(HEADER_FLONUM));
+ a.short_().jne(generic_compare);
+
+ /* Compare the float to the limits. */
+ a.movsd(x86::xmm0, emit_boxed_val(boxed_ptr, sizeof(Eterm)));
+ a.sar(ARG2, imm(_TAG_IMMED1_SIZE));
+ a.sar(ARG3, imm(_TAG_IMMED1_SIZE));
+ a.cvtsi2sd(x86::xmm1, ARG2);
+ a.cvtsi2sd(x86::xmm2, ARG3);
+ a.xor_(x86::ecx, x86::ecx);
+ a.ucomisd(x86::xmm0, x86::xmm2);
+ a.seta(x86::cl);
+ mov_imm(RET, -1);
+ a.ucomisd(x86::xmm1, x86::xmm0);
+ a.cmovbe(RET, x86::rcx);
+
+ a.cmp(RET, imm(0));
+
+ a.ret();
+
+ a.bind(immediate);
+ {
+ /*
+ * Src is an immediate (such as ATOM) but not SMALL.
+ * That means that Src must be greater than the upper
+ * limit.
+ */
+ mov_imm(RET, 1);
+ a.cmp(RET, imm(0));
+ a.ret();
+ }
+
+ a.bind(generic_compare);
+ {
+ emit_enter_runtime();
+
+ a.mov(TMP_MEM1q, ARG1);
+ a.mov(TMP_MEM2q, ARG3);
+
+ comment("erts_cmp_compound(X, Y, 0, 0);");
+ mov_imm(ARG3, 0);
+ mov_imm(ARG4, 0);
+ runtime_call<4>(erts_cmp_compound);
+ a.test(RET, RET);
+ a.js(done);
+
+ a.mov(ARG1, TMP_MEM1q);
+ a.mov(ARG2, TMP_MEM2q);
+
+ comment("erts_cmp_compound(X, Y, 0, 0);");
+ mov_imm(ARG3, 0);
+ mov_imm(ARG4, 0);
+ runtime_call<4>(erts_cmp_compound);
+ a.test(RET, RET);
+
+ a.bind(done);
+ emit_leave_runtime();
+
+ a.ret();
+ }
+}
+
+void BeamModuleAssembler::emit_is_in_range(ArgLabel const &Small,
+ ArgLabel const &Large,
+ ArgRegister const &Src,
+ ArgConstant const &Min,
+ ArgConstant const &Max) {
+ Label next = a.newLabel(), generic = a.newLabel();
+ bool need_generic = true;
+
+ mov_arg(ARG1, Src);
+
+ if (always_small(Src)) {
+ need_generic = false;
+ comment("skipped test for small operand since it always small");
+ } else if (always_one_of(Src, BEAM_TYPE_INTEGER | BEAM_TYPE_MASK_BOXED)) {
+ /* The only possible kind of immediate is a small and all
+ * other values are boxed, so we can test for smalls by
+ * testing boxed. */
+ comment("simplified small test since all other types are boxed");
+ ERTS_CT_ASSERT(_TAG_PRIMARY_MASK - TAG_PRIMARY_BOXED == (1 << 0));
+ if (Small == Large &&
+ always_one_of(Src, BEAM_TYPE_MASK_BOXED - BEAM_TYPE_FLOAT)) {
+ /* Src is never a float and the failure labels are
+ * equal. Therefore, since a bignum will never be within
+ * the range, we can fail immediately if Src is not a
+ * small. */
+ need_generic = false;
+ a.test(ARG1.r8(), imm(_TAG_PRIMARY_MASK - TAG_PRIMARY_BOXED));
+ a.je(resolve_beam_label(Small));
+ } else {
+ /* Src can be a float or the failures labels are distinct.
+ * We need to call the generic routine if Src is not a small. */
+ a.test(ARG1.r8(), imm(_TAG_PRIMARY_MASK - TAG_PRIMARY_BOXED));
+ a.short_().je(generic);
+ }
+ } else if (Small == Large) {
+ /* We can save one instruction if we incorporate the test for
+ * small into the range check. */
+ ERTS_CT_ASSERT(_TAG_IMMED1_SMALL == _TAG_IMMED1_MASK);
+ comment("simplified small & range tests since failure labels are "
+ "equal");
+ a.mov(RET, ARG1);
+ sub(RET, Min.as<ArgImmed>().get(), ARG4);
+
+ /* Since we have subtracted the (tagged) lower bound, the
+ * tag bits of the difference is 0 if and only if Src is
+ * a small. Testing for a tag of 0 can be done in two
+ * instructions. */
+ a.test(RETb, imm(_TAG_IMMED1_MASK));
+ a.jne(generic);
+
+ /* Now do the range check. */
+ cmp(RET, Max.as<ArgImmed>().get() - Min.as<ArgImmed>().get(), ARG4);
+ a.ja(resolve_beam_label(Small));
+
+ /* Bypass the test code. */
+ goto test_done;
+ } else {
+ /* We have no applicable type information and the failure
+ * labels are distinct. Emit the standard test for small
+ * and call the generic routine if Src is not a small. */
+ a.mov(RETd, ARG1d);
+ a.and_(RETb, imm(_TAG_IMMED1_MASK));
+ a.cmp(RETb, imm(_TAG_IMMED1_SMALL));
+ a.short_().jne(generic);
+ }
+
+ /* We have now established that the operand is small. */
+ if (Small == Large) {
+ comment("simplified range test since failure labels are equal");
+ sub(ARG1, Min.as<ArgImmed>().get(), RET);
+ cmp(ARG1, Max.as<ArgImmed>().get() - Min.as<ArgImmed>().get(), RET);
+ a.ja(resolve_beam_label(Small));
+ } else {
+ cmp(ARG1, Min.as<ArgImmed>().get(), RET);
+ a.jl(resolve_beam_label(Small));
+ cmp(ARG1, Max.as<ArgImmed>().get(), RET);
+ a.jg(resolve_beam_label(Large));
+ }
+
+test_done:
+ if (need_generic) {
+ a.short_().jmp(next);
+ }
+
+ a.bind(generic);
+ if (!need_generic) {
+ comment("skipped generic comparison because it is not needed");
+ } else {
+ mov_arg(ARG2, Min);
+ mov_arg(ARG3, Max);
+ safe_fragment_call(ga->get_is_in_range_shared());
+ if (Small == Large) {
+ a.jne(resolve_beam_label(Small));
+ } else {
+ a.jl(resolve_beam_label(Small));
+ a.jg(resolve_beam_label(Large));
+ }
+ }
+
+ a.bind(next);
+}
+
+/*
+ * ARG1 = Src
+ * ARG2 = A
+ * ARG3 = B
+ *
+ * Result is returned in the flags.
+ */
+void BeamGlobalAssembler::emit_is_ge_lt_shared() {
+ Label done = a.newLabel();
+
+ emit_enter_runtime();
+
+ a.mov(TMP_MEM1q, ARG1);
+ a.mov(TMP_MEM2q, ARG3);
+
+ comment("erts_cmp_compound(Src, A, 0, 0);");
+ mov_imm(ARG3, 0);
+ mov_imm(ARG4, 0);
+ runtime_call<4>(erts_cmp_compound);
+ a.test(RET, RET);
+ a.short_().js(done);
+
+ comment("erts_cmp_compound(B, Src, 0, 0);");
+ a.mov(ARG1, TMP_MEM2q);
+ a.mov(ARG2, TMP_MEM1q);
+ mov_imm(ARG3, 0);
+ mov_imm(ARG4, 0);
+ runtime_call<4>(erts_cmp_compound);
+
+ /* The following instructions implements the signum function. */
+ mov_imm(ARG1, -1);
+ mov_imm(ARG4, 1);
+ a.test(RET, RET);
+ a.cmovs(RET, ARG1);
+ a.cmovg(RET, ARG4);
+
+ /* RET is now -1, 0, or 1. */
+ a.add(RET, imm(1));
+
+ /* We now have:
+ * RET == 0 if B < SRC
+ * RET > 0 if B => SRC
+ * and flags set accordingly. */
+
+ a.bind(done);
+ emit_leave_runtime();
+
+ a.ret();
+}
+
+/*
+ * is_ge + is_lt is 20 instructions.
+ *
+ * is_ge_lt is 15 instructions.
+ */
+void BeamModuleAssembler::emit_is_ge_lt(ArgLabel const &Fail1,
+ ArgLabel const &Fail2,
+ ArgRegister const &Src,
+ ArgConstant const &A,
+ ArgConstant const &B) {
+ Label generic = a.newLabel(), next = a.newLabel();
+
+ mov_arg(ARG2, A);
+ mov_arg(ARG3, B);
+ mov_arg(ARG1, Src);
+
+ a.mov(RETd, ARG1d);
+ a.and_(RETb, imm(_TAG_IMMED1_MASK));
+ a.cmp(RETb, imm(_TAG_IMMED1_SMALL));
+ a.short_().jne(generic);
+
+ a.cmp(ARG1, ARG2);
+ a.jl(resolve_beam_label(Fail1));
+ a.cmp(ARG3, ARG1);
+ a.jge(resolve_beam_label(Fail2));
+ a.short_().jmp(next);
+
+ a.bind(generic);
+ safe_fragment_call(ga->get_is_ge_lt_shared());
+ a.jl(resolve_beam_label(Fail1));
+ a.jg(resolve_beam_label(Fail2));
+
+ a.bind(next);
+}
+
+/*
+ * Combine is_integer with range check.
+ *
+ * is_integer + is_ge + is_ge is 31 instructions.
+ *
+ * is_int_in_range is 6 instructions.
+ */
+void BeamModuleAssembler::emit_is_int_in_range(ArgLabel const &Fail,
+ ArgRegister const &Src,
+ ArgConstant const &Min,
+ ArgConstant const &Max) {
+ mov_arg(RET, Src);
+
+ sub(RET, Min.as<ArgImmed>().get(), ARG1);
+
+ /* Since we have subtracted the (tagged) lower bound, the
+ * tag bits of the difference is 0 if and only if Src is
+ * a small. */
+ ERTS_CT_ASSERT(_TAG_IMMED1_SMALL == _TAG_IMMED1_MASK);
+ a.test(RETb, imm(_TAG_IMMED1_MASK));
+ a.jne(resolve_beam_label(Fail));
+ cmp(RET, Max.as<ArgImmed>().get() - Min.as<ArgImmed>().get(), ARG1);
+ a.ja(resolve_beam_label(Fail));
+}
+
+/*
+ * is_integer + is_ge is 21 instructions.
+ *
+ * is_int_ge is 14 instructions.
+ */
+void BeamModuleAssembler::emit_is_int_ge(ArgLabel const &Fail,
+ ArgRegister const &Src,
+ ArgConstant const &Min) {
+ Label small = a.newLabel();
+ Label fail = a.newLabel();
+ Label next = a.newLabel();
+ /* On Unix, using rcx instead of ARG1 makes the `test` instruction
+ * in the boxed test one byte shorter. */
+ const x86::Gp src_reg = x86::rcx;
+
+ mov_arg(src_reg, Src);
+
+ if (always_one_of(Src, BEAM_TYPE_INTEGER | BEAM_TYPE_MASK_ALWAYS_BOXED)) {
+ comment("simplified small test since all other types are boxed");
+ emit_is_boxed(small, Src, src_reg);
+ } else {
+ a.mov(RETd, src_reg.r32());
+ a.and_(RETb, imm(_TAG_IMMED1_MASK));
+ a.cmp(RETb, imm(_TAG_IMMED1_SMALL));
+ a.short_().je(small);
+
+ emit_is_boxed(resolve_beam_label(Fail), Src, src_reg);
+ }
+
+ /* Src is boxed. Jump to failure unless Src is a positive bignum. */
+ x86::Gp boxed_ptr = emit_ptr_val(src_reg, src_reg);
+ a.mov(RETd, emit_boxed_val(boxed_ptr, 0, sizeof(Uint32)));
+ a.and_(RETb, imm(_TAG_HEADER_MASK));
+ a.cmp(RETb, imm(_TAG_HEADER_POS_BIG));
+ a.short_().je(next);
+
+ a.bind(fail);
+ a.jmp(resolve_beam_label(Fail));
+
+ a.bind(small);
+ cmp(src_reg, Min.as<ArgImmed>().get(), RET);
+ a.short_().jl(fail);
+
+ a.bind(next);
+}
+
void BeamModuleAssembler::emit_badmatch(const ArgSource &Src) {
mov_arg(x86::qword_ptr(c_p, offsetof(Process, fvalue)), Src);
emit_error(BADMATCH);
diff --git a/erts/emulator/beam/jit/x86/ops.tab b/erts/emulator/beam/jit/x86/ops.tab
index a0fa7124cd..0b7455bccd 100644
--- a/erts/emulator/beam/jit/x86/ops.tab
+++ b/erts/emulator/beam/jit/x86/ops.tab
@@ -443,6 +443,36 @@ is_eq_exact f s s
is_ne_exact f s s
+is_integer NotInt N0 | is_ge Small N1=xy Min=i | is_ge Large Max=i N2=xy |
+ equal(N0, N1) | equal(N1, N2) |
+ equal(NotInt, Small) | equal(Small, Large) =>
+ is_int_in_range NotInt N0 Min Max
+
+is_integer NotInt N0 | is_ge Large Max=i N2=xy | is_ge Small N1=xy Min=i |
+ equal(N0, N1) | equal(N1, N2) |
+ equal(NotInt, Small) | equal(Small, Large) =>
+ is_int_in_range NotInt N0 Min Max
+
+is_integer NotInt N0 | is_ge Fail N1=xy Min=i |
+ equal(N0, N1) | equal(NotInt, Fail) =>
+ is_int_ge NotInt N0 Min
+
+is_int_in_range f S c c
+is_int_ge f S c
+
+is_ge Small N1=xy Min=i | is_ge Large Max=i N2=xy | equal(N1, N2) =>
+ is_in_range Small Large N1 Min Max
+
+is_ge Large Max=i N2=xy | is_ge Small N1=xy Min=i | equal(N1, N2) =>
+ is_in_range Small Large N2 Min Max
+
+is_in_range f f S c c
+
+is_ge Small N1=xy A=i | is_lt Large B=i N2=xy | equal(N1, N2) =>
+ is_ge_lt Small Large N1 A B
+
+is_ge_lt f f S c c
+
is_lt f s s
is_ge f s s
diff --git a/erts/emulator/test/op_SUITE.erl b/erts/emulator/test/op_SUITE.erl
index 1f1f34d35f..df81b22e08 100644
--- a/erts/emulator/test/op_SUITE.erl
+++ b/erts/emulator/test/op_SUITE.erl
@@ -25,7 +25,7 @@
-export([all/0, suite/0,
bsl_bsr/1,logical/1,t_not/1,relop_simple/1,relop/1,
complex_relop/1,unsafe_fusing/1,
- range_tests/1,typed_relop/1]).
+ range_tests/1,combined_relops/1,typed_relop/1]).
-import(lists, [foldl/3,flatmap/2]).
@@ -35,7 +35,8 @@ suite() ->
all() ->
[bsl_bsr, logical, t_not, relop_simple, relop,
- complex_relop, unsafe_fusing, range_tests, typed_relop].
+ complex_relop, unsafe_fusing, range_tests,
+ combined_relops, typed_relop].
%% Test the bsl and bsr operators.
bsl_bsr(Config) when is_list(Config) ->
@@ -523,7 +524,7 @@ range_tests(_Config) ->
inside = range_big(MinSmall),
inside = range_big(-1 bsl 58),
inside = range_big(0),
- inside = range_barely_small(17.75),
+ inside = range_big(17.75),
inside = range_big(1 bsl 58),
inside = range_big(MaxSmall),
@@ -531,6 +532,39 @@ range_tests(_Config) ->
greater = range_big(1 bsl 64),
greater = range_big(float(1 bsl 64)),
+ inside = int_range_1(id(-100_000)),
+ inside = int_range_1(id(-10)),
+ inside = int_range_1(id(100)),
+ inside = int_range_1(id(100_000)),
+
+ outside = int_range_1(id(atom)),
+ outside = int_range_1(id(-1 bsl 60)),
+ outside = int_range_1(id(-100_001)),
+ outside = int_range_1(id(100_001)),
+ outside = int_range_1(id(1 bsl 60)),
+
+ inside = int_range_2(id(1)),
+ inside = int_range_2(id(42)),
+ inside = int_range_2(id(16#f000_0000)),
+
+ outside = int_range_2(id([a,list])),
+ outside = int_range_2(id(0)),
+ outside = int_range_1(id(-1 bsl 60)),
+ outside = int_range_1(id(1 bsl 60)),
+
+ inside = int_range_3(id(1 bsl 28)),
+ inside = int_range_3(id((1 bsl 28) + 1)),
+ inside = int_range_3(id((1 bsl 33) + 555)),
+ inside = int_range_3(id((1 bsl 58) - 1)),
+ inside = int_range_3(id(1 bsl 58)),
+
+ outside = int_range_3(id({a,tuple})),
+ outside = int_range_3(id(-1 bsl 60)),
+ outside = int_range_3(id(-1000)),
+ outside = int_range_3(id(100)),
+ outside = int_range_3(id((1 bsl 58) + 1)),
+ outside = int_range_3(id(1 bsl 60)),
+
ok.
range(X) ->
@@ -683,6 +717,83 @@ compare_integer_pid(N) when is_integer(N) ->
N < Immed -> ok
end.
+int_range_1(X) when is_integer(X), -100_000 =< X, X =< 100_000 ->
+ inside;
+int_range_1(_) ->
+ outside.
+
+int_range_2(X) when is_integer(X), 1 =< X, X =< 16#f000_0000 ->
+ inside;
+int_range_2(_) ->
+ outside.
+
+int_range_3(X) when is_integer(X), 1 bsl 28 =< X, X =< 1 bsl 58 ->
+ inside;
+int_range_3(_) ->
+ outside.
+
+combined_relops(_Config) ->
+ other = test_tok_char(-1 bsl 64),
+ other = test_tok_char($A - 1),
+
+ var = test_tok_char($A),
+ var = test_tok_char($B),
+ var = test_tok_char($P),
+ var = test_tok_char($Y),
+ var = test_tok_char($Z),
+
+ other = test_tok_char($Z + 1),
+
+ var = tok_char($_),
+ other = tok_char(float($_)),
+
+ other = test_tok_char(1 bsl 64),
+
+ other = test_tok_char(atom),
+ other = test_tok_char(self()),
+
+ ok.
+
+test_tok_char(C) ->
+ Result = tok_char(C),
+ if
+ is_integer(C) ->
+ Result = tok_char(float(C)),
+ Result = tok_char_int(C),
+ if
+ C band 16#FFFF =:= C ->
+ Result = tok_char_int_range(C);
+ true ->
+ Result
+ end;
+ true ->
+ Result
+ end.
+
+%% is_ge + is_lt
+tok_char(C) when $A =< C, C =< $Z ->
+ var;
+tok_char($_) ->
+ var;
+tok_char(_) ->
+ other.
+
+%% is_ge + is_ge
+tok_char_int(C) when $A =< C, C =< $Z ->
+ var;
+tok_char_int($_) ->
+ var;
+tok_char_int(_) ->
+ other.
+
+%% is_ge + is_ge
+tok_char_int_range(C) when $A =< C, C =< $Z ->
+ var;
+tok_char_int_range($_) ->
+ var;
+tok_char_int_range(_) ->
+ other.
+
%%%
%%% Utilities.
%%%
--
2.35.3