File 2103-jit-Optimize-relational-guard-tests.patch of Package erlang

From f8a9ccda6c4a69ac6cb029f82e473bf0ab14f533 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Sun, 1 May 2022 07:32:09 +0200
Subject: [PATCH 3/3] jit: Optimize relational guard tests

Optimize the generation of native code for common combinations of
relational operators in guards.
---
 erts/emulator/beam/jit/arm/beam_asm.hpp       |  21 +-
 .../beam/jit/arm/beam_asm_global.hpp.pl       |   2 +
 erts/emulator/beam/jit/arm/instr_common.cpp   | 329 +++++++++++++++++
 erts/emulator/beam/jit/arm/ops.tab            |  30 ++
 erts/emulator/beam/jit/x86/beam_asm.hpp       |  18 +
 .../beam/jit/x86/beam_asm_global.hpp.pl       |   2 +
 erts/emulator/beam/jit/x86/instr_common.cpp   | 336 ++++++++++++++++++
 erts/emulator/beam/jit/x86/ops.tab            |  30 ++
 erts/emulator/test/op_SUITE.erl               | 117 +++++-
 9 files changed, 874 insertions(+), 11 deletions(-)

diff --git a/erts/emulator/beam/jit/arm/beam_asm.hpp b/erts/emulator/beam/jit/arm/beam_asm.hpp
index bf1fdc5c48..eb3baeefb2 100644
--- a/erts/emulator/beam/jit/arm/beam_asm.hpp
+++ b/erts/emulator/beam/jit/arm/beam_asm.hpp
@@ -739,6 +739,17 @@ protected:
         }
     }
 
+    void cmp(arm::Gp src, int64_t val) {
+        if (Support::isUInt12(val)) {
+            a.cmp(src, imm(val));
+        } else if (Support::isUInt12(-val)) {
+            a.cmn(src, imm(-val));
+        } else {
+            mov_imm(SUPER_TMP, val);
+            a.cmp(src, SUPER_TMP);
+        }
+    }
+
     void ldur(arm::Gp reg, arm::Mem mem) {
         safe_9bit_imm(a64::Inst::kIdLdur, reg, mem);
     }
@@ -1589,14 +1600,8 @@ protected:
         if (arg.isImmed() || arg.isWord()) {
             Sint val = arg.isImmed() ? arg.as<ArgImmed>().get()
                                      : arg.as<ArgWord>().get();
-
-            if (Support::isUInt12(val)) {
-                a.cmp(gp, imm(val));
-                return;
-            } else if (Support::isUInt12(-val)) {
-                a.cmn(gp, imm(-val));
-                return;
-            }
+            cmp(gp, val);
+            return;
         }
 
         auto tmp = load_source(arg, SUPER_TMP);
diff --git a/erts/emulator/beam/jit/arm/beam_asm_global.hpp.pl b/erts/emulator/beam/jit/arm/beam_asm_global.hpp.pl
index f42d16e853..0099e6cc94 100644
--- a/erts/emulator/beam/jit/arm/beam_asm_global.hpp.pl
+++ b/erts/emulator/beam/jit/arm/beam_asm_global.hpp.pl
@@ -88,6 +88,8 @@ my @beam_global_funcs = qw(
     int_div_rem_body_shared
     int_div_rem_guard_shared
     internal_hash_helper
+    is_in_range_shared
+    is_ge_lt_shared
     minus_body_shared
     new_map_shared
     update_map_assoc_shared
diff --git a/erts/emulator/beam/jit/arm/instr_common.cpp b/erts/emulator/beam/jit/arm/instr_common.cpp
index 2a1d08a1a1..70097efdc4 100644
--- a/erts/emulator/beam/jit/arm/instr_common.cpp
+++ b/erts/emulator/beam/jit/arm/instr_common.cpp
@@ -1467,6 +1467,335 @@ void BeamModuleAssembler::emit_is_ge(const ArgLabel &Fail,
     }
 }
 
+/*
+ * ARG1 = Src
+ * ARG2 = Min
+ * ARG3 = Max
+ *
+ * Result is returned in the flags.
+ */
+void BeamGlobalAssembler::emit_is_in_range_shared() {
+    Label immediate = a.newLabel(), generic_compare = a.newLabel(),
+          float_done = a.newLabel(), done = a.newLabel();
+
+    /* Is the source a float? */
+    emit_is_boxed(immediate, ARG1);
+
+    arm::Gp boxed_ptr = emit_ptr_val(TMP1, ARG1);
+    a.ldur(TMP2, emit_boxed_val(boxed_ptr));
+
+    mov_imm(TMP3, HEADER_FLONUM);
+    a.cmp(TMP2, TMP3);
+    a.b_ne(generic_compare);
+
+    a.ldur(a64::d0, emit_boxed_val(boxed_ptr, sizeof(Eterm)));
+    a.asr(TMP1, ARG2, imm(_TAG_IMMED1_SIZE));
+    a.scvtf(a64::d1, TMP1);
+
+    a.fcmpe(a64::d0, a64::d1);
+    a.b_mi(float_done);
+
+    a.asr(TMP1, ARG3, imm(_TAG_IMMED1_SIZE));
+    a.scvtf(a64::d1, TMP1);
+    a.fcmpe(a64::d0, a64::d1);
+    a.b_gt(float_done);
+    a.tst(ZERO, ZERO);
+
+    a.bind(float_done);
+    a.ret(a64::x30);
+
+    a.bind(immediate);
+    {
+        /*
+         * Src is an immediate (such as ATOM) but not SMALL.
+         * That means that Src must be greater than the upper
+         * limit.
+         */
+        mov_imm(TMP1, 1);
+        a.cmp(TMP1, imm(0));
+        a.ret(a64::x30);
+    }
+
+    a.bind(generic_compare);
+    {
+        emit_enter_runtime_frame();
+        emit_enter_runtime();
+
+        a.stp(ARG1, ARG3, TMP_MEM1q);
+
+        comment("erts_cmp_compound(X, Y, 0, 0);");
+        mov_imm(ARG3, 0);
+        mov_imm(ARG4, 0);
+        runtime_call<4>(erts_cmp_compound);
+        a.tst(ARG1, ARG1);
+        a.b_mi(done);
+
+        a.ldp(ARG1, ARG2, TMP_MEM1q);
+
+        comment("erts_cmp_compound(X, Y, 0, 0);");
+        mov_imm(ARG3, 0);
+        mov_imm(ARG4, 0);
+        runtime_call<4>(erts_cmp_compound);
+        a.tst(ARG1, ARG1);
+
+        a.bind(done);
+        emit_leave_runtime();
+        emit_leave_runtime_frame();
+
+        a.ret(a64::x30);
+    }
+}
+
+/*
+ * 1121 occurrences in OTP at the time of writing.
+ */
+void BeamModuleAssembler::emit_is_in_range(ArgLabel const &Small,
+                                           ArgLabel const &Large,
+                                           ArgRegister const &Src,
+                                           ArgConstant const &Min,
+                                           ArgConstant const &Max) {
+    Label next = a.newLabel(), generic = a.newLabel();
+    bool need_generic = true;
+    auto src = load_source(Src, ARG1);
+
+    if (always_small(Src)) {
+        need_generic = false;
+        comment("skipped test for small operand since it always small");
+    } else if (always_one_of(Src, BEAM_TYPE_INTEGER | BEAM_TYPE_MASK_BOXED)) {
+        /* The only possible kind of immediate is a small and all
+         * other values are boxed, so we can test for smalls by
+         * testing boxed. */
+        comment("simplified small test since all other types are boxed");
+        ERTS_CT_ASSERT(_TAG_PRIMARY_MASK - TAG_PRIMARY_BOXED == (1 << 0));
+        if (Small == Large &&
+            always_one_of(Src, BEAM_TYPE_MASK_BOXED - BEAM_TYPE_FLOAT)) {
+            /* Src is never a float and the failure labels are
+             * equal. Therefore, since a bignum will never be within
+             * the range, we can fail immediately if Src is not a
+             * small. */
+            need_generic = false;
+            a.tbz(src.reg, imm(0), resolve_beam_label(Small, disp32K));
+        } else {
+            /* Src can be a float or the failures labels are distinct.
+             * We need to call the generic routine if Src is not a small. */
+            a.tbz(src.reg, imm(0), generic);
+        }
+    } else if (Small == Large) {
+        /* We can save one instruction if we incorporate the test for
+         * small into the range check. */
+        ERTS_CT_ASSERT(_TAG_IMMED1_SMALL == _TAG_IMMED1_MASK);
+        comment("simplified small & range tests since failure labels are "
+                "equal");
+        sub(TMP1, src.reg, Min.as<ArgImmed>().get());
+
+        /* Since we have subtracted the (tagged) lower bound, the
+         * tag bits of the difference is 0 if and only if Src is
+         * a small. Testing for a tag of 0 can be done in two
+         * instructions. */
+        a.tst(TMP1, imm(_TAG_IMMED1_MASK));
+        a.b_ne(generic);
+
+        /* Now do the range check. */
+        cmp(TMP1, Max.as<ArgImmed>().get() - Min.as<ArgImmed>().get());
+        a.b_hi(resolve_beam_label(Small, disp1MB));
+
+        /* Bypass the test code. */
+        goto test_done;
+    } else {
+        /* We have no applicable type information and the failure
+         * labels are distinct. Emit the standard test for small
+         * and call the generic routine if Src is not a small. */
+        a.and_(TMP1, src.reg, imm(_TAG_IMMED1_MASK));
+        a.cmp(TMP1, imm(_TAG_IMMED1_SMALL));
+        a.b_ne(generic);
+    }
+
+    /* We have now established that the operand is small. */
+    if (Small == Large) {
+        comment("simplified range test since failure labels are equal");
+        sub(TMP1, src.reg, Min.as<ArgImmed>().get());
+        cmp(TMP1, Max.as<ArgImmed>().get() - Min.as<ArgImmed>().get());
+        a.b_hi(resolve_beam_label(Small, disp1MB));
+    } else {
+        cmp(src.reg, Min.as<ArgImmed>().get());
+        a.b_lt(resolve_beam_label(Small, disp1MB));
+        cmp(src.reg, Max.as<ArgImmed>().get());
+        a.b_gt(resolve_beam_label(Large, disp1MB));
+    }
+
+test_done:
+    if (need_generic) {
+        a.b(next);
+    }
+
+    a.bind(generic);
+    if (!need_generic) {
+        comment("skipped generic comparison because it is not needed");
+    } else {
+        mov_var(ARG1, src);
+        mov_arg(ARG2, Min);
+        mov_arg(ARG3, Max);
+        fragment_call(ga->get_is_in_range_shared());
+        if (Small == Large) {
+            a.b_ne(resolve_beam_label(Small, disp1MB));
+        } else {
+            a.b_lt(resolve_beam_label(Small, disp1MB));
+            a.b_gt(resolve_beam_label(Large, disp1MB));
+        }
+    }
+
+    a.bind(next);
+}
+
+/*
+ * ARG1 = Src
+ * ARG2 = A
+ * ARG3 = B
+ *
+ * Result is returned in the flags.
+ */
+void BeamGlobalAssembler::emit_is_ge_lt_shared() {
+    Label done = a.newLabel();
+
+    emit_enter_runtime_frame();
+    emit_enter_runtime();
+
+    a.stp(ARG1, ARG3, TMP_MEM1q);
+
+    comment("erts_cmp_compound(Src, A, 0, 0);");
+    mov_imm(ARG3, 0);
+    mov_imm(ARG4, 0);
+    runtime_call<4>(erts_cmp_compound);
+    a.tst(ARG1, ARG1);
+    a.b_mi(done);
+
+    comment("erts_cmp_compound(B, Src, 0, 0);");
+    a.ldp(ARG2, ARG1, TMP_MEM1q);
+    mov_imm(ARG3, 0);
+    mov_imm(ARG4, 0);
+    runtime_call<4>(erts_cmp_compound);
+    a.cmp(ARG1, imm(0));
+
+    /* Make sure that ARG1 is -1, 0, or 1. */
+    a.cset(ARG1, imm(arm::CondCode::kNE));
+    a.csinv(ARG1, ARG1, ZERO, imm(arm::CondCode::kGE));
+
+    /* Prepare return value and flags. */
+    a.adds(ARG1, ARG1, imm(1));
+
+    /* We now have:
+     *   ARG1 == 0 if B < SRC
+     *   ARG1 > 0 if B => SRC
+     * and flags set accordingly. */
+
+    a.bind(done);
+    emit_leave_runtime();
+    emit_leave_runtime_frame();
+
+    a.ret(a64::x30);
+}
+
+/*
+ * The instruction sequence:
+ *
+ *   is_ge Fail1 Src A
+ *   is_lt Fail1 B Src
+ *
+ * is common (1841 occurrences in OTP at the time of writing).
+ *
+ * is_ge + is_lt is 18 instructions, while is_ge_lt is
+ * 14 instructions.
+ */
+void BeamModuleAssembler::emit_is_ge_lt(ArgLabel const &Fail1,
+                                        ArgLabel const &Fail2,
+                                        ArgRegister const &Src,
+                                        ArgConstant const &A,
+                                        ArgConstant const &B) {
+    Label generic = a.newLabel(), next = a.newLabel();
+    auto src = load_source(Src, ARG1);
+
+    mov_arg(ARG2, A);
+    mov_arg(ARG3, B);
+
+    a.and_(TMP1, src.reg, imm(_TAG_IMMED1_MASK));
+    a.cmp(TMP1, imm(_TAG_IMMED1_SMALL));
+    a.b_ne(generic);
+
+    a.cmp(src.reg, ARG2);
+    a.b_lt(resolve_beam_label(Fail1, disp1MB));
+    a.cmp(ARG3, src.reg);
+    a.b_ge(resolve_beam_label(Fail2, disp1MB));
+    a.b(next);
+
+    a.bind(generic);
+    mov_var(ARG1, src);
+    fragment_call(ga->get_is_ge_lt_shared());
+    a.b_lt(resolve_beam_label(Fail1, disp1MB));
+    a.b_gt(resolve_beam_label(Fail2, disp1MB));
+
+    a.bind(next);
+}
+
+/*
+ * 60 occurrences in OTP at the time of writing. Seems to be common in
+ * Elixir code.
+ *
+ * Currently not very frequent in OTP but very nice reduction in code
+ * size when it happens. We expect this combination of instructions
+ * to become more common in the future.
+ */
+void BeamModuleAssembler::emit_is_int_in_range(ArgLabel const &Fail,
+                                               ArgRegister const &Src,
+                                               ArgConstant const &Min,
+                                               ArgConstant const &Max) {
+    auto src = load_source(Src, ARG1);
+
+    sub(TMP1, src.reg, Min.as<ArgImmed>().get());
+
+    /* Since we have subtracted the (tagged) lower bound, the
+     * tag bits of the difference is 0 if and only if Src is
+     * a small. */
+    ERTS_CT_ASSERT(_TAG_IMMED1_SMALL == _TAG_IMMED1_MASK);
+    a.tst(TMP1, imm(_TAG_IMMED1_MASK));
+    a.b_ne(resolve_beam_label(Fail, disp1MB));
+    cmp(TMP1, Max.as<ArgImmed>().get() - Min.as<ArgImmed>().get());
+    a.b_hi(resolve_beam_label(Fail, disp1MB));
+}
+
+/*
+ * 428 occurrencs in OTP at the time of writing.
+ */
+void BeamModuleAssembler::emit_is_int_ge(ArgLabel const &Fail,
+                                         ArgRegister const &Src,
+                                         ArgConstant const &Min) {
+    auto src = load_source(Src, ARG1);
+    Label small = a.newLabel(), next = a.newLabel();
+
+    if (always_one_of(Src, BEAM_TYPE_INTEGER | BEAM_TYPE_MASK_ALWAYS_BOXED)) {
+        comment("simplified small test since all other types are boxed");
+        emit_is_boxed(small, Src, src.reg);
+    } else {
+        a.and_(TMP2, src.reg, imm(_TAG_IMMED1_MASK));
+        a.cmp(TMP2, imm(_TAG_IMMED1_SMALL));
+        a.b_eq(small);
+
+        emit_is_boxed(resolve_beam_label(Fail, dispUnknown), Src, TMP2);
+    }
+
+    arm::Gp boxed_ptr = emit_ptr_val(TMP1, src.reg);
+    a.ldur(TMP1, emit_boxed_val(boxed_ptr));
+    a.and_(TMP1, TMP1, imm(_TAG_HEADER_MASK));
+    a.cmp(TMP1, imm(_TAG_HEADER_POS_BIG));
+    a.b_ne(resolve_beam_label(Fail, disp1MB));
+    a.b(next);
+
+    a.bind(small);
+    cmp(src.reg, Min.as<ArgImmed>().get());
+    a.b_lt(resolve_beam_label(Fail, disp1MB));
+
+    a.bind(next);
+}
+
 void BeamModuleAssembler::emit_badmatch(const ArgSource &Src) {
     mov_arg(arm::Mem(c_p, offsetof(Process, fvalue)), Src);
     emit_error(BADMATCH);
diff --git a/erts/emulator/beam/jit/arm/ops.tab b/erts/emulator/beam/jit/arm/ops.tab
index 862d46ebc0..9676577079 100644
--- a/erts/emulator/beam/jit/arm/ops.tab
+++ b/erts/emulator/beam/jit/arm/ops.tab
@@ -455,6 +455,36 @@ is_eq_exact f s s
 
 is_ne_exact f s s
 
+is_integer NotInt N0 | is_ge Small N1=xy Min=i | is_ge Large Max=i N2=xy |
+  equal(N0, N1) | equal(N1, N2) |
+  equal(NotInt, Small) | equal(Small, Large) =>
+    is_int_in_range NotInt N0 Min Max
+
+is_integer NotInt N0 | is_ge Large Max=i N2=xy | is_ge Small N1=xy Min=i |
+  equal(N0, N1) | equal(N1, N2) |
+  equal(NotInt, Small) | equal(Small, Large) =>
+    is_int_in_range NotInt N0 Min Max
+
+is_integer NotInt N0 | is_ge Fail N1=xy Min=i |
+  equal(N0, N1) | equal(NotInt, Fail) =>
+    is_int_ge NotInt N0 Min
+
+is_int_in_range f S c c
+is_int_ge f S c
+
+is_ge Small N1=xy Min=i | is_ge Large Max=i N2=xy | equal(N1, N2) =>
+    is_in_range Small Large N1 Min Max
+
+is_ge Large Max=i N2=xy | is_ge Small N1=xy Min=i | equal(N1, N2) =>
+    is_in_range Small Large N2 Min Max
+
+is_in_range f f S c c
+
+is_ge Small N1=xy A=i | is_lt Large B=i N2=xy | equal(N1, N2) =>
+    is_ge_lt Small Large N1 A B
+
+is_ge_lt f f S c c
+
 is_lt f s s
 is_ge f s s
 
diff --git a/erts/emulator/beam/jit/x86/beam_asm.hpp b/erts/emulator/beam/jit/x86/beam_asm.hpp
index cd353714ef..da037aa235 100644
--- a/erts/emulator/beam/jit/x86/beam_asm.hpp
+++ b/erts/emulator/beam/jit/x86/beam_asm.hpp
@@ -1377,6 +1377,24 @@ protected:
         }
     }
 
+    void cmp(x86::Gp gp, int64_t val, const x86::Gp &spill) {
+        if (Support::isInt32(val)) {
+            a.cmp(gp, imm(val));
+        } else {
+            mov_imm(spill, val);
+            a.cmp(gp, spill);
+        }
+    }
+
+    void sub(x86::Gp gp, int64_t val, const x86::Gp &spill) {
+        if (Support::isInt32(val)) {
+            a.sub(gp, imm(val));
+        } else {
+            mov_imm(spill, val);
+            a.sub(gp, spill);
+        }
+    }
+
     /* Note: May clear flags. */
     void mov_arg(x86::Gp to, const ArgVal &from, const x86::Gp &spill) {
         if (from.isBytePtr()) {
diff --git a/erts/emulator/beam/jit/x86/beam_asm_global.hpp.pl b/erts/emulator/beam/jit/x86/beam_asm_global.hpp.pl
index 9ea270c4f4..917a2815e5 100755
--- a/erts/emulator/beam/jit/x86/beam_asm_global.hpp.pl
+++ b/erts/emulator/beam/jit/x86/beam_asm_global.hpp.pl
@@ -79,6 +79,8 @@ my @beam_global_funcs = qw(
     increment_body_shared
     int_div_rem_body_shared
     int_div_rem_guard_shared
+    is_in_range_shared
+    is_ge_lt_shared
     internal_hash_helper
     minus_body_shared
     minus_guard_shared
diff --git a/erts/emulator/beam/jit/x86/instr_common.cpp b/erts/emulator/beam/jit/x86/instr_common.cpp
index a5cd54fb8f..bf26f5284f 100644
--- a/erts/emulator/beam/jit/x86/instr_common.cpp
+++ b/erts/emulator/beam/jit/x86/instr_common.cpp
@@ -1738,6 +1738,342 @@ void BeamModuleAssembler::emit_bif_is_ne_exact(const ArgRegister &LHS,
     emit_bif_is_eq_ne_exact(LHS, RHS, Dst, am_true, am_false);
 }
 
+/*
+ * ARG1 = Src
+ * ARG2 = Min
+ * ARG3 = Max
+ *
+ * Result is returned in the flags.
+ */
+void BeamGlobalAssembler::emit_is_in_range_shared() {
+    Label immediate = a.newLabel();
+    Label generic_compare = a.newLabel();
+    Label done = a.newLabel();
+
+    /* Is the source a float? */
+    emit_is_boxed(immediate, ARG1);
+
+    x86::Gp boxed_ptr = emit_ptr_val(ARG4, ARG1);
+    a.cmp(emit_boxed_val(boxed_ptr), imm(HEADER_FLONUM));
+    a.short_().jne(generic_compare);
+
+    /* Compare the float to the limits. */
+    a.movsd(x86::xmm0, emit_boxed_val(boxed_ptr, sizeof(Eterm)));
+    a.sar(ARG2, imm(_TAG_IMMED1_SIZE));
+    a.sar(ARG3, imm(_TAG_IMMED1_SIZE));
+    a.cvtsi2sd(x86::xmm1, ARG2);
+    a.cvtsi2sd(x86::xmm2, ARG3);
+    a.xor_(x86::ecx, x86::ecx);
+    a.ucomisd(x86::xmm0, x86::xmm2);
+    a.seta(x86::cl);
+    mov_imm(RET, -1);
+    a.ucomisd(x86::xmm1, x86::xmm0);
+    a.cmovbe(RET, x86::rcx);
+
+    a.cmp(RET, imm(0));
+
+    a.ret();
+
+    a.bind(immediate);
+    {
+        /*
+         * Src is an immediate (such as ATOM) but not SMALL.
+         * That means that Src must be greater than the upper
+         * limit.
+         */
+        mov_imm(RET, 1);
+        a.cmp(RET, imm(0));
+        a.ret();
+    }
+
+    a.bind(generic_compare);
+    {
+        emit_enter_runtime();
+
+        a.mov(TMP_MEM1q, ARG1);
+        a.mov(TMP_MEM2q, ARG3);
+
+        comment("erts_cmp_compound(X, Y, 0, 0);");
+        mov_imm(ARG3, 0);
+        mov_imm(ARG4, 0);
+        runtime_call<4>(erts_cmp_compound);
+        a.test(RET, RET);
+        a.js(done);
+
+        a.mov(ARG1, TMP_MEM1q);
+        a.mov(ARG2, TMP_MEM2q);
+
+        comment("erts_cmp_compound(X, Y, 0, 0);");
+        mov_imm(ARG3, 0);
+        mov_imm(ARG4, 0);
+        runtime_call<4>(erts_cmp_compound);
+        a.test(RET, RET);
+
+        a.bind(done);
+        emit_leave_runtime();
+
+        a.ret();
+    }
+}
+
+void BeamModuleAssembler::emit_is_in_range(ArgLabel const &Small,
+                                           ArgLabel const &Large,
+                                           ArgRegister const &Src,
+                                           ArgConstant const &Min,
+                                           ArgConstant const &Max) {
+    Label next = a.newLabel(), generic = a.newLabel();
+    bool need_generic = true;
+
+    mov_arg(ARG1, Src);
+
+    if (always_small(Src)) {
+        need_generic = false;
+        comment("skipped test for small operand since it always small");
+    } else if (always_one_of(Src, BEAM_TYPE_INTEGER | BEAM_TYPE_MASK_BOXED)) {
+        /* The only possible kind of immediate is a small and all
+         * other values are boxed, so we can test for smalls by
+         * testing boxed. */
+        comment("simplified small test since all other types are boxed");
+        ERTS_CT_ASSERT(_TAG_PRIMARY_MASK - TAG_PRIMARY_BOXED == (1 << 0));
+        if (Small == Large &&
+            always_one_of(Src, BEAM_TYPE_MASK_BOXED - BEAM_TYPE_FLOAT)) {
+            /* Src is never a float and the failure labels are
+             * equal. Therefore, since a bignum will never be within
+             * the range, we can fail immediately if Src is not a
+             * small. */
+            need_generic = false;
+            a.test(ARG1.r8(), imm(_TAG_PRIMARY_MASK - TAG_PRIMARY_BOXED));
+            a.je(resolve_beam_label(Small));
+        } else {
+            /* Src can be a float or the failures labels are distinct.
+             * We need to call the generic routine if Src is not a small. */
+            a.test(ARG1.r8(), imm(_TAG_PRIMARY_MASK - TAG_PRIMARY_BOXED));
+            a.short_().je(generic);
+        }
+    } else if (Small == Large) {
+        /* We can save one instruction if we incorporate the test for
+         * small into the range check. */
+        ERTS_CT_ASSERT(_TAG_IMMED1_SMALL == _TAG_IMMED1_MASK);
+        comment("simplified small & range tests since failure labels are "
+                "equal");
+        a.mov(RET, ARG1);
+        sub(RET, Min.as<ArgImmed>().get(), ARG4);
+
+        /* Since we have subtracted the (tagged) lower bound, the
+         * tag bits of the difference is 0 if and only if Src is
+         * a small. Testing for a tag of 0 can be done in two
+         * instructions. */
+        a.test(RETb, imm(_TAG_IMMED1_MASK));
+        a.jne(generic);
+
+        /* Now do the range check. */
+        cmp(RET, Max.as<ArgImmed>().get() - Min.as<ArgImmed>().get(), ARG4);
+        a.ja(resolve_beam_label(Small));
+
+        /* Bypass the test code. */
+        goto test_done;
+    } else {
+        /* We have no applicable type information and the failure
+         * labels are distinct. Emit the standard test for small
+         * and call the generic routine if Src is not a small. */
+        a.mov(RETd, ARG1d);
+        a.and_(RETb, imm(_TAG_IMMED1_MASK));
+        a.cmp(RETb, imm(_TAG_IMMED1_SMALL));
+        a.short_().jne(generic);
+    }
+
+    /* We have now established that the operand is small. */
+    if (Small == Large) {
+        comment("simplified range test since failure labels are equal");
+        sub(ARG1, Min.as<ArgImmed>().get(), RET);
+        cmp(ARG1, Max.as<ArgImmed>().get() - Min.as<ArgImmed>().get(), RET);
+        a.ja(resolve_beam_label(Small));
+    } else {
+        cmp(ARG1, Min.as<ArgImmed>().get(), RET);
+        a.jl(resolve_beam_label(Small));
+        cmp(ARG1, Max.as<ArgImmed>().get(), RET);
+        a.jg(resolve_beam_label(Large));
+    }
+
+test_done:
+    if (need_generic) {
+        a.short_().jmp(next);
+    }
+
+    a.bind(generic);
+    if (!need_generic) {
+        comment("skipped generic comparison because it is not needed");
+    } else {
+        mov_arg(ARG2, Min);
+        mov_arg(ARG3, Max);
+        safe_fragment_call(ga->get_is_in_range_shared());
+        if (Small == Large) {
+            a.jne(resolve_beam_label(Small));
+        } else {
+            a.jl(resolve_beam_label(Small));
+            a.jg(resolve_beam_label(Large));
+        }
+    }
+
+    a.bind(next);
+}
+
+/*
+ * ARG1 = Src
+ * ARG2 = A
+ * ARG3 = B
+ *
+ * Result is returned in the flags.
+ */
+void BeamGlobalAssembler::emit_is_ge_lt_shared() {
+    Label done = a.newLabel();
+
+    emit_enter_runtime();
+
+    a.mov(TMP_MEM1q, ARG1);
+    a.mov(TMP_MEM2q, ARG3);
+
+    comment("erts_cmp_compound(Src, A, 0, 0);");
+    mov_imm(ARG3, 0);
+    mov_imm(ARG4, 0);
+    runtime_call<4>(erts_cmp_compound);
+    a.test(RET, RET);
+    a.short_().js(done);
+
+    comment("erts_cmp_compound(B, Src, 0, 0);");
+    a.mov(ARG1, TMP_MEM2q);
+    a.mov(ARG2, TMP_MEM1q);
+    mov_imm(ARG3, 0);
+    mov_imm(ARG4, 0);
+    runtime_call<4>(erts_cmp_compound);
+
+    /* The following instructions implements the signum function. */
+    mov_imm(ARG1, -1);
+    mov_imm(ARG4, 1);
+    a.test(RET, RET);
+    a.cmovs(RET, ARG1);
+    a.cmovg(RET, ARG4);
+
+    /* RET is now -1, 0, or 1. */
+    a.add(RET, imm(1));
+
+    /* We now have:
+     *   RET == 0 if B < SRC
+     *   RET > 0 if B => SRC
+     * and flags set accordingly. */
+
+    a.bind(done);
+    emit_leave_runtime();
+
+    a.ret();
+}
+
+/*
+ * is_ge + is_lt is 20 instructions.
+ *
+ * is_ge_lt is 15 instructions.
+ */
+void BeamModuleAssembler::emit_is_ge_lt(ArgLabel const &Fail1,
+                                        ArgLabel const &Fail2,
+                                        ArgRegister const &Src,
+                                        ArgConstant const &A,
+                                        ArgConstant const &B) {
+    Label generic = a.newLabel(), next = a.newLabel();
+
+    mov_arg(ARG2, A);
+    mov_arg(ARG3, B);
+    mov_arg(ARG1, Src);
+
+    a.mov(RETd, ARG1d);
+    a.and_(RETb, imm(_TAG_IMMED1_MASK));
+    a.cmp(RETb, imm(_TAG_IMMED1_SMALL));
+    a.short_().jne(generic);
+
+    a.cmp(ARG1, ARG2);
+    a.jl(resolve_beam_label(Fail1));
+    a.cmp(ARG3, ARG1);
+    a.jge(resolve_beam_label(Fail2));
+    a.short_().jmp(next);
+
+    a.bind(generic);
+    safe_fragment_call(ga->get_is_ge_lt_shared());
+    a.jl(resolve_beam_label(Fail1));
+    a.jg(resolve_beam_label(Fail2));
+
+    a.bind(next);
+}
+
+/*
+ * Combine is_integer with range check.
+ *
+ * is_integer + is_ge + is_ge is 31 instructions.
+ *
+ * is_int_in_range is 6 instructions.
+ */
+void BeamModuleAssembler::emit_is_int_in_range(ArgLabel const &Fail,
+                                               ArgRegister const &Src,
+                                               ArgConstant const &Min,
+                                               ArgConstant const &Max) {
+    mov_arg(RET, Src);
+
+    sub(RET, Min.as<ArgImmed>().get(), ARG1);
+
+    /* Since we have subtracted the (tagged) lower bound, the
+     * tag bits of the difference is 0 if and only if Src is
+     * a small. */
+    ERTS_CT_ASSERT(_TAG_IMMED1_SMALL == _TAG_IMMED1_MASK);
+    a.test(RETb, imm(_TAG_IMMED1_MASK));
+    a.jne(resolve_beam_label(Fail));
+    cmp(RET, Max.as<ArgImmed>().get() - Min.as<ArgImmed>().get(), ARG1);
+    a.ja(resolve_beam_label(Fail));
+}
+
+/*
+ * is_integer + is_ge is 21 instructions.
+ *
+ * is_int_ge is 14 instructions.
+ */
+void BeamModuleAssembler::emit_is_int_ge(ArgLabel const &Fail,
+                                         ArgRegister const &Src,
+                                         ArgConstant const &Min) {
+    Label small = a.newLabel();
+    Label fail = a.newLabel();
+    Label next = a.newLabel();
+    /* On Unix, using rcx instead of ARG1 makes the `test` instruction
+     * in the boxed test one byte shorter. */
+    const x86::Gp src_reg = x86::rcx;
+
+    mov_arg(src_reg, Src);
+
+    if (always_one_of(Src, BEAM_TYPE_INTEGER | BEAM_TYPE_MASK_ALWAYS_BOXED)) {
+        comment("simplified small test since all other types are boxed");
+        emit_is_boxed(small, Src, src_reg);
+    } else {
+        a.mov(RETd, src_reg.r32());
+        a.and_(RETb, imm(_TAG_IMMED1_MASK));
+        a.cmp(RETb, imm(_TAG_IMMED1_SMALL));
+        a.short_().je(small);
+
+        emit_is_boxed(resolve_beam_label(Fail), Src, src_reg);
+    }
+
+    /* Src is boxed. Jump to failure unless Src is a positive bignum. */
+    x86::Gp boxed_ptr = emit_ptr_val(src_reg, src_reg);
+    a.mov(RETd, emit_boxed_val(boxed_ptr, 0, sizeof(Uint32)));
+    a.and_(RETb, imm(_TAG_HEADER_MASK));
+    a.cmp(RETb, imm(_TAG_HEADER_POS_BIG));
+    a.short_().je(next);
+
+    a.bind(fail);
+    a.jmp(resolve_beam_label(Fail));
+
+    a.bind(small);
+    cmp(src_reg, Min.as<ArgImmed>().get(), RET);
+    a.short_().jl(fail);
+
+    a.bind(next);
+}
+
 void BeamModuleAssembler::emit_badmatch(const ArgSource &Src) {
     mov_arg(x86::qword_ptr(c_p, offsetof(Process, fvalue)), Src);
     emit_error(BADMATCH);
diff --git a/erts/emulator/beam/jit/x86/ops.tab b/erts/emulator/beam/jit/x86/ops.tab
index a0fa7124cd..0b7455bccd 100644
--- a/erts/emulator/beam/jit/x86/ops.tab
+++ b/erts/emulator/beam/jit/x86/ops.tab
@@ -443,6 +443,36 @@ is_eq_exact f s s
 
 is_ne_exact f s s
 
+is_integer NotInt N0 | is_ge Small N1=xy Min=i | is_ge Large Max=i N2=xy |
+  equal(N0, N1) | equal(N1, N2) |
+  equal(NotInt, Small) | equal(Small, Large) =>
+    is_int_in_range NotInt N0 Min Max
+
+is_integer NotInt N0 | is_ge Large Max=i N2=xy | is_ge Small N1=xy Min=i |
+  equal(N0, N1) | equal(N1, N2) |
+  equal(NotInt, Small) | equal(Small, Large) =>
+    is_int_in_range NotInt N0 Min Max
+
+is_integer NotInt N0 | is_ge Fail N1=xy Min=i |
+  equal(N0, N1) | equal(NotInt, Fail) =>
+    is_int_ge NotInt N0 Min
+
+is_int_in_range f S c c
+is_int_ge f S c
+
+is_ge Small N1=xy Min=i | is_ge Large Max=i N2=xy | equal(N1, N2) =>
+    is_in_range Small Large N1 Min Max
+
+is_ge Large Max=i N2=xy | is_ge Small N1=xy Min=i | equal(N1, N2) =>
+    is_in_range Small Large N2 Min Max
+
+is_in_range f f S c c
+
+is_ge Small N1=xy A=i | is_lt Large B=i N2=xy | equal(N1, N2) =>
+    is_ge_lt Small Large N1 A B
+
+is_ge_lt f f S c c
+
 is_lt f s s
 is_ge f s s
 
diff --git a/erts/emulator/test/op_SUITE.erl b/erts/emulator/test/op_SUITE.erl
index 1f1f34d35f..df81b22e08 100644
--- a/erts/emulator/test/op_SUITE.erl
+++ b/erts/emulator/test/op_SUITE.erl
@@ -25,7 +25,7 @@
 -export([all/0, suite/0,
          bsl_bsr/1,logical/1,t_not/1,relop_simple/1,relop/1,
          complex_relop/1,unsafe_fusing/1,
-         range_tests/1,typed_relop/1]).
+         range_tests/1,combined_relops/1,typed_relop/1]).
 
 -import(lists, [foldl/3,flatmap/2]).
 
@@ -35,7 +35,8 @@ suite() ->
 
 all() ->
     [bsl_bsr, logical, t_not, relop_simple, relop,
-     complex_relop, unsafe_fusing, range_tests, typed_relop].
+     complex_relop, unsafe_fusing, range_tests,
+     combined_relops, typed_relop].
 
 %% Test the bsl and bsr operators.
 bsl_bsr(Config) when is_list(Config) ->
@@ -523,7 +524,7 @@ range_tests(_Config) ->
     inside = range_big(MinSmall),
     inside = range_big(-1 bsl 58),
     inside = range_big(0),
-    inside = range_barely_small(17.75),
+    inside = range_big(17.75),
     inside = range_big(1 bsl 58),
     inside = range_big(MaxSmall),
 
@@ -531,6 +532,39 @@ range_tests(_Config) ->
     greater = range_big(1 bsl 64),
     greater = range_big(float(1 bsl 64)),
 
+    inside = int_range_1(id(-100_000)),
+    inside = int_range_1(id(-10)),
+    inside = int_range_1(id(100)),
+    inside = int_range_1(id(100_000)),
+
+    outside = int_range_1(id(atom)),
+    outside = int_range_1(id(-1 bsl 60)),
+    outside = int_range_1(id(-100_001)),
+    outside = int_range_1(id(100_001)),
+    outside = int_range_1(id(1 bsl 60)),
+
+    inside = int_range_2(id(1)),
+    inside = int_range_2(id(42)),
+    inside = int_range_2(id(16#f000_0000)),
+
+    outside = int_range_2(id([a,list])),
+    outside = int_range_2(id(0)),
+    outside = int_range_1(id(-1 bsl 60)),
+    outside = int_range_1(id(1 bsl 60)),
+
+    inside = int_range_3(id(1 bsl 28)),
+    inside = int_range_3(id((1 bsl 28) + 1)),
+    inside = int_range_3(id((1 bsl 33) + 555)),
+    inside = int_range_3(id((1 bsl 58) - 1)),
+    inside = int_range_3(id(1 bsl 58)),
+
+    outside = int_range_3(id({a,tuple})),
+    outside = int_range_3(id(-1 bsl 60)),
+    outside = int_range_3(id(-1000)),
+    outside = int_range_3(id(100)),
+    outside = int_range_3(id((1 bsl 58) + 1)),
+    outside = int_range_3(id(1 bsl 60)),
+
     ok.
 
 range(X) ->
@@ -683,6 +717,83 @@ compare_integer_pid(N) when is_integer(N) ->
         N < Immed -> ok
     end.
 
+int_range_1(X) when is_integer(X), -100_000 =< X, X =< 100_000 ->
+    inside;
+int_range_1(_) ->
+    outside.
+
+int_range_2(X) when is_integer(X), 1 =< X, X =< 16#f000_0000 ->
+    inside;
+int_range_2(_) ->
+    outside.
+
+int_range_3(X) when is_integer(X), 1 bsl 28 =< X, X =< 1 bsl 58 ->
+    inside;
+int_range_3(_) ->
+    outside.
+
+combined_relops(_Config) ->
+    other = test_tok_char(-1 bsl 64),
+    other = test_tok_char($A - 1),
+
+    var = test_tok_char($A),
+    var = test_tok_char($B),
+    var = test_tok_char($P),
+    var = test_tok_char($Y),
+    var = test_tok_char($Z),
+
+    other = test_tok_char($Z + 1),
+
+    var = tok_char($_),
+    other = tok_char(float($_)),
+
+    other = test_tok_char(1 bsl 64),
+
+    other = test_tok_char(atom),
+    other = test_tok_char(self()),
+
+    ok.
+
+test_tok_char(C) ->
+    Result = tok_char(C),
+    if
+        is_integer(C) ->
+            Result = tok_char(float(C)),
+            Result = tok_char_int(C),
+            if
+                C band 16#FFFF =:= C ->
+                    Result = tok_char_int_range(C);
+                true ->
+                    Result
+            end;
+        true ->
+            Result
+    end.
+
+%% is_ge + is_lt
+tok_char(C) when $A =< C, C =< $Z ->
+    var;
+tok_char($_) ->
+    var;
+tok_char(_) ->
+    other.
+
+%% is_ge + is_ge
+tok_char_int(C) when $A =< C, C =< $Z ->
+    var;
+tok_char_int($_) ->
+    var;
+tok_char_int(_) ->
+    other.
+
+%% is_ge + is_ge
+tok_char_int_range(C) when $A =< C, C =< $Z ->
+    var;
+tok_char_int_range($_) ->
+    var;
+tok_char_int_range(_) ->
+    other.
+
 %%%
 %%% Utilities.
 %%%
-- 
2.35.3

openSUSE Build Service is sponsored by