File 2481-erts-Optimize-arithmetic-ops-using-overflow-intrinsi.patch of Package erlang

From 01e70c30d837887acbbb91d13ec2d8d1cb616c7e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?John=20H=C3=B6gberg?= <john@erlang.org>
Date: Wed, 3 Apr 2019 12:31:37 +0200
Subject: [PATCH 2/2] erts: Optimize arithmetic ops using overflow intrinsics

---
 erts/emulator/beam/arith_instrs.tab |  83 ++++++++++++++++++++++++--
 erts/emulator/test/Makefile         |   1 +
 erts/emulator/test/small_SUITE.erl  | 115 ++++++++++++++++++++++++++++++++++++
 3 files changed, 194 insertions(+), 5 deletions(-)
 create mode 100644 erts/emulator/test/small_SUITE.erl

diff --git a/erts/emulator/beam/arith_instrs.tab b/erts/emulator/beam/arith_instrs.tab
index 5f23b2c168..f14b376419 100644
--- a/erts/emulator/beam/arith_instrs.tab
+++ b/erts/emulator/beam/arith_instrs.tab
@@ -51,11 +51,50 @@ plus.fetch(Op1, Op2) {
 
 plus.execute(Fail, Dst) {
     if (ERTS_LIKELY(is_both_small(PlusOp1, PlusOp2))) {
+#ifdef HAVE_OVERFLOW_CHECK_BUILTINS
+        Sint lhs_tagged, rhs_untagged, res;
+
+        /* The value part of immediate integers start right after the tag and
+         * occupy the rest of the word, so if you squint a bit they look like
+         * fixed-point integers; as long as you mask the tag away you will get
+         * correct results from addition/subtraction since they share the same
+         * notion of zero. It's fairly easy to see that the following holds
+         * when (a + b) is in range:
+         *
+         *     (a >> s) + (b >> s) == ((a & ~m) + (b & ~m)) >> s
+         *
+         * Where 's' is the tag size and 'm' is the tag mask.
+         *
+         * The left-hand side is our fallback in the #else clause and is the
+         * fastest way to do this safely in plain C. The actual addition will
+         * never overflow since `Sint` has a much greater range than our
+         * smalls, so we can use the IS_SSMALL macro to see if the result is
+         * within range.
+         *
+         * What we're doing below is an extension of the right-hand side. By
+         * treating `a` and `b` as fixed-point integers, all additions whose
+         * result is out of range will also overflow `Sint` and we can use the
+         * compiler's overflow intrinsics to check for this condition.
+         *
+         * In addition, since the tag lives in the lowest bits we can further
+         * optimize this by only stripping the tag from either side. The higher
+         * bits can't influence the tag bits since we bail on overflow, so the
+         * tag bits from the tagged side will simply appear in the result. */
+        lhs_tagged = PlusOp1;
+        rhs_untagged = PlusOp2 & ~_TAG_IMMED1_MASK;
+
+        if (ERTS_LIKELY(!__builtin_add_overflow(lhs_tagged, rhs_untagged, &res))) {
+            ASSERT(is_small(res));
+            $Dst = res;
+            $NEXT0();
+        }
+#else
         Sint i = signed_val(PlusOp1) + signed_val(PlusOp2);
         if (ERTS_LIKELY(IS_SSMALL(i))) {
             $Dst = make_small(i);
             $NEXT0();
         }
+#endif
     }
     $OUTLINED_ARITH_2($Fail, mixed_plus, BIF_splus_2, PlusOp1, PlusOp2, $Dst);
 }
@@ -73,11 +112,26 @@ minus.fetch(Op1, Op2) {
 
 minus.execute(Fail, Dst) {
     if (ERTS_LIKELY(is_both_small(MinusOp1, MinusOp2))) {
+#ifdef HAVE_OVERFLOW_CHECK_BUILTINS
+        Sint lhs_tagged, rhs_untagged, res;
+
+        /* See plus.execute */
+        lhs_tagged = MinusOp1;
+        rhs_untagged = MinusOp2 & ~_TAG_IMMED1_MASK;
+
+        if (ERTS_LIKELY(!__builtin_sub_overflow(lhs_tagged, rhs_untagged, &res))) {
+            ASSERT(is_small(res));
+            $Dst = res;
+            $NEXT0();
+        }
+#else
         Sint i = signed_val(MinusOp1) - signed_val(MinusOp2);
+
         if (ERTS_LIKELY(IS_SSMALL(i))) {
             $Dst = make_small(i);
             $NEXT0();
         }
+#endif
     }
     $OUTLINED_ARITH_2($Fail, mixed_minus, BIF_sminus_2, MinusOp1, MinusOp2, $Dst);
 }
@@ -97,12 +151,27 @@ increment.execute(IncrementVal, Dst) {
     Eterm result;
 
     if (ERTS_LIKELY(is_small(increment_reg_val))) {
+#ifdef HAVE_OVERFLOW_CHECK_BUILTINS
+        Sint lhs_tagged, rhs_untagged, res;
+
+        /* See plus.execute */
+        lhs_tagged = increment_reg_val;
+        rhs_untagged = (Sint)increment_val << _TAG_IMMED1_SIZE;
+
+        if (ERTS_LIKELY(!__builtin_add_overflow(lhs_tagged, rhs_untagged, &res))) {
+            ASSERT(is_small(res));
+            $Dst = res;
+            $NEXT0();
+        }
+#else
         Sint i = signed_val(increment_reg_val) + increment_val;
         if (ERTS_LIKELY(IS_SSMALL(i))) {
             $Dst = make_small(i);
             $NEXT0();
         }
+#endif
     }
+
     result = erts_mixed_plus(c_p, increment_reg_val, make_small(increment_val));
     ERTS_HOLE_CHECK(c_p);
     if (ERTS_LIKELY(is_value(result))) {
@@ -118,11 +187,15 @@ i_times(Fail, Op1, Op2, Dst) {
     Eterm op2 = $Op2;
 #ifdef HAVE_OVERFLOW_CHECK_BUILTINS
     if (ERTS_LIKELY(is_both_small(op1, op2))) {
-        Sint a = signed_val(op1);
-        Sint b = signed_val(op2);
-        Sint res;
-        if (ERTS_LIKELY(!__builtin_mul_overflow(a, b, &res) && IS_SSMALL(res))) {
-            $Dst = make_small(res);
+        /* See plus.execute */
+        Sint lhs_untagged, rhs_actual, res;
+
+        lhs_untagged = op1 & ~_TAG_IMMED1_MASK;
+        rhs_actual = signed_val(op2);
+
+        if (ERTS_LIKELY(!__builtin_mul_overflow(lhs_untagged, rhs_actual, &res))) {
+            ASSERT(!(res & _TAG_IMMED1_MASK));
+            $Dst = res | _TAG_IMMED1_SMALL;
             $NEXT0();
         }
     }
diff --git a/erts/emulator/test/Makefile b/erts/emulator/test/Makefile
index 8c2054cb51..28775b6f02 100644
--- a/erts/emulator/test/Makefile
+++ b/erts/emulator/test/Makefile
@@ -105,6 +105,7 @@ MODULES= \
 	send_term_SUITE \
 	sensitive_SUITE \
 	signal_SUITE \
+	small_SUITE \
 	smoke_test_SUITE \
 	statistics_SUITE \
 	system_info_SUITE \
diff --git a/erts/emulator/test/small_SUITE.erl b/erts/emulator/test/small_SUITE.erl
new file mode 100644
index 0000000000..00a02e5560
--- /dev/null
+++ b/erts/emulator/test/small_SUITE.erl
@@ -0,0 +1,115 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2019. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+-module(small_SUITE).
+
+-export([all/0, suite/0]).
+-export([edge_cases/1]).
+
+-include_lib("common_test/include/ct.hrl").
+
+suite() ->
+    [{ct_hooks,[ts_install_cth]},
+     {timetrap, {minutes, 1}}].
+
+all() ->
+    [edge_cases].
+
+edge_cases(Config) when is_list(Config) ->
+    {MinSmall, MaxSmall} = Limits = determine_small_limits(0),
+    ct:pal("Limits = ~p", [Limits]),
+
+    true = (MaxSmall + 1) =:= MaxSmall + id(1),
+    true = (MinSmall - 1) =:= MinSmall - id(1),
+    true = (MaxSmall + 1) > id(MaxSmall),
+    true = (MinSmall - 1) < id(MinSmall),
+    -1 = MinSmall + id(MaxSmall),
+    -1 = MaxSmall + id(MinSmall),
+
+    false = is_small(MinSmall * -1),
+    false = is_small(MinSmall - id(1)),
+    false = is_small(MinSmall - 1),
+    false = is_small(MaxSmall + id(1)),
+
+    Lower = lists:seq(MinSmall, MinSmall + 128),
+    Upper = lists:seq(MaxSmall, MaxSmall - 128, -1),
+    Pow2 = seq_pow2(MinSmall, MaxSmall),
+    NearZero = lists:seq(-128, 128),
+
+    ok = test_combinations([Lower, Upper, Pow2, NearZero], MinSmall, MaxSmall),
+
+    ok.
+
+test_combinations([As | Rest]=TestVectors, MinS, MaxS) ->
+    [begin
+        _ = [arith_test(A, B, MinS, MaxS) || B <- Bs]
+     end || A <- As, Bs <- TestVectors],
+    test_combinations(Rest, MinS, MaxS);
+test_combinations([], _MinS, _MaxS) ->
+    ok.
+
+%% Builds a sequence of all powers of 2 between MinSmall and MaxSmall
+seq_pow2(MinSmall, MaxSmall) ->
+    sp2_1(MinSmall, MinSmall, MaxSmall).
+
+sp2_1(N, _MinS, MaxS) when N >= MaxS ->
+    [];
+sp2_1(-1, MinS, MaxS) ->
+    [-1 | sp2_1(1, MinS, MaxS)];
+sp2_1(N, MinS, MaxS) when N < 0 ->
+    [N | sp2_1(N bsr 1, MinS, MaxS)];
+sp2_1(N, MinS, MaxS) when N > 0 ->
+    [N | sp2_1(N bsl 1, MinS, MaxS)].
+
+arith_test(A, B, MinS, MaxS) ->
+    verify_kind(A + B, MinS, MaxS),
+    verify_kind(B + A, MinS, MaxS),
+    verify_kind(A - B, MinS, MaxS),
+    verify_kind(B - A, MinS, MaxS),
+    verify_kind(A * B, MinS, MaxS),
+    verify_kind(B * A, MinS, MaxS),
+
+    true = A + B =:= apply(erlang, id('+'), [A, B]),
+    true = A - B =:= apply(erlang, id('-'), [A, B]),
+    true = A * B =:= apply(erlang, id('*'), [A, B]),
+
+    true = A + B =:= B + id(A),
+    true = A - B =:= A + id(-B),
+    true = B - A =:= B + id(-A),
+    true = A * B =:= B * id(A),
+
+    true = B =:= 0 orelse ((A * B) div id(B) =:= A),
+    true = A =:= 0 orelse ((B * A) div id(A) =:= B),
+
+    ok.
+
+%% Verifies that N is a small when it should be
+verify_kind(N, MinS, MaxS) ->
+    true = is_small(N) =:= (N >= MinS andalso N =< MaxS).
+
+is_small(N) when is_integer(N) ->
+    0 =:= erts_debug:flat_size(N).
+
+determine_small_limits(N) ->
+    case is_small(-1 bsl N) of
+        true -> determine_small_limits(N + 1);
+        false -> {-1 bsl (N - 1), (1 bsl (N - 1)) - 1}
+    end.
+
+id(I) -> I.
-- 
2.16.4

openSUSE Build Service is sponsored by