File 5361-Add-a-cost-for-copying-binaries-with-the-binary-synt.patch of Package erlang

From efce237f21f75637a7cbff4486b6e4b30f3a4c50 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Mon, 20 Apr 2020 13:16:02 +0200
Subject: [PATCH] Add a cost for copying binaries with the binary syntax

When constructing binaries using the binary syntax, there would
not be any cost in terms of reductions for copying large binaries,
and a process could copy many large binaries before being scheduled
out.

Add a cost of 1 reduction per 1 KiB of binary data copied in
binary construction.
---
 erts/emulator/beam/bs_instrs.tab          | 15 ++++++++++++---
 erts/emulator/beam/erl_bits.c             | 24 ++++++++++++++++++++---
 erts/emulator/beam/erl_bits.h             |  4 ++--
 erts/emulator/test/bs_construct_SUITE.erl | 32 +++++++++++++++++++++++++++++--
 4 files changed, 65 insertions(+), 10 deletions(-)

diff --git a/erts/emulator/beam/bs_instrs.tab b/erts/emulator/beam/bs_instrs.tab
index fc3da7d016..df45dd54c6 100644
--- a/erts/emulator/beam/bs_instrs.tab
+++ b/erts/emulator/beam/bs_instrs.tab
@@ -294,10 +294,13 @@ i_new_bs_put_binary(Fail, Sz, Flags, Src) {
     Eterm sz = $Sz;
     Sint _size;
     $BS_GET_UNCHECKED_FIELD_SIZE(sz, (($Flags) >> 3), $BADARG($Fail), _size);
-    if (!erts_new_bs_put_binary(ERL_BITS_ARGS_2(($Src), _size))) {
+    c_p->fcalls = FCALLS;
+    if (!erts_new_bs_put_binary(c_p, $Src, _size)) {
         $BADARG($Fail);
     }
+    FCALLS = c_p->fcalls;
 }
+
 i_new_bs_put_binary_all := i_new_bs_put_binary_all.fetch.execute;
 
 i_new_bs_put_binary_all.head() {
@@ -309,15 +312,19 @@ i_new_bs_put_binary_all.fetch(Src) {
 }
 
 i_new_bs_put_binary_all.execute(Fail, Unit) {
-    if (!erts_new_bs_put_binary_all(ERL_BITS_ARGS_2(src, ($Unit)))) {
+    c_p->fcalls = FCALLS;
+    if (!erts_new_bs_put_binary_all(c_p, src, ($Unit))) {
         $BADARG($Fail);
     }
+    FCALLS = c_p->fcalls;
 }
 
 i_new_bs_put_binary_imm(Fail, Sz, Src) {
-    if (!erts_new_bs_put_binary(ERL_BITS_ARGS_2(($Src), ($Sz)))) {
+    c_p->fcalls = FCALLS;
+    if (!erts_new_bs_put_binary(c_p, ($Src), ($Sz))) {
         $BADARG($Fail);
     }
+    FCALLS = c_p->fcalls;
 }
 
 i_new_bs_put_float(Fail, Sz, Flags, Src) {
@@ -707,11 +714,13 @@ i_bs_append(Fail, ExtraHeap, Live, Unit, Size, Dst) {
 i_bs_private_append(Fail, Unit, Size, Src, Dst) {
     Eterm res;
 
+    c_p->fcalls = FCALLS;
     res = erts_bs_private_append(c_p, $Src, $Size, $Unit);
     if (is_non_value(res)) {
         /* c_p->freason is already set (to BADARG or SYSTEM_LIMIT). */
         $FAIL_HEAD_OR_BODY($Fail);
     }
+    FCALLS = c_p->fcalls;
     $Dst = res;
 }
 
diff --git a/erts/emulator/beam/erl_bits.c b/erts/emulator/beam/erl_bits.c
index 7084642f7c..79c5781929 100644
--- a/erts/emulator/beam/erl_bits.c
+++ b/erts/emulator/beam/erl_bits.c
@@ -40,6 +40,18 @@
 
 #define BIT_IS_MACHINE_ENDIAN(x) (((x)&BSF_LITTLE) == BIT_ENDIAN_MACHINE)
 
+/*
+ * Here is how many bits we can copy in each reduction.
+ *
+ * At the time of writing of this comment, CONTEXT_REDS was 4000 and
+ * BITS_PER_REDUCTION was 1 KiB (8192 bits). The time for copying an
+ * unaligned 4000 KiB binary on my computer (which has a 4,2 GHz Intel
+ * i7 CPU) was about 5 ms. The time was approximately 4 times lower if
+ * the source and destinations binaries were aligned.
+ */
+
+#define BITS_PER_REDUCTION (8*1024)
+
 /*
  * MAKE_MASK(n) constructs a mask with n bits.
  * Example: MAKE_MASK(3) returns the binary number 00000111.
@@ -977,14 +989,14 @@ erts_bs_put_utf16(ERL_BITS_PROTO_2(Eterm arg, Uint flags))
     erts_bin_offset += num_bits;
     return 1;
 }
-     
 
 int
-erts_new_bs_put_binary(ERL_BITS_PROTO_2(Eterm arg, Uint num_bits))
+erts_new_bs_put_binary(Process *c_p, Eterm arg, Uint num_bits)
 {
     byte *bptr;
     Uint bitoffs;
     Uint bitsize; 
+    ERL_BITS_DEFINE_STATEP(c_p);
 
     if (!is_binary(arg)) {
 	return 0;
@@ -995,16 +1007,18 @@ erts_new_bs_put_binary(ERL_BITS_PROTO_2(Eterm arg, Uint num_bits))
     }
     copy_binary_to_buffer(erts_current_bin, erts_bin_offset, bptr, bitoffs, num_bits);
     erts_bin_offset += num_bits;
+    BUMP_REDS(c_p, num_bits / BITS_PER_REDUCTION);
     return 1;
 }
 
 int
-erts_new_bs_put_binary_all(ERL_BITS_PROTO_2(Eterm arg, Uint unit))
+erts_new_bs_put_binary_all(Process *c_p, Eterm arg, Uint unit)
 {
    byte *bptr;
    Uint bitoffs;
    Uint bitsize;
    Uint num_bits;
+   ERL_BITS_DEFINE_STATEP(c_p);
 
    /*
     * This type test is not needed if the code was compiled with
@@ -1029,6 +1043,7 @@ erts_new_bs_put_binary_all(ERL_BITS_PROTO_2(Eterm arg, Uint unit))
    }
    copy_binary_to_buffer(erts_current_bin, erts_bin_offset, bptr, bitoffs, num_bits);
    erts_bin_offset += num_bits;
+   BUMP_REDS(c_p, num_bits / BITS_PER_REDUCTION);
    return 1;
 }
 
@@ -1352,6 +1367,7 @@ erts_bs_append(Process* c_p, Eterm* reg, Uint live, Eterm build_size_term,
 	binp = erts_bin_realloc(binp, new_size);
 	pb->val = binp;
 	pb->bytes = (byte *) binp->orig_bytes;
+        BUMP_REDS(c_p, pb->size / BITS_PER_REDUCTION);
     }
     erts_current_bin = pb->bytes;
 
@@ -1473,6 +1489,7 @@ erts_bs_append(Process* c_p, Eterm* reg, Uint live, Eterm build_size_term,
 	 * Now copy the data into the binary.
 	 */
 	copy_binary_to_buffer(erts_current_bin, 0, src_bytes, bitoffs, erts_bin_offset);
+        BUMP_REDS(c_p, erts_bin_offset / BITS_PER_REDUCTION);
 
 	return make_binary(sb);
     }
@@ -1537,6 +1554,7 @@ erts_bs_private_append(Process* p, Eterm bin, Eterm build_size_term, Uint unit)
     if (binp->orig_size < pb->size) {
 	Uint new_size = GROW_PROC_BIN_SIZE(pb->size);
 
+        BUMP_REDS(p, pb->size / BITS_PER_REDUCTION);
 	if (pb->flags & PB_IS_WRITABLE) {
 	    /*
 	     * This is the normal case - the binary is writable.
diff --git a/erts/emulator/beam/erl_bits.h b/erts/emulator/beam/erl_bits.h
index eced966a7f..d9262bea00 100644
--- a/erts/emulator/beam/erl_bits.h
+++ b/erts/emulator/beam/erl_bits.h
@@ -164,8 +164,8 @@ Eterm erts_bs_get_binary_all_2(Process *p, ErlBinMatchBuffer* mb);
 int erts_new_bs_put_integer(ERL_BITS_PROTO_3(Eterm Integer, Uint num_bits, unsigned flags));
 int erts_bs_put_utf8(ERL_BITS_PROTO_1(Eterm Integer));
 int erts_bs_put_utf16(ERL_BITS_PROTO_2(Eterm Integer, Uint flags));
-int erts_new_bs_put_binary(ERL_BITS_PROTO_2(Eterm Bin, Uint num_bits));
-int erts_new_bs_put_binary_all(ERL_BITS_PROTO_2(Eterm Bin, Uint unit));
+int erts_new_bs_put_binary(Process *c_p, Eterm Bin, Uint num_bits);
+int erts_new_bs_put_binary_all(Process *c_p, Eterm Bin, Uint unit);
 int erts_new_bs_put_float(Process *c_p, Eterm Float, Uint num_bits, int flags);
 void erts_new_bs_put_string(ERL_BITS_PROTO_2(byte* iptr, Uint num_bytes));
 
diff --git a/erts/emulator/test/bs_construct_SUITE.erl b/erts/emulator/test/bs_construct_SUITE.erl
index 29d9c5a74e..25549a4b73 100644
--- a/erts/emulator/test/bs_construct_SUITE.erl
+++ b/erts/emulator/test/bs_construct_SUITE.erl
@@ -27,7 +27,8 @@
 	 mem_leak/1, coerce_to_float/1, bjorn/1, append_empty_is_same/1,
 	 huge_float_field/1, system_limit/1, badarg/1,
 	 copy_writable_binary/1, kostis/1, dynamic/1, bs_add/1,
-	 otp_7422/1, zero_width/1, bad_append/1, bs_append_overflow/1]).
+	 otp_7422/1, zero_width/1, bad_append/1, bs_append_overflow/1,
+         reductions/1]).
 
 -include_lib("common_test/include/ct.hrl").
 
@@ -40,7 +41,7 @@ all() ->
      in_guard, mem_leak, coerce_to_float, bjorn, append_empty_is_same,
      huge_float_field, system_limit, badarg,
      copy_writable_binary, kostis, dynamic, bs_add, otp_7422, zero_width,
-     bad_append, bs_append_overflow].
+     bad_append, bs_append_overflow, reductions].
 
 init_per_suite(Config) ->
     Config.
@@ -889,6 +890,33 @@ bs_append_overflow_unsigned() ->
     C = <<A/binary,1,B/binary>>,
     true = byte_size(B) < byte_size(C).
 
+reductions(_Config) ->
+    TwoMeg = <<0:(2_000*1024)/unit:8>>,
+    reds_at_least(2000, fun() -> <<0:8,TwoMeg/binary>> end),
+    reds_at_least(4000, fun() -> <<0:8,TwoMeg/binary,TwoMeg/binary>> end),
+    reds_at_least(1000, fun() -> <<0:8,TwoMeg:(1000*1024)/binary>> end),
+
+    %% Here we expect about 500 reductions in the bs_append
+    %% instruction for setting up a writable binary and about 2000
+    %% reductions in the bs_put_binary instruction for copying the
+    %% binary data.
+    reds_at_least(2500, fun() -> <<TwoMeg/binary,TwoMeg:(2000*1024)/binary>> end),
+    ok.
+
+reds_at_least(N, Fun) ->
+    receive after 1 -> ok end,
+    {reductions,Red0} = process_info(self(), reductions),
+    _ = Fun(),
+    {reductions,Red1} = process_info(self(), reductions),
+    Diff = Red1 - Red0,
+    io:format("Expected at least ~p; got ~p\n", [N,Diff]),
+    if
+        Diff >= N ->
+            ok;
+        Diff ->
+            ct:fail({expected,N,got,Diff})
+    end.
+
 id(I) -> I.
 
 memsize() ->
-- 
2.16.4

openSUSE Build Service is sponsored by