File 5361-Add-a-cost-for-copying-binaries-with-the-binary-synt.patch of Package erlang
From efce237f21f75637a7cbff4486b6e4b30f3a4c50 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Mon, 20 Apr 2020 13:16:02 +0200
Subject: [PATCH] Add a cost for copying binaries with the binary syntax
When constructing binaries using the binary syntax, there would
not be any cost in terms of reductions for copying large binaries,
and a process could copy many large binaries before being scheduled
out.
Add a cost of 1 reduction per 1 KiB of binary data copied in
binary construction.
---
erts/emulator/beam/bs_instrs.tab | 15 ++++++++++++---
erts/emulator/beam/erl_bits.c | 24 ++++++++++++++++++++---
erts/emulator/beam/erl_bits.h | 4 ++--
erts/emulator/test/bs_construct_SUITE.erl | 32 +++++++++++++++++++++++++++++--
4 files changed, 65 insertions(+), 10 deletions(-)
diff --git a/erts/emulator/beam/bs_instrs.tab b/erts/emulator/beam/bs_instrs.tab
index fc3da7d016..df45dd54c6 100644
--- a/erts/emulator/beam/bs_instrs.tab
+++ b/erts/emulator/beam/bs_instrs.tab
@@ -294,10 +294,13 @@ i_new_bs_put_binary(Fail, Sz, Flags, Src) {
Eterm sz = $Sz;
Sint _size;
$BS_GET_UNCHECKED_FIELD_SIZE(sz, (($Flags) >> 3), $BADARG($Fail), _size);
- if (!erts_new_bs_put_binary(ERL_BITS_ARGS_2(($Src), _size))) {
+ c_p->fcalls = FCALLS;
+ if (!erts_new_bs_put_binary(c_p, $Src, _size)) {
$BADARG($Fail);
}
+ FCALLS = c_p->fcalls;
}
+
i_new_bs_put_binary_all := i_new_bs_put_binary_all.fetch.execute;
i_new_bs_put_binary_all.head() {
@@ -309,15 +312,19 @@ i_new_bs_put_binary_all.fetch(Src) {
}
i_new_bs_put_binary_all.execute(Fail, Unit) {
- if (!erts_new_bs_put_binary_all(ERL_BITS_ARGS_2(src, ($Unit)))) {
+ c_p->fcalls = FCALLS;
+ if (!erts_new_bs_put_binary_all(c_p, src, ($Unit))) {
$BADARG($Fail);
}
+ FCALLS = c_p->fcalls;
}
i_new_bs_put_binary_imm(Fail, Sz, Src) {
- if (!erts_new_bs_put_binary(ERL_BITS_ARGS_2(($Src), ($Sz)))) {
+ c_p->fcalls = FCALLS;
+ if (!erts_new_bs_put_binary(c_p, ($Src), ($Sz))) {
$BADARG($Fail);
}
+ FCALLS = c_p->fcalls;
}
i_new_bs_put_float(Fail, Sz, Flags, Src) {
@@ -707,11 +714,13 @@ i_bs_append(Fail, ExtraHeap, Live, Unit, Size, Dst) {
i_bs_private_append(Fail, Unit, Size, Src, Dst) {
Eterm res;
+ c_p->fcalls = FCALLS;
res = erts_bs_private_append(c_p, $Src, $Size, $Unit);
if (is_non_value(res)) {
/* c_p->freason is already set (to BADARG or SYSTEM_LIMIT). */
$FAIL_HEAD_OR_BODY($Fail);
}
+ FCALLS = c_p->fcalls;
$Dst = res;
}
diff --git a/erts/emulator/beam/erl_bits.c b/erts/emulator/beam/erl_bits.c
index 7084642f7c..79c5781929 100644
--- a/erts/emulator/beam/erl_bits.c
+++ b/erts/emulator/beam/erl_bits.c
@@ -40,6 +40,18 @@
#define BIT_IS_MACHINE_ENDIAN(x) (((x)&BSF_LITTLE) == BIT_ENDIAN_MACHINE)
+/*
+ * Here is how many bits we can copy in each reduction.
+ *
+ * At the time of writing of this comment, CONTEXT_REDS was 4000 and
+ * BITS_PER_REDUCTION was 1 KiB (8192 bits). The time for copying an
+ * unaligned 4000 KiB binary on my computer (which has a 4,2 GHz Intel
+ * i7 CPU) was about 5 ms. The time was approximately 4 times lower if
+ * the source and destinations binaries were aligned.
+ */
+
+#define BITS_PER_REDUCTION (8*1024)
+
/*
* MAKE_MASK(n) constructs a mask with n bits.
* Example: MAKE_MASK(3) returns the binary number 00000111.
@@ -977,14 +989,14 @@ erts_bs_put_utf16(ERL_BITS_PROTO_2(Eterm arg, Uint flags))
erts_bin_offset += num_bits;
return 1;
}
-
int
-erts_new_bs_put_binary(ERL_BITS_PROTO_2(Eterm arg, Uint num_bits))
+erts_new_bs_put_binary(Process *c_p, Eterm arg, Uint num_bits)
{
byte *bptr;
Uint bitoffs;
Uint bitsize;
+ ERL_BITS_DEFINE_STATEP(c_p);
if (!is_binary(arg)) {
return 0;
@@ -995,16 +1007,18 @@ erts_new_bs_put_binary(ERL_BITS_PROTO_2(Eterm arg, Uint num_bits))
}
copy_binary_to_buffer(erts_current_bin, erts_bin_offset, bptr, bitoffs, num_bits);
erts_bin_offset += num_bits;
+ BUMP_REDS(c_p, num_bits / BITS_PER_REDUCTION);
return 1;
}
int
-erts_new_bs_put_binary_all(ERL_BITS_PROTO_2(Eterm arg, Uint unit))
+erts_new_bs_put_binary_all(Process *c_p, Eterm arg, Uint unit)
{
byte *bptr;
Uint bitoffs;
Uint bitsize;
Uint num_bits;
+ ERL_BITS_DEFINE_STATEP(c_p);
/*
* This type test is not needed if the code was compiled with
@@ -1029,6 +1043,7 @@ erts_new_bs_put_binary_all(ERL_BITS_PROTO_2(Eterm arg, Uint unit))
}
copy_binary_to_buffer(erts_current_bin, erts_bin_offset, bptr, bitoffs, num_bits);
erts_bin_offset += num_bits;
+ BUMP_REDS(c_p, num_bits / BITS_PER_REDUCTION);
return 1;
}
@@ -1352,6 +1367,7 @@ erts_bs_append(Process* c_p, Eterm* reg, Uint live, Eterm build_size_term,
binp = erts_bin_realloc(binp, new_size);
pb->val = binp;
pb->bytes = (byte *) binp->orig_bytes;
+ BUMP_REDS(c_p, pb->size / BITS_PER_REDUCTION);
}
erts_current_bin = pb->bytes;
@@ -1473,6 +1489,7 @@ erts_bs_append(Process* c_p, Eterm* reg, Uint live, Eterm build_size_term,
* Now copy the data into the binary.
*/
copy_binary_to_buffer(erts_current_bin, 0, src_bytes, bitoffs, erts_bin_offset);
+ BUMP_REDS(c_p, erts_bin_offset / BITS_PER_REDUCTION);
return make_binary(sb);
}
@@ -1537,6 +1554,7 @@ erts_bs_private_append(Process* p, Eterm bin, Eterm build_size_term, Uint unit)
if (binp->orig_size < pb->size) {
Uint new_size = GROW_PROC_BIN_SIZE(pb->size);
+ BUMP_REDS(p, pb->size / BITS_PER_REDUCTION);
if (pb->flags & PB_IS_WRITABLE) {
/*
* This is the normal case - the binary is writable.
diff --git a/erts/emulator/beam/erl_bits.h b/erts/emulator/beam/erl_bits.h
index eced966a7f..d9262bea00 100644
--- a/erts/emulator/beam/erl_bits.h
+++ b/erts/emulator/beam/erl_bits.h
@@ -164,8 +164,8 @@ Eterm erts_bs_get_binary_all_2(Process *p, ErlBinMatchBuffer* mb);
int erts_new_bs_put_integer(ERL_BITS_PROTO_3(Eterm Integer, Uint num_bits, unsigned flags));
int erts_bs_put_utf8(ERL_BITS_PROTO_1(Eterm Integer));
int erts_bs_put_utf16(ERL_BITS_PROTO_2(Eterm Integer, Uint flags));
-int erts_new_bs_put_binary(ERL_BITS_PROTO_2(Eterm Bin, Uint num_bits));
-int erts_new_bs_put_binary_all(ERL_BITS_PROTO_2(Eterm Bin, Uint unit));
+int erts_new_bs_put_binary(Process *c_p, Eterm Bin, Uint num_bits);
+int erts_new_bs_put_binary_all(Process *c_p, Eterm Bin, Uint unit);
int erts_new_bs_put_float(Process *c_p, Eterm Float, Uint num_bits, int flags);
void erts_new_bs_put_string(ERL_BITS_PROTO_2(byte* iptr, Uint num_bytes));
diff --git a/erts/emulator/test/bs_construct_SUITE.erl b/erts/emulator/test/bs_construct_SUITE.erl
index 29d9c5a74e..25549a4b73 100644
--- a/erts/emulator/test/bs_construct_SUITE.erl
+++ b/erts/emulator/test/bs_construct_SUITE.erl
@@ -27,7 +27,8 @@
mem_leak/1, coerce_to_float/1, bjorn/1, append_empty_is_same/1,
huge_float_field/1, system_limit/1, badarg/1,
copy_writable_binary/1, kostis/1, dynamic/1, bs_add/1,
- otp_7422/1, zero_width/1, bad_append/1, bs_append_overflow/1]).
+ otp_7422/1, zero_width/1, bad_append/1, bs_append_overflow/1,
+ reductions/1]).
-include_lib("common_test/include/ct.hrl").
@@ -40,7 +41,7 @@ all() ->
in_guard, mem_leak, coerce_to_float, bjorn, append_empty_is_same,
huge_float_field, system_limit, badarg,
copy_writable_binary, kostis, dynamic, bs_add, otp_7422, zero_width,
- bad_append, bs_append_overflow].
+ bad_append, bs_append_overflow, reductions].
init_per_suite(Config) ->
Config.
@@ -889,6 +890,33 @@ bs_append_overflow_unsigned() ->
C = <<A/binary,1,B/binary>>,
true = byte_size(B) < byte_size(C).
+reductions(_Config) ->
+ TwoMeg = <<0:(2_000*1024)/unit:8>>,
+ reds_at_least(2000, fun() -> <<0:8,TwoMeg/binary>> end),
+ reds_at_least(4000, fun() -> <<0:8,TwoMeg/binary,TwoMeg/binary>> end),
+ reds_at_least(1000, fun() -> <<0:8,TwoMeg:(1000*1024)/binary>> end),
+
+ %% Here we expect about 500 reductions in the bs_append
+ %% instruction for setting up a writable binary and about 2000
+ %% reductions in the bs_put_binary instruction for copying the
+ %% binary data.
+ reds_at_least(2500, fun() -> <<TwoMeg/binary,TwoMeg:(2000*1024)/binary>> end),
+ ok.
+
+reds_at_least(N, Fun) ->
+ receive after 1 -> ok end,
+ {reductions,Red0} = process_info(self(), reductions),
+ _ = Fun(),
+ {reductions,Red1} = process_info(self(), reductions),
+ Diff = Red1 - Red0,
+ io:format("Expected at least ~p; got ~p\n", [N,Diff]),
+ if
+ Diff >= N ->
+ ok;
+ Diff ->
+ ct:fail({expected,N,got,Diff})
+ end.
+
id(I) -> I.
memsize() ->
--
2.16.4