File 0461-Fix-rare-bug-in-binary-matching-again.patch of Package erlang

From 0bb261b730eff06cca524ea9999ff035585ef5b4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Fri, 28 Sep 2018 05:57:47 +0200
Subject: [PATCH] Fix rare bug in binary matching (again)

2e40d8d1c51a attempted fix a bug in binary matching, but it only
fixed the bug for the minimized test case.

This commit removes the previous fix and fixes the bug in a more
effective way. See the comments in the new code in `sys_core_bsm`
for an explanation.

This commit restores the optimizations in string.erl and dets_v9.erl
that the previous fix disabled.

I have not found any code where this commit will disable optimizations
when they are actually safe. There are some changes to the code
in ssl_cipher.erl in that some bs_start_match2 instruction did not
reuse the binary register for the match context, but the delayed
sub binary optimizations was never applied to the code in the first
place.

https://bugs.erlang.org/browse/ERL-689
---
 lib/compiler/src/beam_bsm.erl        | 13 +-------
 lib/compiler/src/sys_core_bsm.erl    | 62 ++++++++++++++++++++++++++++++++++++
 lib/compiler/test/bs_match_SUITE.erl | 38 ++++++++++++++++++++--
 3 files changed, 98 insertions(+), 15 deletions(-)

diff --git a/lib/compiler/src/beam_bsm.erl b/lib/compiler/src/beam_bsm.erl
index abc6e96c85..1c8e0e9854 100644
--- a/lib/compiler/src/beam_bsm.erl
+++ b/lib/compiler/src/beam_bsm.erl
@@ -310,18 +310,7 @@ btb_reaches_match_2([{test,bs_start_match2,{f,F},Live,[Bin,_],Ctx}|Is],
     end;
 btb_reaches_match_2([{test,_,{f,F},Ss}=I|Is], Regs, D0) ->
     btb_ensure_not_used(Ss, I, Regs),
-    D1 = btb_follow_branch(F, Regs, D0),
-    D = case Is of
-            [{bs_context_to_binary,_}|_] ->
-                %% bs_context_to_binary following a test instruction
-                %% probably needs the current position to be saved as
-                %% the new start position, but we can't be sure.
-                %% Therefore, conservatively disable the optimization
-                %% (instead of forcing a saving of the position).
-                D1#btb{must_save=true,must_not_save=true};
-            _ ->
-                D1
-        end,
+    D = btb_follow_branch(F, Regs, D0),
     btb_reaches_match_1(Is, Regs, D);
 btb_reaches_match_2([{test,_,{f,F},_,Ss,_}=I|Is], Regs, D0) ->
     btb_ensure_not_used(Ss, I, Regs),
diff --git a/lib/compiler/src/sys_core_fold.erl b/lib/compiler/src/sys_core_fold.erl
index d7b26c3a56..62657933ee 100644
--- a/lib/compiler/src/sys_core_fold.erl
+++ b/lib/compiler/src/sys_core_fold.erl
@@ -123,6 +123,14 @@ function([{#c_var{name={F,Arity}}=Name,B0}|Fs], FsAcc, Ws0) ->
 	B = expr(B0, value, sub_new()),			%This must be a fun!
 	{Name,B}
     catch
+        throw:unsafe_bs_context_to_binary ->
+            %% Unsafe bs_context_to_binary (in the sense that the
+            %% contents of the binary will probably be wrong).
+            %% Disable binary optimizations for the entire function.
+            %% We don't generate an INFO message, because this happens
+            %% very infrequently and it would be hard to explain in
+            %% a comprehensible way in an INFO message.
+            {Name,B0};
 	Class:Error ->
 	    Stack = erlang:get_stacktrace(),
 	    io:fwrite("Function: ~w/~w\n", [F,Arity]),
@@ -3196,6 +3204,7 @@
 bsm_do_an(Vs0, Pos, Cs0, Case) ->
     case nth(Pos, Vs0) of
 	#c_var{name=Vname}=V0 ->
+	    bsm_inner_context_to_binary(Cs0),
 	    Cs = bsm_do_an_var(Vname, Pos, Cs0, []),
 	    V = bsm_annotate_for_reuse(V0),
 	    Bef = lists:sublist(Vs0, Pos-1),
@@ -3210,6 +3219,59 @@ move_from_col(Pos, L) ->
 	    Case
     end.
 
+bsm_inner_context_to_binary([#c_clause{body=B}|Cs]) ->
+    %% Consider:
+    %%
+    %%   foo(<<Length, Data/binary>>) ->  %Line 1
+    %%      case {Data, Length} of        %Line 2
+    %%        {_, 0} -> Data;             %Line 3
+    %%        {<<...>>, 4} -> ...         %Line 4
+    %%      end.
+    %%
+    %% No sub binary will be created for Data in line 1. The match
+    %% context will be passed on to the `case` in line 2. In line 3,
+    %% this pass inserts a `bs_context_to_binary` instruction to
+    %% convert the match context representing Data to a binary before
+    %% returning it. The problem is that the binary created will be
+    %% the original binary (including the matched out Length field),
+    %% not the tail of the binary as it is supposed to be.
+    %%
+    %% Here follows a heuristic to disable the binary optimizations
+    %% for the entire function if this code kind of code is found.
+
+    case cerl_trees:free_variables(B) of
+        [] ->
+            %% Since there are no free variables in the body of
+            %% this clause, there can't be any troublesome
+            %% bs_context_to_binary instructions.
+            bsm_inner_context_to_binary(Cs);
+        [_|_]=Free ->
+            %% One of the free variables could refer to a match context
+            %% created by the outer binary match.
+            F = fun(#c_primop{name=#c_literal{val=bs_context_to_binary},
+                              args=[#c_var{name=V}]}, _) ->
+                        case member(V, Free) of
+                            true ->
+                                %% This bs_context_to_binary instruction will
+                                %% make a binary of the match context from an
+                                %% outer binary match. It is very likely that
+                                %% the contents of the binary will be wrong
+                                %% (the original binary as opposed to only
+                                %% the tail binary).
+                                throw(unsafe_bs_context_to_binary);
+                            false ->
+                                %% Safe. This bs_context_to_binary instruction
+                                %% will make a binary from a match context
+                                %% defined in the body of the clause.
+                                ok
+                        end;
+                   (_, _) ->
+                        ok
+                end,
+            cerl_trees:fold(F, ok, B)
+    end;
+bsm_inner_context_to_binary([]) -> ok.
+
 bsm_do_an_var(V, S, [#c_clause{pats=Ps,guard=G,body=B0}=C0|Cs], Acc) ->
     case nth(S, Ps) of
 	#c_var{name=VarName} ->
diff --git a/lib/compiler/test/bs_match_SUITE.erl b/lib/compiler/test/bs_match_SUITE.erl
index 7814738449..e97dbac8a6 100644
--- a/lib/compiler/test/bs_match_SUITE.erl
+++ b/lib/compiler/test/bs_match_SUITE.erl
@@ -1690,30 +1690,62 @@ non_opt_eq([], <<>>) ->
 
 %% ERL-689
 
-erl_689(Config) ->
+erl_689(_Config) ->
     {{0, 0, 0}, <<>>} = do_erl_689_1(<<0>>, ?MODULE),
     {{2018, 8, 7}, <<>>} = do_erl_689_1(<<4,2018:16/little,8,7>>, ?MODULE),
     {{0, 0, 0}, <<>>} = do_erl_689_2(?MODULE, <<0>>),
     {{2018, 8, 7}, <<>>} = do_erl_689_2(?MODULE, <<4,2018:16/little,8,7>>),
     ok.
 
-do_erl_689_1(<<Length, Data/binary>>, _) ->
+do_erl_689_1(Arg1, Arg2) ->
+    Res = do_erl_689_1a(Arg1, Arg2),
+    Res = do_erl_689_1b(Arg1, Arg2).
+
+do_erl_689_2(Arg1, Arg2) ->
+    Res = do_erl_689_2a(Arg1, Arg2),
+    Res = do_erl_689_2b(Arg1, Arg2).
+
+do_erl_689_1a(<<Length, Data/binary>>, _) ->
+    case {Data, Length} of
+        {_, 0} ->
+            %% bs_context_to_binary would incorrectly set Data to the original
+            %% binary (before matching in the function head).
+            {{0, 0, 0}, Data};
+        {<<Y:16/little, M, D, Rest/binary>>, 4} ->
+            {{Y, M, D}, Rest}
+    end.
+
+do_erl_689_1b(<<Length, Data/binary>>, _) ->
     case {Data, Length} of
         {_, 0} ->
             %% bs_context_to_binary would incorrectly set Data to the original
             %% binary (before matching in the function head).
+            id(0),
             {{0, 0, 0}, Data};
         {<<Y:16/little, M, D, Rest/binary>>, 4} ->
+            id(1),
+            {{Y, M, D}, Rest}
+    end.
+
+do_erl_689_2a(_, <<Length, Data/binary>>) ->
+    case {Length, Data} of
+        {0, _} ->
+            %% bs_context_to_binary would incorrectly set Data to the original
+            %% binary (before matching in the function head).
+            {{0, 0, 0}, Data};
+        {4, <<Y:16/little, M, D, Rest/binary>>} ->
             {{Y, M, D}, Rest}
     end.
 
-do_erl_689_2(_, <<Length, Data/binary>>) ->
+do_erl_689_2b(_, <<Length, Data/binary>>) ->
     case {Length, Data} of
         {0, _} ->
             %% bs_context_to_binary would incorrectly set Data to the original
             %% binary (before matching in the function head).
+            id(0),
             {{0, 0, 0}, Data};
         {4, <<Y:16/little, M, D, Rest/binary>>} ->
+            id(1),
             {{Y, M, D}, Rest}
     end.
 
-- 
2.16.4

openSUSE Build Service is sponsored by