File 0428-Correct-evaluation-order-for-binary-comprehensions.patch of Package erlang

From 48a8d22383e4e28455fbeec3bd63ac7c899b8e3b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Thu, 3 Aug 2023 08:35:09 +0200
Subject: [PATCH] Correct evaluation order for binary comprehensions

Consider the following function:

    bug() ->
        << <<C:8>> || C <- [], _ <- ok >>.

The first generator is an empty list, so the second generator should
never be evaluated, and thus the expected result is an empty
binary. That is indeed the result when evaluating the comprehension in
the shell:

    1> << <<C:8>> || C <- [], _ <- ok >>.
    <<>>

(And also when turning off compiler optimizations.)

However, when running compiled code there will be an exception:

    2> t:bug().
    ** exception error: bad generator ok
         in function  t:bug/0 (t.erl, line 8)

The reason for the exception is that the compiler as an optimization
inserts code to calculate the size of the resulting binary. As part of
the size calculation, `length(ok)` is evaluated, which will obviously
fail. When the size calculation fails, a `bad_generator` exception is
raised.

This commit changes the error handling for the size calculation to
use a default size for the binary instead of raising an exception.

Fixes #7494
---
 lib/compiler/src/beam_ssa_bc_size.erl  | 232 +++++++++----------------
 lib/compiler/test/bs_bincomp_SUITE.erl |   8 +
 2 files changed, 94 insertions(+), 146 deletions(-)

diff --git a/lib/compiler/src/beam_ssa_bc_size.erl b/lib/compiler/src/beam_ssa_bc_size.erl
index 9f1e802dd0..0e33e0c5af 100644
--- a/lib/compiler/src/beam_ssa_bc_size.erl
+++ b/lib/compiler/src/beam_ssa_bc_size.erl
@@ -91,19 +91,14 @@ opt_blks([], _ParamInfo, _StMap, unchanged, _Count, _Acc) ->
 opt_writable(Bs0, L, Blk, Blks, ParamInfo, Count0, Acc0) ->
     case {Blk,Blks} of
         {#b_blk{last=#b_br{succ=Next,fail=Next}},
-         [{Next,#b_blk{is=[#b_set{op=call,args=[_|Args],dst=Dst}=Call|_],
-                       last=CallLast}}|_]} ->
+         [{Next,#b_blk{is=[#b_set{op=call,args=[_|Args],dst=Dst}=Call|_]}}|_]} ->
             ensure_not_match_context(Call, ParamInfo),
 
             ArgTypes = maps:from_list([{Arg,{arg,Arg}} || Arg <- Args]),
             Bs = maps:merge(ArgTypes, Bs0),
             Result = map_get(Dst, call_size_func(Call, Bs)),
-            {Expr,Annos} = make_expr_tree(Result),
-
-            %% Note that we pass the generator call's terminator: should we
-            %% need to raise a `bad_generator` exception, it needs to fail in
-            %% the same manner as the generator itself.
-            cg_size_calc(Expr, L, Blk, CallLast, Annos, Count0, Acc0);
+            Expr = make_expr_tree(Result),
+            cg_size_calc(Expr, L, Blk, Count0, Acc0);
         {_,_} ->
             throw(not_possible)
     end.
@@ -140,7 +135,7 @@ ensure_not_match_context(#b_set{anno=Anno,args=[_|Args]}, ParamInfo) ->
 %%% and how many bits are appended to the writable binary.
 %%%
 
-call_size_func(#b_set{anno=Anno,op=call,args=[Name|Args],dst=Dst}, Bs) ->
+call_size_func(#b_set{op=call,args=[Name|Args],dst=Dst}, Bs) ->
     StMap = map_get(st_map, Bs),
     case StMap of
         #{Name := #opt_st{ssa=Linear,args=Params}} ->
@@ -186,28 +181,28 @@ call_size_func(#b_set{anno=Anno,op=call,args=[Name|Args],dst=Dst}, Bs) ->
                 #b_remote{mod=#b_literal{val=erlang},
                           name=#b_literal{val=error},
                           arity=1} ->
-                    capture_anno(Anno, Dst, Args, Bs#{Dst => exception});
+                    capture_generator(Dst, Args, Bs#{Dst => exception});
                 _ ->
                     Bs#{Dst => any}
             end
     end.
 
-capture_anno(Anno, Dst, [ErrorTerm], Bs) ->
+capture_generator(Dst, [ErrorTerm], Bs) ->
     case get_value(ErrorTerm, Bs) of
         {tuple,Elements} ->
             Ts = [get_value(E, Bs) || E <- Elements],
-            capture_anno_1(Anno, Dst, Ts, Bs);
+            capture_generator_1(Dst, Ts, Bs);
         _ ->
             Bs
     end.
 
-capture_anno_1(Anno, Dst, [{nil_or_bad,Generator}|_], Bs) ->
-    Bs#{Dst => {generator_anno,{Generator,Anno}}};
-capture_anno_1(Anno, Dst, [{arg,Generator}|_], Bs) ->
-    Bs#{Dst => {generator_anno,{Generator,Anno}}};
-capture_anno_1(Anno, Dst, [_|T], Bs) ->
-    capture_anno_1(Anno, Dst, T, Bs);
-capture_anno_1(_, _, [], Bs) ->
+capture_generator_1(Dst, [{nil_or_bad,_Generator}|_], Bs) ->
+    Bs#{Dst => generator};
+capture_generator_1(Dst, [{arg,_Generator}|_], Bs) ->
+    Bs#{Dst => generator};
+capture_generator_1(Dst, [_|T], Bs) ->
+    capture_generator_1(Dst, T, Bs);
+capture_generator_1(_, [], Bs) ->
     Bs.
 
 setup_call_bs([V|Vs], [A0|As], OldBs, NewBs) ->
@@ -239,8 +234,8 @@ calc_size([{L,#b_blk{is=Is,last=Last}}|Blks], Map0) ->
     end;
 calc_size([], Map) ->
     case sort(maps:values(Map)) of
-        [{call,_}=Call,{generator_anno,GenAnno}] ->
-            {Call,GenAnno};
+        [generator,{call,_}=Call] ->
+            Call;
         _ ->
             throw(not_possible)
     end.
@@ -251,8 +246,8 @@ get_ret(#b_ret{arg=Arg}, Bs) ->
             none;
         {writable,#b_literal{val=0}} ->
             none;
-        {generator_anno,_}=GenAnno ->
-            GenAnno;
+        generator ->
+            generator;
         Ret ->
             Ret
     end;
@@ -316,7 +311,7 @@ calc_size_instr(#b_set{op=bs_match,args=[_Type,Ctx,_Flags,
         none ->
             Bs#{Dst => any};
         Val ->
-            update_match(Dst, Ctx, {{safe,{bif,'*'}},[Val,Unit]}, Bs)
+            update_match(Dst, Ctx, {{bif,'*'},[Val,Unit]}, Bs)
     end;
 calc_size_instr(#b_set{op=bs_start_match,args=[#b_literal{val=new},Arg],dst=Dst}, Bs) ->
     case get_arg_value(Arg, Bs) of
@@ -428,30 +423,33 @@ join_bs_1([], _Bigger, Smaller) -> Smaller.
 %%% Turn the result of the traversal of the SSA code into an expression tree.
 %%%
 
-make_expr_tree({{call,Alloc0},GenAnno}) ->
-    {Alloc1,Annos} = make_expr_tree_list(Alloc0, none, none, [GenAnno]),
-    Alloc2 = opt_expr(Alloc1),
-    Alloc = round_up_to_byte_size(Alloc2),
-    {Alloc,maps:from_list(Annos)};
+make_expr_tree({call,Alloc0}) ->
+    Alloc1 = make_expr_tree_list(Alloc0, none, none),
+    Alloc = opt_expr(Alloc1),
+    round_up_to_byte_size(Alloc);
 make_expr_tree(_) ->
     throw(not_possible).
 
-make_expr_tree_list([{{call,List},GenAnno}|T], Match, none, Annos0) ->
-    {BuildSize,Annos} = make_expr_tree_list(List, none, none, [GenAnno|Annos0]),
-    make_expr_tree_list(T, Match, BuildSize, Annos);
-make_expr_tree_list([{match,NumItems,N}|T], none, BuildSize, Annos) ->
-    make_expr_tree_list(T, {NumItems,N}, BuildSize, Annos);
-make_expr_tree_list([{writable,BuildSize}|T], Match, none, Annos) ->
-    make_expr_tree_list(T, Match, BuildSize, Annos);
-make_expr_tree_list([_|T], Match, BuildSize, Annos) ->
-    make_expr_tree_list(T, Match, BuildSize, Annos);
-make_expr_tree_list([], Match, BuildSize, Annos)
+make_expr_tree_list([{call,List}|T], Match, none) ->
+    BuildSize = make_expr_tree_list(List, none, none),
+    make_expr_tree_list(T, Match, BuildSize);
+make_expr_tree_list([{match,NumItems,N}|T], none, BuildSize) ->
+    make_expr_tree_list(T, {NumItems,N}, BuildSize);
+make_expr_tree_list([{writable,BuildSize}|T], Match, none) ->
+    make_expr_tree_list(T, Match, BuildSize);
+make_expr_tree_list([_|T], Match, BuildSize) ->
+    make_expr_tree_list(T, Match, BuildSize);
+make_expr_tree_list([], Match, BuildSize)
   when Match =/= none, BuildSize =/= none ->
     {NumItems,N} = Match,
-    Expr = {{bif,'*'},[{{safe,{bif,'div'}},[NumItems,N]},BuildSize]},
-    {Expr,Annos};
-make_expr_tree_list([], _, _, Annos) ->
-    {none,Annos}.
+    case N of
+        #b_literal{val=0} ->
+            throw(not_possible);
+        _ ->
+            {{bif,'*'},[{{bif,'div'},[NumItems,N]},BuildSize]}
+    end;
+make_expr_tree_list([], _, _) ->
+    none.
 
 round_up_to_byte_size(Alloc0) ->
     Alloc = case divisible_by_eight(Alloc0) of
@@ -476,10 +474,7 @@ opt_expr({Op,Args0}) ->
         none ->
             opt_expr_1(Op, Args);
         LitArgs ->
-            Bif = case Op of
-                      {safe,{bif,Bif0}} -> Bif0;
-                      {bif,Bif0} -> Bif0
-                  end,
+            {bif,Bif} = Op,
             try apply(erlang, Bif, LitArgs) of
                 Result ->
                     #b_literal{val=Result}
@@ -490,15 +485,10 @@ opt_expr({Op,Args0}) ->
     end;
 opt_expr(none) -> none.
 
-opt_expr_1({safe,{bif,'div'}}=Op, Args) ->
-    case Args of
-        [Int,#b_literal{val=1}] ->
-            Int;
-        [_Int,#b_literal{val=N}] when N > 1 ->
-            opt_expr_1({bif,'div'}, Args);
-        [_,_] ->
-            {Op,Args}
-    end;
+opt_expr_1({bif,'div'}=Op, [_,#b_literal{val=0}]=Args) ->
+    {Op,Args};
+opt_expr_1({bif,'div'}, [Numerator,#b_literal{val=1}]) ->
+    Numerator;
 opt_expr_1({bif,'div'}=Op, [Numerator,#b_literal{val=Denominator}]=Args) ->
     try
         opt_expr_div(Numerator, Denominator)
@@ -516,10 +506,10 @@ opt_expr_1({bif,'div'}=Op, [Numerator,#b_literal{val=Denominator}]=Args) ->
                     {Op,Args}
             end
     end;
-opt_expr_1({bif,'*'}, [{{safe,_},_},#b_literal{val=0}=Zero]) ->
-    Zero;
 opt_expr_1({bif,'*'}, [Factor,#b_literal{val=1}]) ->
     Factor;
+opt_expr_1({bif,'+'}, [#b_literal{val=0},Term]) ->
+    Term;
 opt_expr_1(Op, Args) ->
     {Op,Args}.
 
@@ -551,87 +541,41 @@ literal_expr_args([], Acc) ->
 
 %%%
 %%% Given an expression tree, generate SSA code to calculate the number
-%%% bytes to allocate for the writable binary.
+%%% of bytes to allocate for the writable binary.
 %%%
 
-cg_size_calc(Expr, L, #b_blk{is=Is0}=Blk0, CallLast, Annos, Count0, Acc0) ->
-    [InitWr] = Is0,
-    FailBlk0 = [],
-    {Acc1,Alloc,NextBlk,FailBlk,Count} = cg_size_calc_1(L, Expr, Annos,
-                                                        CallLast, FailBlk0,
-                                                        Count0, Acc0),
-    Is = [InitWr#b_set{args=[Alloc]}],
-    Blk = Blk0#b_blk{is=Is},
-    Acc = [{NextBlk,Blk}|FailBlk++Acc1],
-    {Acc,Count}.
+cg_size_calc(Expr, L, #b_blk{}=Blk0, Count0, Acc0) ->
+    {[Fail,PhiL,InitWrL],Count1} = new_blocks(3, Count0),
+    {PhiDst,Count2} = new_var('@ssa_tmp', Count1),
+    {Acc1,Alloc,NextBlk,Count} = cg_size_calc_1(L, Expr, Fail, Count2, Acc0),
 
-cg_size_calc_1(L, #b_literal{}=Alloc, _Annos, _CallLast, FailBlk, Count, Acc) ->
-    {Acc,Alloc,L,FailBlk,Count};
-cg_size_calc_1(L0, {Op0,Args0}, Annos, CallLast, FailBlk0, Count0, Acc0) ->
-    {Args,Acc1,L,FailBlk1,Count1} = cg_atomic_args(Args0, L0, Annos, CallLast,
-                                                   FailBlk0, Count0, Acc0, []),
-    {BadGenL,FailBlk,Count2} = cg_bad_generator(Args, Annos, CallLast,
-                                                FailBlk1, Count1),
-    {Dst,Count3} = new_var('@ssa_tmp', Count2),
-    case Op0 of
-        {safe,Op} ->
-            {OpDst,Count4} = new_var('@ssa_size', Count3),
-            {[OpSuccL,OpFailL,PhiL,NextL],Count5} = new_blocks(4, Count4),
-            I = #b_set{op=Op,args=Args,dst=OpDst},
-            {Blk,Count} = cg_succeeded(I, OpSuccL, OpFailL, Count5),
-            JumpBlk = #b_blk{is=[],last=cg_br(PhiL)},
-            PhiIs = [#b_set{op=phi,
-                            args=[{OpDst,OpSuccL},{#b_literal{val=0},OpFailL}],
-                            dst=Dst}],
-            PhiBlk = #b_blk{is=PhiIs,last=cg_br(NextL)},
-            Acc = [{PhiL,PhiBlk},{OpSuccL,JumpBlk},
-                   {OpFailL,JumpBlk},{L,Blk}|Acc1],
-            {Acc,Dst,NextL,FailBlk,Count};
-        _ ->
-            {NextBlkL,Count4} = new_block(Count3),
-            I = #b_set{op=Op0,args=Args,dst=Dst},
-            {SuccBlk,Count} = cg_succeeded(I, NextBlkL, BadGenL, Count4),
-            Acc = [{L,SuccBlk}|Acc1],
-            {Acc,Dst,NextBlkL,FailBlk,Count}
-    end.
+    BrPhiBlk = #b_blk{is=[],last=cg_br(PhiL)},
 
-cg_bad_generator([Arg|_], Annos, CallLast, FailBlk, Count) ->
-    case Annos of
-        #{Arg := Anno} ->
-            cg_bad_generator_1(Anno, Arg, CallLast, FailBlk, Count);
-        #{} ->
-            case FailBlk of
-                [{L,_}|_] ->
-                    {L,FailBlk,Count};
-                [] ->
-                    cg_bad_generator_1(#{}, Arg, CallLast, FailBlk, Count)
-            end
-    end.
+    PhiArgs = [{Alloc,NextBlk},
+               {#b_literal{val=256},Fail}],
+    PhiIs = [#b_set{op=phi,dst=PhiDst,args=PhiArgs}],
+    PhiBlk = #b_blk{is=PhiIs,last=cg_br(InitWrL)},
 
-cg_bad_generator_1(Anno, Arg, CallLast, FailBlk, Count0) ->
-    {L,Count1} = new_block(Count0),
-    {TupleDst,Count2} = new_var('@ssa_tuple', Count1),
-    {SuccDst,Count3} = new_var('@ssa_bool', Count2),
-    {Ret,Count4} = new_var('@ssa_ret', Count3),
-    MFA = #b_remote{mod=#b_literal{val=erlang},
-                    name=#b_literal{val=error},
-                    arity=1},
-    TupleI = #b_set{op=put_tuple,
-                    args=[#b_literal{val=bad_generator},Arg],
-                    dst=TupleDst},
-    CallI = #b_set{anno=Anno,op=call,args=[MFA,TupleDst],dst=Ret},
-    SuccI = #b_set{op={succeeded,body},args=[Ret],dst=SuccDst},
-    Is = [TupleI,CallI,SuccI],
+    #b_blk{is=[InitWr]} = Blk0,
+    Is = [InitWr#b_set{args=[PhiDst]}],
+    Blk = Blk0#b_blk{is=Is},
 
-    %% The `bad_generator` call must refer to the same fail label (either a
-    %% landing pad or ?EXCEPTION_BLOCK) as the caller, or else we'll break
-    %% optimizations that assume exceptions are always reflected in the control
-    %% flow.
-    #b_br{fail=FailLbl} = CallLast,             %Assertion.
-    Last = #b_br{bool=SuccDst,succ=FailLbl,fail=FailLbl},
+    Acc = [{InitWrL,Blk},
+           {PhiL,PhiBlk},
+           {NextBlk,BrPhiBlk},
+           {Fail,BrPhiBlk}|Acc1],
+    {Acc,Count}.
 
-    Blk = #b_blk{is=Is,last=Last},
-    {L,[{L,Blk}|FailBlk],Count4}.
+cg_size_calc_1(L, #b_literal{}=Alloc, _Fail, Count, Acc) ->
+    {Acc,Alloc,L,Count};
+cg_size_calc_1(L0, {Op,Args0}, Fail, Count0, Acc0) ->
+    {Args,Acc1,L,Count1} = cg_atomic_args(Args0, L0, Fail, Count0, Acc0, []),
+    {Dst,Count3} = new_var('@ssa_tmp', Count1),
+    {NextBlkL,Count4} = new_block(Count3),
+    I = #b_set{op=Op,args=Args,dst=Dst},
+    {SuccBlk,Count} = cg_succeeded(I, NextBlkL, Fail, Count4),
+    Acc = [{L,SuccBlk}|Acc1],
+    {Acc,Dst,NextBlkL,Count}.
 
 cg_succeeded(#b_set{dst=OpDst}=I, Succ, Fail, Count0) ->
     {Bool,Count} = new_var('@ssa_bool', Count0),
@@ -642,25 +586,21 @@ cg_succeeded(#b_set{dst=OpDst}=I, Succ, Fail, Count0) ->
 cg_br(Target) ->
     #b_br{bool=#b_literal{val=true},succ=Target,fail=Target}.
 
-cg_atomic_args([A|As], L, Annos, CallLast, FailBlk0, Count0, BlkAcc0, Acc) ->
+cg_atomic_args([A|As], L, Fail, Count0, BlkAcc0, Acc) ->
     case A of
         #b_literal{} ->
-            cg_atomic_args(As, L, Annos, CallLast, FailBlk0,
-                           Count0, BlkAcc0, [A|Acc]);
+            cg_atomic_args(As, L, Fail, Count0, BlkAcc0, [A|Acc]);
         #b_var{} ->
-            cg_atomic_args(As, L, Annos, CallLast, FailBlk0,
-                           Count0, BlkAcc0, [A|Acc]);
+            cg_atomic_args(As, L, Fail, Count0, BlkAcc0, [A|Acc]);
         none ->
             throw(not_possible);
         _ ->
-            {BlkAcc,Var,NextBlk,FailBlk,Count} =
-                cg_size_calc_1(L, A, Annos, CallLast, FailBlk0,
-                               Count0, BlkAcc0),
-            cg_atomic_args(As, NextBlk, Annos, CallLast, FailBlk,
-                               Count, BlkAcc, [Var|Acc])
+            {BlkAcc,Var,NextBlk,Count} =
+                cg_size_calc_1(L, A, Fail, Count0, BlkAcc0),
+            cg_atomic_args(As, NextBlk, Fail, Count, BlkAcc, [Var|Acc])
     end;
-cg_atomic_args([], NextBlk, _Annos, _CallLast, FailBlk, Count, BlkAcc, Acc) ->
-    {reverse(Acc),BlkAcc,NextBlk,FailBlk,Count}.
+cg_atomic_args([], NextBlk, _Fail, Count, BlkAcc, Acc) ->
+    {reverse(Acc),BlkAcc,NextBlk,Count}.
 
 new_var(Base, Count) ->
     {#b_var{name={Base,Count}},Count+1}.
diff --git a/lib/compiler/test/bs_bincomp_SUITE.erl b/lib/compiler/test/bs_bincomp_SUITE.erl
index 0a03aa9400..41877a9e84 100644
--- a/lib/compiler/test/bs_bincomp_SUITE.erl
+++ b/lib/compiler/test/bs_bincomp_SUITE.erl
@@ -389,6 +389,14 @@ nomatch(Config) when is_list(Config) ->
 
     <<>> = << <<>> || <<_:8>> <= <<>> >>,
 
+    %% GH-7494. Qualifiers should be evaluated from left to right. The
+    %% second (failing) generator should never be evaluated because the
+    %% first generator is an empty list.
+    <<>> = id(<< <<C:8>> || C <- [], _ <- ok >>),
+    <<>> = id(<<0 || _ <- [], _ <- ok, false>>),
+
+    {'EXIT',{{bad_generator,false},_}} = catch << [] || <<0:0>> <= false >>,
+
     ok.
 
 nomatch_1(Bin, Size) ->
-- 
2.35.3
Places

File 0428-Correct-evaluation-order-for-binary-comprehensions.patch of Package erlang

Places