File 2661-Improve-pattern-matching-of-binary-segments.patch of Package erlang
From 7318e9cbea60f894f2f3f9a56bf2b8dce19135f4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Mon, 15 Sep 2025 08:34:31 +0200
Subject: [PATCH] Improve pattern matching of binary segments
In the following example, the compiler would not understand
that the second clause could never succeed:
decode(<<X:1/signed-unit:64>>) ->
{int, X};
decode(<<X:1/unsigned-float-unit:64>>) ->
{float, X}.
In the following example:
count_newlines(<<$\n,B/binary>>, Count) ->
count_newlines(B, Count + 1);
count_newlines(<<_:1/unit:8,B/binary>>, Count) ->
count_newlines(B, Count);
count_newlines(<<>>, Count) ->
Count.
the size of the first segment is given in two different ways:
in the first clause it is given as size 8 and unit 1, and in
the second clause it is given as size 1 and unit 8.
The compiler would not understand that the size of segments were the
same and would generate suboptimal code for the second clause.
This commit improves the pattern matching compilation of binary
segments, which results in better code for the `count_newlines/2`
function, and the following warning for the `decode/1` function:
t.erl:7:1: Warning: this clause cannot match because a previous clause at line 5 matches the same pattern as this clause
% 7| decode(<<X:1/unsigned-float-unit:64>>) ->
% | ^
---
lib/compiler/src/beam_core_to_ssa.erl | 77 ++++++++++++++++++++++++++-
lib/compiler/test/warnings_SUITE.erl | 10 +++-
2 files changed, 84 insertions(+), 3 deletions(-)
diff --git a/lib/compiler/src/beam_core_to_ssa.erl b/lib/compiler/src/beam_core_to_ssa.erl
index d96cfda5c6..45ef4b591b 100644
--- a/lib/compiler/src/beam_core_to_ssa.erl
+++ b/lib/compiler/src/beam_core_to_ssa.erl
@@ -1658,11 +1658,82 @@ group_values(Cs) ->
maps:groups_from_list(F, Cs).
group_keeping_order(Us, [C1|Cs]) ->
- V1 = clause_val(C1),
- {More,Rest} = splitwith(fun (C) -> clause_val(C) =:= V1 end, Cs),
+ SplitFun = group_keeping_order_fun(C1),
+ {More,Rest} = splitwith(SplitFun, Cs),
[{Us,[C1|More]}|group_keeping_order(Us, Rest)];
group_keeping_order(_, []) -> [].
+group_keeping_order_fun(C1) ->
+ case is_suitable_bin_seg(C1) of
+ true ->
+ %% The `C1` clause can only fail if the binary pattern
+ %% fails to match.
+ V1 = bin_seg_val(C1),
+ fun(C) ->
+ case {V1,bin_seg_val(C)} of
+ {{S,U,T,F,_}, {S,U,T,F,_}} ->
+ %% Identical segments.
+ true;
+ {{S,U,binary,_,_}, {S,U,binary,_,_}} ->
+ %% Identical binary segments.
+ true;
+ {{S,U,T1,_,Next}, {S,U,_T2,_,Next}}
+ when T1 =:= integer; T1 =:= binary ->
+ %% The patterns in clauses `C1` and `C`
+ %% match the same number of bits, meaning
+ %% that clause `C` clause will not be
+ %% reached if clause `C1` succeeds.
+ true;
+ {_, _} ->
+ false
+ end
+ end;
+ false ->
+ %% Handle map or "unsuitable" binary segment.
+ V1 = clause_val(C1),
+ fun(C) ->
+ V1 =:= clause_val(C)
+ end
+ end.
+
+%% is_suitable_bin_seg(iclause()) -> boolean().
+%% Return `true` if this clause has a pattern that matches a binary
+%% segment having no other patterns and having a `true` guard. In
+%% order words, it returns `true` when the only reason a clause can
+%% fail is that the binary pattern doesn't match.
+%%
+%% For example, this functions returns `true` when given the
+%% following clause:
+%%
+%% foo(<<X:64,integer>>, A) -> ...
+%%
+%% It returns `false` when given one of these clauses:
+%%
+%% bar(<<X:64,integer>>, true) -> ...
+%%
+%% baz(<<X:64,integer>>, A) when X < 10 -> ...
+%%
+is_suitable_bin_seg(#iclause{pats=[#cg_bin_seg{}|Ps],guard=G}) ->
+ is_true_guard(G) andalso
+ all(fun(#c_var{}) -> true;
+ (_) -> false
+ end, Ps);
+is_suitable_bin_seg(#iclause{}) ->
+ false.
+
+%% Similar to arg_val/2 for #cg_bin_seg{}, except that we also
+%% include the pattern for the next segment.
+bin_seg_val(#iclause{pats=[#cg_bin_seg{next=Next}=Arg|_]}=C) ->
+ {S,U,T,F} = arg_val(Arg, C),
+ case Next of
+ #cg_bin_end{} ->
+ {S,U,T,F,cg_bin_end};
+ #cg_bin_seg{size=#b_literal{val=all},unit=Unit,next=#cg_bin_end{}} ->
+ {S,U,T,F,{cg_bin_all,Unit}};
+ _ ->
+ {S,U,T,F,Next}
+ end.
+
%% match_clause([Var], [Clause], Default, State) -> {Clause,State}.
%% At this point all the clauses have the same "value". Build one
%% select clause for this value and continue matching. Rename
@@ -1951,6 +2022,8 @@ arg_val(Arg, C) ->
#b_var{name=V} ->
#iclause{sub=Sub} = C,
{#b_var{name=get_vsub(V, Sub)},U,T,Fs};
+ #b_literal{val=Sz} when is_integer(Sz), U > 1 ->
+ {#b_literal{val=Sz*U},1,T,Fs};
#b_literal{} ->
{S,U,T,Fs}
end;
diff --git a/lib/compiler/test/warnings_SUITE.erl b/lib/compiler/test/warnings_SUITE.erl
index 94cfc9efec..2c4766b0e6 100644
--- a/lib/compiler/test/warnings_SUITE.erl
+++ b/lib/compiler/test/warnings_SUITE.erl
@@ -1088,6 +1088,12 @@ bit_syntax(Config) ->
end.
d(<<16#110000/utf8>>) -> error;
d(_) -> ok.
+ e(<<X:1/big-signed-unit:64>>) -> {int, X};
+ e(<<X:1/big-unsigned-float-unit:64>>) -> {float, X}.
+ f(<<X:1/big-unsigned-float-unit:64>>) -> {float, X};
+ f(<<X:1/big-signed-unit:64>>) -> {int, X}.
+ g(<<X:4/signed-binary>>) -> X;
+ g(<<X:1/unsigned-binary-unit:32>>) -> X.
">>,
[],
{warnings,[{{2,15},sys_core_fold,{nomatch,no_clause}},
@@ -1107,7 +1113,9 @@ bit_syntax(Config) ->
{{12,37},sys_core_fold,{nomatch,{bit_syntax_size,bad}}},
{{15,21},sys_core_fold,{nomatch,{bit_syntax_unsigned,-42}}},
{{17,21},sys_core_fold,{nomatch,{bit_syntax_type,42,binary}}},
- {{19,19},sys_core_fold,{nomatch,{bit_syntax_unicode,1114112}}}
+ {{19,19},sys_core_fold,{nomatch,{bit_syntax_unicode,1114112}}},
+ {{22,15},beam_core_to_ssa,{nomatch,{shadow,21}}},
+ {{26,15},beam_core_to_ssa,{nomatch,{shadow,25}}}
]}
}],
run(Config, Ts),
--
2.51.0