File 4171-Fix-bug-string-slice-3-on-bad-input.patch of Package erlang

From c53cb8cf9b446104d50f65a855684b4fe02c011e Mon Sep 17 00:00:00 2001
From: Dan Gudmundsson <dgud@erlang.org>
Date: Tue, 30 Apr 2019 12:39:24 +0200
Subject: [PATCH] Fix bug string:slice/3 on bad input

Fix a bug in string:slice/3 that could wrongly return <<>> for non-UTF-8 binary input.

Also raise a more descriptive {badarg, Term} error when non-UTF-8 binaries
are given as input to some functions.
---
 lib/stdlib/src/string.erl                  | 78 ++++++++++++++++++++++--------
 lib/stdlib/test/string_SUITE.erl           | 63 +++++++++++++++++++++++-
 lib/stdlib/uc_spec/gen_unicode_mod.escript | 12 +++--
 3 files changed, 127 insertions(+), 26 deletions(-)

diff --git a/lib/stdlib/src/string.erl b/lib/stdlib/src/string.erl
index 1f8bdc5432..a418754caf 100644
--- a/lib/stdlib/src/string.erl
+++ b/lib/stdlib/src/string.erl
@@ -128,7 +128,8 @@ length(CD) ->
 to_graphemes(CD0) ->
     case unicode_util:gc(CD0) of
         [GC|CD] -> [GC|to_graphemes(CD)];
-        [] -> []
+        [] -> [];
+        {error, Err} -> error({badarg, Err})
     end.
 
 %% Compare two strings return boolean, assumes that the input are
@@ -332,7 +333,10 @@ uppercase(<<CP1/utf8, Rest/binary>>=Orig) ->
     catch unchanged -> Orig
     end;
 uppercase(<<>>) ->
-    <<>>.
+    <<>>;
+uppercase(Bin) ->
+    error({badarg, Bin}).
+
 
 %% Lowercase all chars in Str
 -spec lowercase(String::unicode:chardata()) -> unicode:chardata().
@@ -346,7 +350,10 @@ lowercase(<<CP1/utf8, Rest/binary>>=Orig) ->
     catch unchanged -> Orig
     end;
 lowercase(<<>>) ->
-    <<>>.
+    <<>>;
+lowercase(Bin) ->
+    error({badarg, Bin}).
+
 
 %% Make a titlecase of the first char in Str
 -spec titlecase(String::unicode:chardata()) -> unicode:chardata().
@@ -375,7 +382,9 @@ casefold(<<CP1/utf8, Rest/binary>>=Orig) ->
     catch unchanged -> Orig
     end;
 casefold(<<>>) ->
-    <<>>.
+    <<>>;
+casefold(Bin) ->
+    error({badarg, Bin}).
 
 -spec to_integer(String) -> {Int, Rest} | {'error', Reason} when
       String :: unicode:chardata(),
@@ -544,7 +553,8 @@ length_1([CP1|[CP2|_]=Cont], N) when ?ASCII_LIST(CP1,CP2) ->
 length_1(Str, N) ->
     case unicode_util:gc(Str) of
         [] -> N;
-        [_|Rest] -> length_1(Rest, N+1)
+        [_|Rest] -> length_1(Rest, N+1);
+        {error, Err} -> error({badarg, Err})
     end.
 
 length_b(<<CP2/utf8, Rest/binary>>, CP1, N)
@@ -554,7 +564,8 @@ length_b(Bin0, CP1, N) ->
     [_|Bin1] = unicode_util:gc([CP1|Bin0]),
     case unicode_util:cp(Bin1) of
         [] -> N+1;
-        [CP3|Bin] -> length_b(Bin, CP3, N+1)
+        [CP3|Bin] -> length_b(Bin, CP3, N+1);
+        {error, Err} -> error({badarg, Err})
     end.
 
 equal_1([A|AR], [B|BR]) when is_integer(A), is_integer(B) ->
@@ -599,7 +610,8 @@ reverse_1([CP1|[CP2|_]=Cont], Acc) when ?ASCII_LIST(CP1,CP2) ->
 reverse_1(CD, Acc) ->
     case unicode_util:gc(CD) of
         [GC|Rest] -> reverse_1(Rest, [GC|Acc]);
-        [] -> Acc
+        [] -> Acc;
+        {error, Err} -> error({badarg, Err})
     end.
 
 reverse_b(<<CP2/utf8, Rest/binary>>, CP1, Acc)
@@ -609,7 +621,8 @@ reverse_b(Bin0, CP1, Acc) ->
     [GC|Bin1] = unicode_util:gc([CP1|Bin0]),
     case unicode_util:cp(Bin1) of
         [] -> [GC|Acc];
-        [CP3|Bin] -> reverse_b(Bin, CP3, [GC|Acc])
+        [CP3|Bin] -> reverse_b(Bin, CP3, [GC|Acc]);
+        {error, Err} -> error({badarg, Err})
     end.
 
 slice_l0(<<CP1/utf8, Bin/binary>>, N) when N > 0 ->
@@ -622,7 +635,8 @@ slice_l([CP1|[CP2|_]=Cont], N) when ?ASCII_LIST(CP1,CP2),N > 0 ->
 slice_l(CD, N) when N > 0 ->
     case unicode_util:gc(CD) of
         [_|Cont] -> slice_l(Cont, N-1);
-        [] -> []
+        [] -> [];
+        {error, Err} -> error({badarg, Err})
     end;
 slice_l(Cont, 0) ->
     Cont.
@@ -634,7 +648,8 @@ slice_lb(Bin, CP1, N) ->
     if N > 1 ->
             case unicode_util:cp(Rest) of
                 [CP2|Cont] -> slice_lb(Cont, CP2, N-1);
-                [] -> <<>>
+                [] -> <<>>;
+                {error, Err} -> error({badarg, Err})
             end;
        N =:= 1 ->
             Rest
@@ -647,7 +662,10 @@ slice_trail(Orig, N) when is_binary(Orig) ->
             Sz = byte_size(Orig) - Length,
             <<Keep:Sz/binary, _/binary>> = Orig,
             Keep;
-        _ -> <<>>
+        <<_, _/binary>> when N > 0 ->
+            error({badarg, Orig});
+        _ ->
+            <<>>
     end;
 slice_trail(CD, N) when is_list(CD) ->
     slice_list(CD, N).
@@ -657,7 +675,8 @@ slice_list([CP1|[CP2|_]=Cont], N) when ?ASCII_LIST(CP1,CP2),N > 0 ->
 slice_list(CD, N) when N > 0 ->
     case unicode_util:gc(CD) of
         [GC|Cont] -> append(GC, slice_list(Cont, N-1));
-        [] -> []
+        [] -> [];
+        {error, Err} -> error({badarg, Err})
     end;
 slice_list(_, 0) ->
     [].
@@ -668,7 +687,8 @@ slice_bin(CD, CP1, N) when N > 0 ->
     [_|Bin] = unicode_util:gc([CP1|CD]),
     case unicode_util:cp(Bin) of
         [CP2|Cont] -> slice_bin(Cont, CP2, N-1);
-        [] -> 0
+        [] -> 0;
+        {error, Err} -> error({badarg, Err})
     end;
 slice_bin(CD, CP1, 0) ->
     byte_size(CD)+byte_size(<<CP1/utf8>>).
@@ -703,14 +723,18 @@ uppercase_bin(CP1, Bin, Changed) ->
                 [] when Changed ->
                     [CP1];
                 [] ->
-                    throw(unchanged)
+                    throw(unchanged);
+                {error, Err} ->
+                    error({badarg, Err})
             end;
         [Char|CPs] ->
             case unicode_util:cp(CPs) of
                 [Next|Rest] ->
                     [Char|uppercase_bin(Next, Rest, true)];
                 [] ->
-                    [Char]
+                    [Char];
+                {error, Err} ->
+                    error({badarg, Err})
             end
     end.
 
@@ -744,14 +768,18 @@ lowercase_bin(CP1, Bin, Changed) ->
                 [] when Changed ->
                     [CP1];
                 [] ->
-                    throw(unchanged)
+                    throw(unchanged);
+                {error, Err} ->
+                    error({badarg, Err})
             end;
         [Char|CPs] ->
             case unicode_util:cp(CPs) of
                 [Next|Rest] ->
                     [Char|lowercase_bin(Next, Rest, true)];
                 [] ->
-                    [Char]
+                    [Char];
+                {error, Err} ->
+                    error({badarg, Err})
             end
     end.
 
@@ -785,14 +813,18 @@ casefold_bin(CP1, Bin, Changed) ->
                 [] when Changed ->
                     [CP1];
                 [] ->
-                    throw(unchanged)
+                    throw(unchanged);
+                {error, Err} ->
+                    error({badarg, Err})
             end;
         [Char|CPs] ->
             case unicode_util:cp(CPs) of
                 [Next|Rest] ->
                     [Char|casefold_bin(Next, Rest, true)];
                 [] ->
-                    [Char]
+                    [Char];
+                {error, Err} ->
+                    error({badarg, Err})
             end
     end.
 
@@ -1634,7 +1666,9 @@ bin_search_inv_1(<<CP1/utf8, BinRest/binary>>=Bin0, Cont, Sep) ->
 bin_search_inv_1(<<>>, Cont, _Sep) ->
     {nomatch, Cont};
 bin_search_inv_1([], Cont, _Sep) ->
-    {nomatch, Cont}.
+    {nomatch, Cont};
+bin_search_inv_1(Bin, _, _) ->
+    error({badarg, Bin}).
 
 
 bin_search_inv_n(<<CP1/utf8, BinRest/binary>>=Bin0, Cont, Seps) ->
@@ -1666,7 +1700,9 @@ bin_search_inv_n(<<CP1/utf8, BinRest/binary>>=Bin0, Cont, Seps) ->
 bin_search_inv_n(<<>>, Cont, _Sep) ->
     {nomatch, Cont};
 bin_search_inv_n([], Cont, _Sep) ->
-    {nomatch, Cont}.
+    {nomatch, Cont};
+bin_search_inv_n(Bin, _, _) ->
+    error({badarg, Bin}).
 
 bin_search_str(Bin0, Start, [], SearchCPs) ->
     Compiled = binary:compile_pattern(unicode:characters_to_binary(SearchCPs)),
diff --git a/lib/stdlib/test/string_SUITE.erl b/lib/stdlib/test/string_SUITE.erl
index 248912c3f2..c9aadd7f10 100644
--- a/lib/stdlib/test/string_SUITE.erl
+++ b/lib/stdlib/test/string_SUITE.erl
@@ -103,6 +103,15 @@ debug() ->
         test(?LINE,?FUNCTION_NAME,B,C,D, false),
         test(?LINE,?FUNCTION_NAME,hd(C),[B|tl(C)],D, false)).
 
+-define(TRY(Exp),
+        fun() ->
+                try Exp
+                catch _E:Reason:_ST ->
+                        %% io:format("~p:~w: ~p: ~.0p ~p~n",
+                        %%           [?FUNCTION_NAME, ?LINE,_E,Reason, hd(_ST)]),
+                        {'EXIT', Reason}
+                end
+        end()).
 
 is_empty(_) ->
     ?TEST("", [], true),
@@ -126,6 +135,10 @@ length(_) ->
     ?TEST(["abc"|<<"abc">>], [], 6),
     ?TEST(["abc",["def"]], [], 6),
     ?TEST([<<97/utf8, 778/utf8, 98/utf8>>, [776,111,776]], [], 3), %% åäö in nfd
+
+    InvalidUTF8 = <<192,192>>,
+    {'EXIT', {badarg, _}} = ?TRY(string:length(InvalidUTF8)),
+    {'EXIT', {badarg, _}} = ?TRY(string:length(<<$a, InvalidUTF8/binary, $z>>)),
     ok.
 
 equal(_) ->
@@ -226,6 +239,8 @@ to_graphemes(_) ->
     true = erlang:length(GCs) =:= erlang:length(string:to_graphemes(NFD)),
     true = erlang:length(GCs) =:=
         erlang:length(string:to_graphemes(unicode:characters_to_nfc_list(String))),
+
+    {'EXIT', {badarg, _}} = ?TRY(string:to_graphemes(<<$a,192,192,$z>>)),
     ok.
 
 reverse(_) ->
@@ -238,6 +253,11 @@ reverse(_) ->
     ?TEST(Str2, [], lists:reverse(Str2)),
     ?TEST(Str3, [], lists:reverse(Str3)),
     true = string:reverse(Str3) =:= lists:reverse(string:to_graphemes(Str3)),
+
+    InvalidUTF8 = <<192,192>>,
+    {'EXIT', {badarg, _}} = ?TRY(string:reverse(InvalidUTF8)),
+    {'EXIT', {badarg, _}} = ?TRY(string:reverse(<<$a, InvalidUTF8/binary, $z>>)),
+
     ok.
 
 slice(_) ->
@@ -258,6 +278,14 @@ slice(_) ->
     ?TEST([<<"aå"/utf8>>,"äöbcd"], [3,3], "öbc"),
     ?TEST([<<"aåä"/utf8>>,"öbcd"], [3,10], "öbcd"),
 
+    InvalidUTF8 = <<192,192>>,
+    [$b, $c|InvalidUTF8] = string:slice(["abc", InvalidUTF8], 1),
+    InvalidUTF8 = string:slice(["abc", InvalidUTF8], 3),
+    {'EXIT', {badarg, _}} = ?TRY(string:slice(["abc", InvalidUTF8], 1, 5)),
+    BadUtf8 = <<$a, InvalidUTF8/binary, "teststring">>,
+    {'EXIT', {badarg, _}} = ?TRY(string:slice(BadUtf8, 2)),
+    {'EXIT', {badarg, _}} = ?TRY(string:slice(BadUtf8, 1, 5)),
+    {'EXIT', {badarg, _}} = ?TRY(string:slice(BadUtf8, 0, 5)),
     ok.
 
 pad(_) ->
@@ -270,6 +298,10 @@ pad(_) ->
     ?TEST(Str, [10, trailing, $.], "Hallå....."),
     ?TEST(Str++["f"], [10, trailing, $.], "Hallåf...."),
     ?TEST(Str++[" flåwer"], [10, trailing, $.], "Hallå flåwer"),
+
+    InvalidUTF8 = <<192,192>>,
+    {'EXIT', {badarg, _}} = ?TRY(string:pad(InvalidUTF8, 10, both, $.)),
+    {'EXIT', {badarg, _}} = ?TRY(string:pad(<<$a, InvalidUTF8/binary, $z>>, 10, both, $.)),
     ok.
 
 trim(_) ->
@@ -300,6 +332,11 @@ trim(_) ->
     ?TEST([[<<"!v">>|<<204,128,$v,204,129>>]],[trailing, [[$v,769]]], [$!,$v,768]),
     ?TEST([[[<<"v">>|<<204,129,118,204,128,118>>],769,118,769]], [trailing, [[118,769]]], [$v,769,$v,768]),
     ?TEST([<<"vv">>|<<204,128,118,204,128>>], [trailing, [[118,768]]], "v"),
+
+    InvalidUTF8 = <<192,192>>,
+    {'EXIT', {badarg, _}} = ?TRY(string:trim(InvalidUTF8, both, "az")),
+    %% Not checked  (using binary search)
+    %% {'EXIT', {badarg, _}} = ?TRY(string:trim(<<$a, $b, InvalidUTF8/binary, $z>>, both, "az")),
     ok.
 
 chomp(_) ->
@@ -400,6 +437,13 @@ take(_) ->
     ?TEST([<<"e">>,778,"åäöe", <<778/utf8>>, $e, 779], [[[$e,778]], true, trailing],
           {[$e,778]++"åäöe"++[778], [$e,779]}),
 
+    InvalidUTF8 = <<192,192>>,
+    {'EXIT', {badarg, _}} = ?TRY(string:take(InvalidUTF8, [$.], false, leading)),
+    %% Not checked  (using binary search)
+    %% {'EXIT', {badarg, _}} = ?TRY(string:take(InvalidUTF8, [$.], true, leading)),
+    %% {'EXIT', {badarg, _}} = ?TRY(string:take(InvalidUTF8, [$.], false, trailing)),
+    {'EXIT', {badarg, _}} = ?TRY(string:take(InvalidUTF8, [$.], true, trailing)),
+
     ok.
 
 
@@ -416,6 +460,11 @@ uppercase(_) ->
     ?TEST("ljLJ", [], "LJLJ"),
     ?TEST("LJlj", [], "LJLJ"),
     ?TEST("ß sharp s", [], "SS SHARP S"),
+
+    InvalidUTF8 = <<192,192>>,
+    {'EXIT', {badarg, _}} = ?TRY(string:uppercase(InvalidUTF8)),
+    {'EXIT', {badarg, _}} = ?TRY(string:uppercase(<<$a, InvalidUTF8/binary, $z>>)),
+
     ok.
 
 lowercase(_) ->
@@ -429,6 +478,10 @@ lowercase(_) ->
     ?TEST(["Mic",<<"HAŁ"/utf8>>], [], "michał"),
     ?TEST("ß SHARP S", [], "ß sharp s"),
     ?TEST("İ I WITH DOT ABOVE", [], "i̇ i with dot above"),
+
+    InvalidUTF8 = <<192,192>>,
+    {'EXIT', {badarg, _}} = ?TRY(string:lowercase(InvalidUTF8)),
+    {'EXIT', {badarg, _}} = ?TRY(string:lowercase(<<$a, InvalidUTF8/binary, $z>>)),
     ok.
 
 titlecase(_) ->
@@ -442,6 +495,10 @@ titlecase(_) ->
     ?TEST("ljLJ", [], "LjLJ"),
     ?TEST("LJlj", [], "Ljlj"),
     ?TEST("ß sharp s", [], "Ss sharp s"),
+
+    InvalidUTF8 = <<192,192>>,
+    {'EXIT', {badarg, _}} = ?TRY(string:titlecase(InvalidUTF8)),
+    <<$A, _/binary>> = ?TRY(string:titlecase(<<$a, InvalidUTF8/binary, $z>>)),
     ok.
 
 casefold(_) ->
@@ -456,6 +513,10 @@ casefold(_) ->
     ?TEST("ß SHARP S", [], "ss sharp s"),
     ?TEST("ẞ SHARP S", [], "ss sharp s"),
     ?TEST("İ I WITH DOT ABOVE", [], "i̇ i with dot above"),
+
+    InvalidUTF8 = <<192,192>>,
+    {'EXIT', {badarg, _}} = ?TRY(string:casefold(InvalidUTF8)),
+    {'EXIT', {badarg, _}} = ?TRY(string:casefold(<<$a, InvalidUTF8/binary, $z>>)),
     ok.
 
 
@@ -740,7 +801,7 @@ meas(Config) ->
         _ -> % No scaling, run at most 2 mins
             Tester = spawn(Exec),
             receive {test_done, Tester} -> ok
-            after 120000 ->
+            after 118000 ->
                     io:format("Timelimit reached stopping~n",[]),
                     exit(Tester, die)
             end,
diff --git a/lib/stdlib/uc_spec/gen_unicode_mod.escript b/lib/stdlib/uc_spec/gen_unicode_mod.escript
index 8636c69a0d..f560444619 100644
--- a/lib/stdlib/uc_spec/gen_unicode_mod.escript
+++ b/lib/stdlib/uc_spec/gen_unicode_mod.escript
@@ -202,7 +202,8 @@ gen_static(Fd) ->
     io:put_chars(Fd, "                {Upper,_} -> [Upper|Str];\n"),
     io:put_chars(Fd, "                {Upper,_,_,_} -> [Upper|Str]\n"),
     io:put_chars(Fd, "            end;\n"),
-    io:put_chars(Fd, "        [] -> []\n"),
+    io:put_chars(Fd, "        [] -> [];\n"),
+    io:put_chars(Fd, "        {error,Err} -> error({badarg, Err})\n"),
     io:put_chars(Fd, "    end.\n\n"),
     io:put_chars(Fd, "-spec lowercase(unicode:chardata()) -> "
                  "maybe_improper_list(gc(),unicode:chardata()).\n"),
@@ -213,7 +214,8 @@ gen_static(Fd) ->
     io:put_chars(Fd, "                {_,Lower} -> [Lower|Str];\n"),
     io:put_chars(Fd, "                {_,Lower,_,_} -> [Lower|Str]\n"),
     io:put_chars(Fd, "            end;\n"),
-    io:put_chars(Fd, "        [] -> []\n"),
+    io:put_chars(Fd, "        [] -> [];\n"),
+    io:put_chars(Fd, "        {error,Err} -> error({badarg, Err})\n"),
     io:put_chars(Fd, "    end.\n\n"),
     io:put_chars(Fd, "-spec titlecase(unicode:chardata()) -> "
                  "maybe_improper_list(gc(),unicode:chardata()).\n"),
@@ -224,7 +226,8 @@ gen_static(Fd) ->
     io:put_chars(Fd, "                {_,_,Title,_} -> [Title|Str];\n"),
     io:put_chars(Fd, "                {Upper,_} -> [Upper|Str]\n"),
     io:put_chars(Fd, "            end;\n"),
-    io:put_chars(Fd, "        [] -> []\n"),
+    io:put_chars(Fd, "        [] -> [];\n"),
+    io:put_chars(Fd, "        {error,Err} -> error({badarg, Err})\n"),
     io:put_chars(Fd, "    end.\n\n"),
     io:put_chars(Fd, "-spec casefold(unicode:chardata()) -> "
                  "maybe_improper_list(gc(),unicode:chardata()).\n"),
@@ -235,7 +238,8 @@ gen_static(Fd) ->
     io:put_chars(Fd, "                {_,_,_,Fold} -> [Fold|Str];\n"),
     io:put_chars(Fd, "                {_,Lower} -> [Lower|Str]\n"),
     io:put_chars(Fd, "            end;\n"),
-    io:put_chars(Fd, "        [] -> []\n"),
+    io:put_chars(Fd, "        [] -> [];\n"),
+    io:put_chars(Fd, "        {error,Err} -> error({badarg, Err})\n"),
     io:put_chars(Fd, "    end.\n\n"),
 
     ok.
-- 
2.16.4

openSUSE Build Service is sponsored by