File 4151-stdlib-Optimize-handling-of-Unicode-in-the-string-mo.patch of Package erlang

From cce2d5ab396ede0627a2a83f71fa6651de459faf Mon Sep 17 00:00:00 2001
From: Hans Bolinder <hasse@erlang.org>
Date: Mon, 4 Mar 2019 12:07:18 +0100
Subject: [PATCH] stdlib: Optimize handling of Unicode in the string module

The unicode_util:cp() function handles deep lists faster by returning
the rest of the input more balanced to the right than before.
---
 lib/stdlib/test/string_SUITE.erl           | 32 ++++++++++++++++++++++--------
 lib/stdlib/uc_spec/gen_unicode_mod.escript | 31 +++++++++++++++++++----------
 2 files changed, 45 insertions(+), 18 deletions(-)
 mode change 100755 => 100644 lib/stdlib/uc_spec/gen_unicode_mod.escript

diff --git a/lib/stdlib/test/string_SUITE.erl b/lib/stdlib/test/string_SUITE.erl
index 251e09121c..fe01900c55 100644
--- a/lib/stdlib/test/string_SUITE.erl
+++ b/lib/stdlib/test/string_SUITE.erl
@@ -753,19 +753,22 @@ do_measure(DataDir) ->
     io:format("~p~n",[byte_size(Bin)]),
     Do = fun(Name, Func, Mode) ->
                  {N, Mean, Stddev, _} = time_func(Func, Mode, Bin),
-                 io:format("~15w ~6w ~6.2fms ±~5.2fms #~.2w gc included~n",
+                 io:format("~15w ~11w ~8.2fms ±~6.2fms #~.2w gc included~n",
                            [Name, Mode, Mean/1000, Stddev/1000, N])
          end,
     Do2 = fun(Name, Func, Mode) ->
                   {N, Mean, Stddev, _} = time_func(Func, binary, <<>>),
-                  io:format("~15w ~6w ~6.2fms ±~5.2fms #~.2w gc included~n",
+                  io:format("~15w ~11w ~8.2fms ±~6.2fms #~.2w gc included~n",
                             [Name, Mode, Mean/1000, Stddev/1000, N])
           end,
+    %% lefty_list means a list balanced to the left, like
+    %% [[[30],31],32]. Only some functions check such lists.
+    Modes = [list, lefty_list, binary],
     io:format("----------------------~n"),
 
     Do(old_tokens, fun(Str) -> string:tokens(Str, [$\n,$\r]) end, list),
     Tokens = {lexemes, fun(Str) -> string:lexemes(Str, [$\n,$\r]) end},
-    [Do(Name,Fun,Mode) || {Name,Fun} <- [Tokens], Mode <- [list, binary]],
+    [Do(Name,Fun,Mode) || {Name,Fun} <- [Tokens], Mode <- Modes],
 
     S0 = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy.....",
     S0B = <<"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy.....">>,
@@ -823,17 +826,17 @@ do_measure(DataDir) ->
 
     io:format("--~n",[]),
     NthTokens = {nth_lexemes, fun(Str) -> string:nth_lexeme(Str, 18000, [$\n,$\r]) end},
-    [Do(Name,Fun,Mode) || {Name,Fun} <- [NthTokens], Mode <- [list, binary]],
+    [Do(Name,Fun,Mode) || {Name,Fun} <- [NthTokens], Mode <- Modes],
     Do2(take_t, repeat(fun() -> string:take(S0, [$.,$y], false, trailing) end), list),
     Do2(take_t, repeat(fun() -> string:take(S0B, [$.,$y], false, trailing) end), binary),
     Do2(take_tc, repeat(fun() -> string:take(S0, [$x], true, trailing) end), list),
     Do2(take_tc, repeat(fun() -> string:take(S0B, [$x], true, trailing) end), binary),
 
     Length = {length, fun(Str) -> string:length(Str) end},
-    [Do(Name,Fun,Mode) || {Name,Fun} <- [Length], Mode <- [list, binary]],
+    [Do(Name,Fun,Mode) || {Name,Fun} <- [Length], Mode <- Modes],
 
     Reverse = {reverse, fun(Str) -> string:reverse(Str) end},
-    [Do(Name,Fun,Mode) || {Name,Fun} <- [Reverse], Mode <- [list, binary]],
+    [Do(Name,Fun,Mode) || {Name,Fun} <- [Reverse], Mode <- Modes],
 
     ok.
 
@@ -1063,7 +1066,20 @@ time_func(N,Sum,SumSq, _, _, Res, _) ->
     {N, Mean, Stdev, Res}.
 
 mode(binary, Bin) -> Bin;
-mode(list, Bin) -> unicode:characters_to_list(Bin).
+mode(list, Bin) -> unicode:characters_to_list(Bin);
+mode(lefty_list, Bin) ->
+    L = unicode:characters_to_list(Bin),
+    to_left(L).
+
+to_left([]) ->
+    [];
+to_left([H|L]) ->
+    to_left([H], L).
+
+to_left(V, []) ->
+    V;
+to_left(V, [H|L]) ->
+    to_left([V,H], L).
 
 %%
 %% Old string lists Test cases starts here.
diff --git a/lib/stdlib/uc_spec/gen_unicode_mod.escript b/lib/stdlib/uc_spec/gen_unicode_mod.escript
old mode 100755
new mode 100644
index 70eec1a6f2..780353af58
--- a/lib/stdlib/uc_spec/gen_unicode_mod.escript
+++ b/lib/stdlib/uc_spec/gen_unicode_mod.escript
@@ -4,7 +4,7 @@
 %%
 %% %CopyrightBegin%
 %%
-%% Copyright Ericsson AB 2017. All Rights Reserved.
+%% Copyright Ericsson AB 2017-2019. All Rights Reserved.
 %%
 %% Licensed under the Apache License, Version 2.0 (the "License");
 %% you may not use this file except in compliance with the License.
@@ -461,16 +461,27 @@ gen_cp(Fd) ->
     io:put_chars(Fd, "cp([C|_]=L) when is_integer(C) -> L;\n"),
     io:put_chars(Fd, "cp([List]) -> cp(List);\n"),
     io:put_chars(Fd, "cp([List|R]) ->\n"),
-    io:put_chars(Fd, "    case cp(List) of\n"),
-    io:put_chars(Fd, "        [] -> cp(R);\n"),
-    io:put_chars(Fd, "        [CP] -> [CP|R];\n"),
-    io:put_chars(Fd, "        [C|R0] -> [C|[R0|R]];\n"),
-    io:put_chars(Fd, "        {error,Error} -> {error,[Error|R]}\n"),
-    io:put_chars(Fd, "    end;\n"),
+    io:put_chars(Fd, "    cpl(List, R);\n"),
     io:put_chars(Fd, "cp([]) -> [];\n"),
     io:put_chars(Fd, "cp(<<C/utf8, R/binary>>) -> [C|R];\n"),
     io:put_chars(Fd, "cp(<<>>) -> [];\n"),
-    io:put_chars(Fd, "cp(<<R/binary>>) -> {error,R}.\n\n"),
+    io:put_chars(Fd, "cp(<<R/binary>>) -> {error,R}.\n"),
+    io:put_chars(Fd, "\n"),
+    io:put_chars(Fd, "cpl([C], R) when is_integer(C) ->\n"),
+    io:put_chars(Fd, "    [C|R];\n"),
+    io:put_chars(Fd, "cpl([C|T], R) when is_integer(C) ->\n"),
+    io:put_chars(Fd, "    [C,T|R];\n"),
+    io:put_chars(Fd, "cpl([List], R) ->\n"),
+    io:put_chars(Fd, "    cpl(List, R);\n"),
+    io:put_chars(Fd, "cpl([List|T], R) ->\n"),
+    io:put_chars(Fd, "    cpl(List, [T|R]);\n"),
+    io:put_chars(Fd, "cpl([], R) ->\n"),
+    io:put_chars(Fd, "    cp(R);\n"),
+    io:put_chars(Fd, "cpl(<<C/utf8, T/binary>>, R) ->\n"),
+    io:put_chars(Fd, "    [C,T|R];\n"),
+    io:put_chars(Fd, "cpl(<<>>, R) ->\n"),
+    io:put_chars(Fd, "    cp(R);\n"),
+    io:put_chars(Fd, "cpl(<<B/binary>>, R) -> {error,[B|R]}.\n\n"),
     ok.
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -482,10 +493,10 @@ gen_gc(Fd, GBP) ->
                  " maybe_improper_list() | {error, unicode:chardata()}.\n"),
     io:put_chars(Fd,
                  "gc([CP1, CP2|_]=T)\n"
-                 "  when CP1 < 256, CP2 < 256, CP1 =/= $\r -> %% Ascii Fast path\n"
+                 "  when CP1 < 256, CP2 < 256, CP1 =/= $\\r -> %% Ascii Fast path\n"
                  "       T;\n"
                  "gc(<<CP1/utf8, Rest/binary>>) ->\n"
-                 "    if CP1 < 256, CP1 =/= $\r ->\n"
+                 "    if CP1 < 256, CP1 =/= $\\r ->\n"
                  "           case Rest of\n"
                  "               <<CP2/utf8, _/binary>> when CP2 < 256 -> %% Ascii Fast path\n"
                  "                   [CP1|Rest];\n"
-- 
2.16.4

openSUSE Build Service is sponsored by