File 6832-Fix-category-generation.patch of Package erlang

From bd27796767f38bf9912f3e3910f0314a1f471e2b Mon Sep 17 00:00:00 2001
From: Dan Gudmundsson <dgud@erlang.org>
Date: Fri, 21 Nov 2025 16:33:49 +0100
Subject: [PATCH 2/3] Fix category generation

---
 lib/stdlib/uc_spec/gen_unicode_mod.escript | 234 +++++++++++----------
 1 file changed, 122 insertions(+), 112 deletions(-)

diff --git a/lib/stdlib/uc_spec/gen_unicode_mod.escript b/lib/stdlib/uc_spec/gen_unicode_mod.escript
index aa353f6325..2088531aa0 100644
--- a/lib/stdlib/uc_spec/gen_unicode_mod.escript
+++ b/lib/stdlib/uc_spec/gen_unicode_mod.escript
@@ -312,8 +312,8 @@ gen_header(Fd) ->
      {punctuation,dash} |
      {punctuation,open} |
      {punctuation,close} |
-     {punctuation,initial} |
-     {punctuation,final} |
+     {punctuation,initial} | % Punctuation, Initial quote (may behave like open or close depending on usage)
+     {punctuation,final} |   % Punctuation, Final quote (may behave like open or close depending on usage)
      {punctuation,other} |
      {symbol,math} |
      {symbol,currency} |
@@ -337,12 +337,12 @@ gen_static(Fd) ->
        'category':= category()}.
 lookup(Codepoint) when ?IS_CP(Codepoint) ->
     {CCC,Can,Comp,Cat} = unicode_table(Codepoint),
-    #{ccc=>CCC, canon=>Can, compat=>Comp, category=>category(Codepoint,Cat)}.
+    #{ccc=>CCC, canon=>Can, compat=>Comp, category=>category(Cat,Codepoint)}.
 
 -spec category(char()) -> category().
 category(Codepoint) when ?IS_CP(Codepoint) ->
     {_,_,_,Cat} = unicode_table(Codepoint),
-    category(Codepoint,Cat).
+    category(Cat,Codepoint).
 
 "),
     io:put_chars(Fd, "-spec get_case(char()) -> #{'fold':=gc(), 'lower':=gc(), 'title':=gc(), 'upper':=gc()}.\n"),
@@ -420,9 +420,9 @@ category(Codepoint) when ?IS_CP(Codepoint) ->
     io:put_chars(Fd, "    is_wide_cp(C) orelse is_wide(Cs);\n"),
     io:put_chars(Fd, "is_wide([]) ->\n    false.\n\n"),
 
-    io:put_chars(Fd, "category(CP, lookup_category) ->\n"
-                 "    cat_translate(lookup_category(CP));\n"
-                 "category(_, Def) -> cat_translate(Def).\n\n"),
+    io:put_chars(Fd, "category(lookup_category, Cp) ->\n"
+                 "    lookup_category(Cp);\n"
+                 "category(Def, _) -> Def.\n\n"),
     ok.
 
 gen_norm(Fd) ->
@@ -674,13 +674,13 @@ gen_props(Fd, Props, Data) ->
     OIDS = maps:get(other_id_start, Props),
     io:put_chars(Fd, "-spec is_other_id_start(gc()) -> boolean().\n"),
     IsODIS = fun(Range) -> io:format(Fd, "is_other_id_start~s true;\n", [gen_single_clause(Range)]) end,
-    [IsODIS(CP) || CP <- OIDS],
+    [IsODIS(CP) || CP <- merge_ranges(OIDS)],
     io:put_chars(Fd, "is_other_id_start(_) -> false.\n\n"),
 
     OICS = maps:get(other_id_continue, Props),
     io:put_chars(Fd, "-spec is_other_id_continue(gc()) -> boolean().\n"),
     IsOICS = fun(Range) -> io:format(Fd, "is_other_id_continue~s true;\n", [gen_single_clause(Range)]) end,
-    [IsOICS(CP) || CP <- OICS],
+    [IsOICS(CP) || CP <- merge_ranges(OICS)],
     io:put_chars(Fd, "is_other_id_continue(_) -> false.\n\n"),
 
     PS0 = maps:get(pattern_syntax, Props),
@@ -697,7 +697,7 @@ gen_props(Fd, Props, Data) ->
               end,
     PS = [{PSC, undefined} || {PSC, undefined} <- split_ranges(PS0, []), KeepCat(PSC)],
     %% [io:format("~p ~p~n", [P, (array:get(P, Data))#cp.cat]) || {P,_} <- PS],
-    [IsNLPS(CP) || CP <- PS],
+    [IsNLPS(CP) || CP <- merge_ranges(PS)],
     io:put_chars(Fd, "is_letter_not_pattern_syntax(_) -> true.\n\n"),
 
     ok.
@@ -848,7 +848,7 @@ gen_gc(Fd, GBP) ->
                  "gc_1([CP|_]) when not ?IS_CP(CP) ->\n"
                  "    error({badarg,CP});\n"),
     io:put_chars(Fd, "\n%% Continue control\n"),
-    [GenControl(CP) || CP <- Crs],
+    [GenControl(CP) || CP <- merge_ranges(Crs)],
     %% One clause per CP
     %% CRs0 = merge_ranges(maps:get(cr, GBP) ++ maps:get(lf, GBP) ++ maps:get(control, GBP)),
     %% [GenControl(CP) || CP <- CRs0, CP =/= {$\r, undefined}],
@@ -1153,8 +1153,7 @@ gen_unicode_table(Fd, Data, UpdateTests) ->
     case UpdateTests of
         true ->
             Dict1 = lists:map(fun({Id,{CCC, Canon, Compat, Cat}}) ->
-                                      {_, ECat} = lists:keyfind(Cat, 1, category_translate()),
-                                      {Id, {CCC, Canon, Compat, ECat}}
+                                      {Id, {CCC, Canon, Compat, Cat}}
                               end, Dict0),
             TestFile = "../test/unicode_util_SUITE_data/unicode_table.bin",
             io:format("Updating: ~s~n", [TestFile]),
@@ -1166,103 +1165,126 @@ gen_unicode_table(Fd, Data, UpdateTests) ->
     [io:format(Fd, "unicode_table(~w) -> ~w;~n", [CP, Map]) || {CP,Map} <- NonDef],
     io:format(Fd, "unicode_table(_) -> ~w.~n~n",[Def]),
 
-    [io:format(Fd, "cat_translate(~w) -> ~w;~n", [Cat, EC]) || {Cat,EC} <- category_translate()],
-    io:format(Fd, "cat_translate(Cat) -> error({internal_error, Cat}).~n~n",[]),
+    %% [io:format(Fd, "cat_translate(~w) -> ~w;~n", [Cat, EC]) || {Cat,EC} <- category_translate()],
+    %% io:format(Fd, "cat_translate(Cat) -> error({internal_error, Cat}).~n~n",[]),
     gen_category(Fd, CatTable, Data),
     ok.
 
 category([C,Sub]) ->
-    list_to_atom([C-$A+$a, Sub]).
+    Map = category_translate(),
+    maps:get(list_to_atom([C-$A+$a, Sub]), Map).
 
 category_translate() ->
-    [{lu, {letter, uppercase}},       % Letter, Uppercase
-     {ll, {letter, lowercase}},       % Letter, Lowercase
-     {lt, {letter, titlecase}},       % Letter, Titlecase
-     {mn, {mark, non_spacing}},       % Mark, Non-Spacing
-     {mc, {mark, spacing_combining}}, % Mark, Spacing Combining
-     {me, {mark, enclosing}},         % Mark, Enclosing
-     {nd, {number, decimal}},         % Number, Decimal Digit
-     {nl, {number, letter}},          % Number, Letter
-     {no, {number, other}},           % Number, Other
-     {zs, {separator, space}},        % Separator, Space
-     {zl, {separator, line}},         % Separator, Line
-     {zp, {separator, paragraph}},    % Separator, Paragraph
-     {cc, {other, control}},          % Other, Control
-     {cf, {other, format}},           % Other, Format
-     {cs, {other, surrogate}},        % Other, Surrogate
-     {co, {other, private}},          % Other, Private Use
-     {cn, {other, not_assigned}},     % Other, Not Assigned (no characters in the file have this property)
-     {lm, {letter, modifier}},        % Letter, Modifier
-     {lo, {letter, other}},           % Letter, Other
-     {pc, {punctuation, connector}},  % Punctuation, Connector
-     {pd, {punctuation, dash}},       % Punctuation, Dash
-     {ps, {punctuation, open}},       % Punctuation, Open
-     {pe, {punctuation, close}},      % Punctuation, Close
-     {pi, {punctuation, initial}},    % Punctuation, Initial quote (may behave like Ps or Pe depending on usage)
-     {pf, {punctuation, final}},      % Punctuation, Final quote (may behave like Ps or Pe depending on usage)
-     {po, {punctuation, other}},      % Punctuation, Other
-     {sm, {symbol, math}},            % Symbol, Math
-     {sc, {symbol, currency}},        % Symbol, Currency
-     {sk, {symbol, modifier}},        % Symbol, Modifier
-     {so, {symbol, other}}].          % Symbol, Other
+    #{lu => {letter, uppercase},       % Letter, Uppercase
+      ll => {letter, lowercase},       % Letter, Lowercase
+      lt => {letter, titlecase},       % Letter, Titlecase
+      mn => {mark, non_spacing},       % Mark, Non-Spacing
+      mc => {mark, spacing_combining}, % Mark, Spacing Combining
+      me => {mark, enclosing},         % Mark, Enclosing
+      nd => {number, decimal},         % Number, Decimal Digit
+      nl => {number, letter},          % Number, Letter
+      no => {number, other},           % Number, Other
+      zs => {separator, space},        % Separator, Space
+      zl => {separator, line},         % Separator, Line
+      zp => {separator, paragraph},    % Separator, Paragraph
+      cc => {other, control},          % Other, Control
+      cf => {other, format},           % Other, Format
+      cs => {other, surrogate},        % Other, Surrogate
+      co => {other, private},          % Other, Private Use
+      cn => {other, not_assigned},     % Other, Not Assigned (no characters in the file have this property)
+      lm => {letter, modifier},        % Letter, Modifier
+      lo => {letter, other},           % Letter, Other
+      pc => {punctuation, connector},  % Punctuation, Connector
+      pd => {punctuation, dash},       % Punctuation, Dash
+      ps => {punctuation, open},       % Punctuation, Open
+      pe => {punctuation, close},      % Punctuation, Close
+      pi => {punctuation, initial},    % Punctuation, Initial quote (may behave like Ps or Pe depending on usage)
+      pf => {punctuation, final},      % Punctuation, Final quote (may behave like Ps or Pe depending on usage)
+      po => {punctuation, other},      % Punctuation, Other
+      sm => {symbol, math},            % Symbol, Math
+      sc => {symbol, currency},        % Symbol, Currency
+      sk => {symbol, modifier},        % Symbol, Modifier
+      so => {symbol, other}            % Symbol, Other
+     }.
 
 gen_category(Fd, [{CP, {_, _, _, Cat}}|Rest], All) ->
-    gen_category(Fd, Rest, Cat, CP, CP, All, []).
+    {Single, Range, SubCat} = gen_category(Rest, Cat, CP, CP, All, [], [], []),
+    [io:format(Fd, "lookup_category(~w) -> ~w;~n", [X, C]) || {X,C} <- Single],
+
+    Fun = fun(subcat) -> "subcat_letter(CP)";
+             (Category) -> io_lib:format("~w", [Category])
+          end,
+    [io:format(Fd, "lookup_category(CP) when is_integer(CP), ~w =< CP, CP =< ~w -> ~s;~n",
+               [S, E, Fun(C)]) || {S,E,C} <- optimize_ranges_1(Range)],
+    io:put_chars(Fd, "lookup_category(_Cp) -> {other, not_assigned}.\n\n"),
+
+    {SubSingle, SubRange} = gen_letter(SubCat, All),
+    [io:format(Fd, "subcat_letter(~w) -> ~w;~n", [X, C]) || {X,C} <- SubSingle],
+    [io:format(Fd, "subcat_letter(CP) when is_integer(CP), ~w =< CP, CP =< ~w -> ~w;~n",
+               [S, E, C]) || {S,E,C} <- optimize_ranges_1(SubRange)],
+    io:put_chars(Fd,
+                 "subcat_letter(CP) ->\n"
+                 "    case case_table(CP) of\n"
+                 "        {CP, CP} -> {letter,other};\n"
+                 "        {CP, _}  -> {letter,uppercase};\n"
+                 "        {_, CP}  -> {letter,lowercase};\n"
+                 "        {_, _, CP, _} -> {letter,titlecase};\n"
+                 "        {CP, _, _, _} -> {letter,uppercase};\n"
+                 "        {_,CP,_,_} -> {letter,lowercase}\n"
+                 "    end.\n\n"),
+    ok.
 
-gen_category(Fd, [{CP, {_, _, _, NextCat}}|Rest], Cat, Start, End, All, Acc)
+gen_category([{CP, {_, _, _, NextCat}}|Rest], Cat, Start, End, All, Single, Range, SubCats)
   when End+1 =:= CP ->
     IsLetterCat = letter_cat(NextCat, Cat),
     if NextCat =:= Cat ->
-            gen_category(Fd, Rest, Cat, Start, CP, All, Acc);
+            gen_category(Rest, Cat, Start, CP, All, Single, Range, SubCats);
        IsLetterCat ->
-            gen_category(Fd, Rest, letter, Start, CP, All, Acc);
+            gen_category(Rest, letter, Start, CP, All, Single, Range, SubCats);
        Start =:= End ->
-            io:format(Fd, "lookup_category(~w) -> ~w;~n", [Start, Cat]),
-            gen_category(Fd, Rest, NextCat, CP, CP, All, Acc);
+            gen_category(Rest, NextCat, CP, CP, All, [{Start, Cat}|Single], Range, SubCats);
        true ->
             case Cat of
                 letter ->
-                    io:format(Fd, "lookup_category(CP) when ~w =< CP, CP =< ~w -> subcat_letter(CP);~n",
-                              [Start, End]),
-                    gen_category(Fd, Rest, NextCat, CP, CP, All,
-                                 lists:reverse(lists:seq(Start, End)) ++ Acc);
+                    gen_category(Rest, NextCat, CP, CP, All,
+                                 Single, [{Start, End, subcat}|Range],
+                                 lists:reverse(lists:seq(Start, End)) ++ SubCats);
                 _ ->
-                    io:format(Fd, "lookup_category(CP) when ~w =< CP, CP =< ~w -> ~w;~n", [Start, End, Cat]),
-                    gen_category(Fd, Rest, NextCat, CP, CP, All, Acc)
+                    gen_category(Rest, NextCat, CP, CP, All,
+                                 Single, [{Start, End, Cat}|Range], SubCats)
             end
     end;
-gen_category(Fd, [{CP, {_, _, _, NewCat}}|Rest]=Cont, Cat, Start, End, All, Acc) ->
+gen_category([{CP, {_, _, _, NewCat}}|Rest]=Cont, Cat, Start, End, All, Single, Range, SubCats) ->
     case array:get(End+1, All) of
         undefined ->
             if Start =:= End ->
-                    io:format(Fd, "lookup_category(~w) -> ~w;~n", [Start, Cat]),
-                    gen_category(Fd, Rest, NewCat, CP, CP, All, Acc);
+                    gen_category(Rest, NewCat, CP, CP, All,
+                                 [{Start, Cat}|Single], Range, SubCats);
                true ->
                     case Cat of
                         letter ->
-                            io:format(Fd, "lookup_category(CP) when ~w =< CP, CP =< ~w -> subcat_letter(CP);~n",
-                                      [Start, End]),
-                            gen_category(Fd, Rest, NewCat, CP, CP, All,
-                                         lists:reverse(lists:seq(Start, End)) ++ Acc);
+                            gen_category(Rest, NewCat, CP, CP, All,
+                                         Single, [{Start, End, subcat}|Range],
+                                         lists:reverse(lists:seq(Start, End)) ++ SubCats);
                         _ ->
-                            io:format(Fd, "lookup_category(CP) when ~w =< CP, CP =< ~w -> ~w;~n",
-                                      [Start, End, Cat]),
-                            gen_category(Fd, Rest, NewCat, CP, CP, All, Acc)
+                            gen_category(Rest, NewCat, CP, CP, All,
+                                         Single, [{Start, End, Cat}|Range], SubCats)
                     end
             end;
         _ ->  %% We can make ranges larger by setting already assigned category
-            gen_category(Fd, Cont, Cat, Start, End+1, All, Acc)
+            gen_category(Cont, Cat, Start, End+1, All, Single, Range, SubCats)
     end;
-gen_category(Fd, [], Cat, Start, End, All, Acc) ->
+gen_category([], Cat, Start, End, _All, Single, Range, SubCats) ->
     case Start =:= End of
         true ->
-            io:format(Fd, "lookup_category(~w) -> ~w;~n", [Start, Cat]);
+            {lists:reverse([{Start, Cat}|Single]),
+             lists:reverse(Range),
+             lists:reverse(SubCats)};
         false ->
-            io:format(Fd, "lookup_category(CP) when ~w =< CP, CP =< ~w -> ~w;~n", [Start, End, Cat])
-    end,
-    io:put_chars(Fd, "lookup_category(Cp) -> cn.\n\n"),
-    gen_letter(Fd, lists:reverse(Acc), All),
-    ok.
+            {lists:reverse(Single),
+             lists:reverse([{Start, End,Cat}|Range]),
+             lists:reverse(SubCats)}
+    end.
 
 letter_cat(lm, _) ->
     false;
@@ -1271,59 +1293,47 @@ letter_cat(_, lm) ->
 letter_cat(L1, L2) ->
     is_letter(L1) andalso (L2 =:= letter orelse is_letter(L2)).
 
-is_letter(LC) ->
-    lists:member(LC, [lu,ll,lt,lo,lm]).
+is_letter({letter, _}) -> true;
+is_letter(_) -> false.
 
-gen_letter(Fd, Letters, All) ->
-    gen_letter(Fd, Letters, All, []).
-gen_letter(Fd, [CP|Rest], All, Acc) ->
+gen_letter(Letters, All) ->
+    gen_letter(Letters, All, []).
+gen_letter([CP|Rest], All, Acc) ->
     case array:get(CP, All) of
         undefined ->
-            gen_letter(Fd, Rest, All, Acc);
+            gen_letter(Rest, All, Acc);
         #cp{cat=Cat0, cs=Cs} ->
             case {category(Cat0), case_table(CP,case_data(CP, Cs))} of
                 {Sub,Sub} ->
-                    gen_letter(Fd, Rest, All, Acc);
+                    gen_letter(Rest, All, Acc);
                 {lm,_} ->
-                    gen_letter(Fd, Rest, All, Acc);
+                    gen_letter(Rest, All, Acc);
                 {Cat, _Dbg} ->
                     case is_letter(Cat) of
                         true ->
-                            gen_letter(Fd, Rest, All, [{CP, Cat}|Acc]);
+                            gen_letter(Rest, All, [{CP, Cat}|Acc]);
                         false ->
-                            gen_letter(Fd, Rest, All, Acc)
+                            gen_letter(Rest, All, Acc)
                     end
             end
     end;
-gen_letter(Fd, [], _, Acc) ->
+gen_letter([], _, Acc) ->
     [{Start, Cat}|SCletters] = lists:reverse(Acc),
-    subcat_letter(Fd, SCletters, Start, Start, Cat),
-    io:put_chars(Fd,
-                 "subcat_letter(CP) ->\n"
-                 "    case case_table(CP) of\n"
-                 "        {CP, CP} -> lo;      %{letter,other};\n"
-                 "        {CP, _}  -> lu;      %{letter,uppercase};\n"
-                 "        {_, CP}  -> ll;      %{letter,lowercase};\n"
-                 "        {_, _, CP, _} -> lt; %{letter,titlecase};\n"
-                 "        {CP, _, _, _} -> lu; %{letter,uppercase};\n"
-                 "        {_,CP,_,_} -> ll     %{letter,lowercase}\n"
-                 "    end.\n\n").
-
-subcat_letter(Fd, [{CP, Cat}|R], Start, End, Cat) when End+1 =:= CP ->
-    subcat_letter(Fd, R, Start, CP, Cat);
-subcat_letter(Fd, Rest, Start, Start, Cat) ->
-    io:format(Fd, "subcat_letter(~w) -> ~w;\n",[Start,Cat]),
-    case Rest of
-        [] -> ok;
-        [{CP, NewCat}|R] -> subcat_letter(Fd, R, CP, CP, NewCat)
-    end;
-subcat_letter(Fd, Rest, Start, End, Cat) ->
-    io:format(Fd, "subcat_letter(CP) when ~w =< CP, CP =< ~w -> ~w;\n",[Start,End,Cat]),
-    case Rest of
-        [] -> ok;
-        [{CP, NewCat}|R] -> subcat_letter(Fd, R, CP, CP, NewCat)
+    subcat_letter(SCletters, Start, Start, Cat, [], []).
+
+subcat_letter([{CP, Cat}|R], Start, End, Cat, Single, Range) when End+1 =:= CP ->
+    subcat_letter(R, Start, CP, Cat, Single, Range);
+subcat_letter([{CP, NewCat}|R], Start, Start, Cat, Single, Range) ->
+    subcat_letter(R, CP, CP, NewCat, [{Start, Cat}|Single], Range);
+subcat_letter([{CP, NewCat}|R], Start, End, Cat, Single, Range) ->
+    subcat_letter(R, CP, CP, NewCat, Single, [{Start, End, Cat}|Range]);
+subcat_letter([], Start, End, Cat, Single, Range) ->
+    case Start == End of
+        true -> {lists:reverse([{Start, Cat}|Single]), lists:reverse(Range)};
+        false -> {lists:reverse(Single), lists:reverse([{Start, End, Cat}|Range])}
     end.
 
+
 case_table(CP, CaseData) ->
     case CaseData of
         {CP, CP} -> lo;
-- 
2.51.0

openSUSE Build Service is sponsored by