File 6831-unicode_util-expand-range-of-2.patch of Package erlang
From eb8b2ff3dc64498fc9f7e79db445119e22626d98 Mon Sep 17 00:00:00 2001
From: Dan Gudmundsson <dgud@erlang.org>
Date: Fri, 21 Nov 2025 14:51:03 +0100
Subject: [PATCH 1/3] unicode_util: expand range of 2
Split ranges that cover only two code points into single code points, to make jump tables larger
---
lib/stdlib/uc_spec/gen_unicode_mod.escript | 51 +++++++++++++---------
1 file changed, 31 insertions(+), 20 deletions(-)
diff --git a/lib/stdlib/uc_spec/gen_unicode_mod.escript b/lib/stdlib/uc_spec/gen_unicode_mod.escript
index 35c3f05ce2..aa353f6325 100644
--- a/lib/stdlib/uc_spec/gen_unicode_mod.escript
+++ b/lib/stdlib/uc_spec/gen_unicode_mod.escript
@@ -822,7 +822,7 @@ gen_gc(Fd, GBP) ->
GenExtP = fun(Range) -> io:format(Fd, "gc_1~s gc_ext_pict(R1,[CP]);\n", [gen_clause(Range)]) end,
ExtendedPictographic0 = merge_ranges(maps:get(extended_pictographic,GBP)),
%% Pick codepoints below 256 (some data knowledge here)
- {ExtendedPictographicLow,ExtendedPictographicHigh} =
+ {ExtendedPictographicLow,_ExtendedPictographicHigh} =
lists:splitwith(fun({Start,undefined}) -> Start < 256 end,ExtendedPictographic0),
io:put_chars(Fd,
"\ngc_1([$\\r|R0] = R) ->\n"
@@ -879,17 +879,19 @@ gen_gc(Fd, GBP) ->
%% GenEBG = fun(Range) -> io:format(Fd, "gc_1~s gc_e_cont(R1,[CP]);\n", [gen_clause(Range)]) end,
%% [GenEBG(CP) || CP <- merge_ranges(maps:get(e_base_gaz,GBP))],
- io:put_chars(Fd, "\n%% Handle extended_pictographic\n"),
- [GenExtP(CP) || CP <- merge_ranges(ExtendedPictographicHigh)],
+ %% io:put_chars(Fd, "\n%% Handle extended_pictographic\n"),
+ %% [GenExtP(CP) || CP <- merge_ranges(ExtendedPictographicHigh)],
io:put_chars(Fd, "\n%% default clauses\n"),
io:put_chars(Fd,
"gc_1([CP|R]) ->\n"
- " case is_indic_consonant(CP) of\n"
- " true ->\n"
- " gc_indic(cp(R), R, false, [CP]);\n"
+ " case is_ext_pict(CP) of\n"
+ " true -> gc_ext_pict(R, [CP]);\n"
" false ->\n"
- " gc_extend(cp(R), R, CP)\n"
+ " case is_indic_consonant(CP) of\n"
+ " true -> gc_indic(cp(R), R, false, [CP]);\n"
+ " false -> gc_extend(cp(R), R, CP)\n"
+ " end\n"
" end.\n"),
io:put_chars(Fd, "%% Handle Prepend\n"),
@@ -1341,7 +1343,6 @@ decompose([CP|CPs], Data) when is_integer(CP) ->
#cp{dec=Dec} -> decompose(Dec, Data) ++ decompose(CPs,Data)
end.
-
decompose_compat(Canon, [], Data) ->
case decompose_compat(Canon, Data) of
Canon -> [];
@@ -1405,7 +1406,7 @@ merge_ranges(List, Opt) ->
split ->
split_ranges(Res0,[]); % One clause per CP
true ->
- Res = Res0,
+ Res = split_small_ranges(Res0, []),
OptRes = optimize_ranges(Res),
true = lists:sort(Res) =:= lists:sort(OptRes), %Assertion.
OptRes;
@@ -1423,11 +1424,6 @@ merge_ranges_1([{Next, Stop}|R], [{Start,Prev}|Acc]) when Prev+1 =:= Next ->
undefined -> merge_ranges_1(R, [{Start, Next}|Acc]);
_ -> merge_ranges_1(R, [{Start,Stop}|Acc])
end;
-merge_ranges_1([{Next, Stop}|R], [{Start,undefined}|Acc]) when Start+1 =:= Next ->
- case Stop of
- undefined -> merge_ranges_1(R, [{Start, Next}|Acc]);
- _ -> merge_ranges_1(R, [{Start,Stop}|Acc])
- end;
merge_ranges_1([Next|R], Acc) ->
merge_ranges_1(R, [Next|Acc]);
merge_ranges_1([], Acc) ->
@@ -1442,6 +1438,21 @@ split_ranges([{L,L}|Rs], Acc) ->
split_ranges([], Acc) ->
lists:reverse(Acc).
+split_small_ranges([{_,undefined}=CP|Rs], Acc) -> % Entry is already a single code point ({CP, undefined}): keep it unchanged.
+ split_small_ranges(Rs,[CP|Acc]);
+split_small_ranges([{L,L}|Rs], Acc) -> % Range whose bounds are equal: rewrite it as the single entry {L, undefined}.
+ split_small_ranges(Rs,[{L, undefined}|Acc]);
+split_small_ranges([{F,L}=Range|Rs], Acc) -> % Proper {First, Last} range: decide by its width.
+ case L - F of
+ 1 -> % Exactly two code points: emit both as single entries (L is pushed first so F precedes L after the final reverse).
+ split_small_ranges(Rs, [{L, undefined}, {F, undefined}|Acc]);
+ N when N > 1 -> % Three or more code points: keep the range as is. A negative width matches no branch and raises case_clause.
+ split_small_ranges(Rs, [Range|Acc])
+ end;
+split_small_ranges([], Acc) -> % Input exhausted: Acc was built in reverse, so restore the input order.
+ lists:reverse(Acc).
+
+
optimize_ranges(Rs0) ->
PF = fun({N,undefined}) when is_integer(N) -> true;
(_) -> false
--
2.51.0