File 6833-unicode_util-Optimize-gc_1.patch of Package erlang
From 027a4f57d2d52f4ea7f38db9f302c6da408a6aa0 Mon Sep 17 00:00:00 2001
From: Dan Gudmundsson <dgud@erlang.org>
Date: Mon, 24 Nov 2025 11:59:10 +0100
Subject: [PATCH 3/3] unicode_util: Optimize gc_1
Postpone cons creation, make gc_1 with two arguments instead of one.
Reduces move instructions.
---
lib/stdlib/uc_spec/gen_unicode_mod.escript | 74 +++++++++++-----------
1 file changed, 38 insertions(+), 36 deletions(-)
diff --git a/lib/stdlib/uc_spec/gen_unicode_mod.escript b/lib/stdlib/uc_spec/gen_unicode_mod.escript
index 2088531aa0..7f620eb2d7 100644
--- a/lib/stdlib/uc_spec/gen_unicode_mod.escript
+++ b/lib/stdlib/uc_spec/gen_unicode_mod.escript
@@ -797,8 +797,8 @@ gen_gc(Fd, GBP) ->
" _ -> %% Keep the tail binary.\n"
" case cp_no_bin(T1) of\n"
" [CP2|_]=T3 when is_integer(CP2), 0 =< CP2, CP2 < 256 -> [CP1|T3]; %% Asciii Fast path\n"
- " binary_found -> gc_1(T);\n"
- " T4 -> gc_1([CP1|T4])\n"
+ " binary_found -> gc_1(T1, CP1);\n"
+ " T4 -> gc_1(T4, CP1)\n"
" end\n"
" end;\n"
"gc(<<>>) -> [];\n"
@@ -807,11 +807,11 @@ gen_gc(Fd, GBP) ->
" case Rest of\n"
" <<CP2/utf8, _/binary>> when CP2 < 256 -> %% Ascii Fast path\n"
" [CP1|Rest];\n"
- " _ -> gc_1([CP1|Rest])\n"
+ " _ -> gc_1(Rest, CP1)\n"
" end;\n"
- " true -> gc_1([CP1|Rest])\n"
+ " true -> gc_1(Rest, CP1)\n"
" end;\n"
- "gc([CP|_]=T) when ?IS_CP(CP) -> gc_1(T);\n"
+ "gc([CP|T]) when ?IS_CP(CP) -> gc_1(T,CP);\n"
"gc(Str) ->\n"
" case cp(Str) of\n"
" {error,_}=Error -> Error;\n"
@@ -819,58 +819,57 @@ gen_gc(Fd, GBP) ->
" end.\n"
),
- GenExtP = fun(Range) -> io:format(Fd, "gc_1~s gc_ext_pict(R1,[CP]);\n", [gen_clause(Range)]) end,
- ExtendedPictographic0 = merge_ranges(maps:get(extended_pictographic,GBP)),
- %% Pick codepoints below 256 (some data knowledge here)
- {ExtendedPictographicLow,_ExtendedPictographicHigh} =
- lists:splitwith(fun({Start,undefined}) -> Start < 256 end,ExtendedPictographic0),
io:put_chars(Fd,
- "\ngc_1([$\\r|R0] = R) ->\n"
- " case cp(R0) of % Don't break CRLF\n"
- " [$\\n|R1] -> [[$\\r,$\\n]|R1];\n"
- " _ -> R\n"
- " end;\n"),
- io:put_chars(Fd, "\n%% Handle control\n"),
- GenControl = fun(Range) -> io:format(Fd, "gc_1~s R0;\n", [gen_clause(Range)]) end,
+ "\n"
+ "%% gc_1\n"
+ "gc_1(R0, $\r) ->\n"
+ " case cp(R0) of % Don't break CRLF\n"
+ " [$\n|R1] -> [[$\r,$\n]|R1];\n"
+ " _ -> [$\r|R0]\n"
+ " end;\n"
+ "%% Handle control\n"),
+ GenControl = fun(Range) -> io:format(Fd, "gc_1~s [CP|R0];\n", [gen_clause(Range)]) end,
CRs0 = merge_ranges(maps:get(cr, GBP) ++ maps:get(lf, GBP) ++ maps:get(control, GBP), false),
[R1,R2,R3|Crs] = CRs0,
[GenControl(CP) || CP <- merge_ranges([R1,R2,R3], split), CP =/= {$\r, undefined}],
%%GenControl(R1),GenControl(R2),GenControl(R3),
io:put_chars(Fd, "\n%% Optimize Latin-1\n"),
+ GenExtP = fun(Range) -> io:format(Fd, "gc_1~s gc_ext_pict(R0,[CP]);\n", [gen_clause(Range)]) end,
+ ExtendedPictographic0 = merge_ranges(maps:get(extended_pictographic,GBP)),
+ %% Pick codepoints below 256 (some data knowledge here)
+ {ExtendedPictographicLow,_ExtendedPictographicHigh} =
+ lists:splitwith(fun({Start,undefined}) -> Start < 256 end,ExtendedPictographic0),
[GenExtP(CP) || CP <- merge_ranges(ExtendedPictographicLow)],
io:put_chars(Fd,
- "gc_1([CP|R]=R0) when is_integer(CP), 0 =< CP, CP < 256 ->\n"
- " case R of\n"
- " [CP2|_] when is_integer(CP2), 0 =< CP2, CP2 < 256 -> R0;\n"
- " _ -> gc_extend(cp(R), R, CP)\n"
+ "gc_1(R0,CP) when is_integer(CP), 0 =< CP, CP < 256 ->\n"
+ " case R0 of\n"
+ " [CP2|_] when is_integer(CP2), 0 =< CP2, CP2 < 256 -> [CP|R0];\n"
+ " _ -> gc_extend(cp(R0), R0, CP)\n"
" end;\n"
- "gc_1([CP|_]) when not ?IS_CP(CP) ->\n"
+ "gc_1(_, CP) when not ?IS_CP(CP) ->\n"
" error({badarg,CP});\n"),
io:put_chars(Fd, "\n%% Continue control\n"),
[GenControl(CP) || CP <- merge_ranges(Crs)],
- %% One clause per CP
- %% CRs0 = merge_ranges(maps:get(cr, GBP) ++ maps:get(lf, GBP) ++ maps:get(control, GBP)),
- %% [GenControl(CP) || CP <- CRs0, CP =/= {$\r, undefined}],
io:put_chars(Fd, "\n%% Handle prepend\n"),
- GenPrepend = fun(Range) -> io:format(Fd, "gc_1~s gc_prepend(R1, CP);\n", [gen_clause(Range)]) end,
+ GenPrepend = fun(Range) -> io:format(Fd, "gc_1~s gc_prepend(R0, CP);\n", [gen_clause(Range)]) end,
[GenPrepend(CP) || CP <- merge_ranges(maps:get(prepend,GBP))],
io:put_chars(Fd, "\n%% Handle Hangul L\n"),
- GenHangulL = fun(Range) -> io:format(Fd, "gc_1~s gc_h_L(R1,[CP]);\n", [gen_clause(Range)]) end,
+ GenHangulL = fun(Range) -> io:format(Fd, "gc_1~s gc_h_L(R0,[CP]);\n", [gen_clause(Range)]) end,
[GenHangulL(CP) || CP <- merge_ranges(maps:get(l,GBP))],
io:put_chars(Fd, "%% Handle Hangul V\n"),
- GenHangulV = fun(Range) -> io:format(Fd, "gc_1~s gc_h_V(R1,[CP]);\n", [gen_clause(Range)]) end,
+ GenHangulV = fun(Range) -> io:format(Fd, "gc_1~s gc_h_V(R0,[CP]);\n", [gen_clause(Range)]) end,
[GenHangulV(CP) || CP <- merge_ranges(maps:get(v,GBP))],
io:put_chars(Fd, "%% Handle Hangul T\n"),
- GenHangulT = fun(Range) -> io:format(Fd, "gc_1~s gc_h_T(R1,[CP]);\n", [gen_clause(Range)]) end,
+ GenHangulT = fun(Range) -> io:format(Fd, "gc_1~s gc_h_T(R0,[CP]);\n", [gen_clause(Range)]) end,
[GenHangulT(CP) || CP <- merge_ranges(maps:get(t,GBP))],
io:put_chars(Fd, "%% Handle Hangul LV and LVT special, since they are large\n"),
- io:put_chars(Fd, "gc_1([CP|_]=R0) when is_integer(CP), 44000 < CP, CP < 56000 -> gc_h_lv_lvt(R0, R0, []);\n"),
+ io:put_chars(Fd, "gc_1(R0,CP) when is_integer(CP), 44000 < CP, CP < 56000 -> R=[CP|R0], gc_h_lv_lvt(R, R, []);\n"),
io:put_chars(Fd, "\n%% Handle Regional\n"),
- GenRegional = fun(Range) -> io:format(Fd, "gc_1~s gc_regional(R1,CP);\n", [gen_clause(Range)]) end,
+ GenRegional = fun(Range) -> io:format(Fd, "gc_1~s gc_regional(R0,CP);\n", [gen_clause(Range)]) end,
[GenRegional(CP) || CP <- merge_ranges(maps:get(regional_indicator,GBP))],
%% io:put_chars(Fd, "%% Handle E_Base\n"),
%% GenEBase = fun(Range) -> io:format(Fd, "gc_1~s gc_e_cont(R1,[CP]);\n", [gen_clause(Range)]) end,
@@ -884,7 +886,7 @@ gen_gc(Fd, GBP) ->
io:put_chars(Fd, "\n%% default clauses\n"),
io:put_chars(Fd,
- "gc_1([CP|R]) ->\n"
+ "gc_1(R,CP) ->\n"
" case is_ext_pict(CP) of\n"
" true -> gc_ext_pict(R, [CP]);\n"
" false ->\n"
@@ -901,11 +903,11 @@ gen_gc(Fd, GBP) ->
io:put_chars(Fd,
"gc_prepend(R00, CP0) ->\n"
" case cp(R00) of\n"
- " [CP1|_] = R0 ->\n"
+ " [CP1|R0] ->\n"
" case is_control(CP1) of\n"
" true -> [CP0|R00];\n"
" false ->\n"
- " case gc_1(R0) of\n"
+ " case gc_1(R0, CP1) of\n"
" [GC|R1] when is_integer(GC) -> [[CP0,GC]|R1];\n"
" [GC|R1] -> [[CP0|GC]|R1]\n"
" end\n"
@@ -1386,9 +1388,9 @@ gen_width_table(Fd, WideChars) ->
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
gen_clause({R0, undefined}) ->
- io_lib:format("([~w=CP|R1]=R0) ->", [R0]);
+ io_lib:format("(R0, ~w=CP) ->", [R0]);
gen_clause({R0, R1}) ->
- io_lib:format("([CP|R1]=R0) when is_integer(CP), ~w =< CP, CP =< ~w ->", [R0,R1]).
+ io_lib:format("(R0, CP) when is_integer(CP), ~w =< CP, CP =< ~w ->", [R0,R1]).
gen_clause2({R0, undefined}) ->
io_lib:format("([~w=CP|R1], R0, Acc) ->", [R0]);
--
2.51.0