File 1831-stdlib-Sync-re_SUITE-testoutput4-with-PCRE2-10.46.patch of Package erlang
From 883d88706063f292981847326512bcbb3c140073 Mon Sep 17 00:00:00 2001
From: Sverker Eriksson <sverker@erlang.org>
Date: Mon, 24 Nov 2025 15:58:56 +0100
Subject: [PATCH 1/7] stdlib: Sync re_SUITE 'testoutput4' with PCRE2 10.46
by parsing \N{U+hhh..} syntax in subject strings.
---
lib/stdlib/test/re_SUITE_data/testoutput4 | 8 +--
lib/stdlib/test/run_pcre_tests.erl | 78 +++++++++++------------
2 files changed, 41 insertions(+), 45 deletions(-)
diff --git a/lib/stdlib/test/re_SUITE_data/testoutput4 b/lib/stdlib/test/re_SUITE_data/testoutput4
index 17f67cdbee..b68ad2977b 100644
--- a/lib/stdlib/test/re_SUITE_data/testoutput4
+++ b/lib/stdlib/test/re_SUITE_data/testoutput4
@@ -3848,10 +3848,10 @@ No match
\x{1234}
0: \x{1234}
-#/(\x{1234}) \1/utf
-# \N{U+1234} \o{11064}
-# 0: \x{1234} \x{1234}
-# 1: \x{1234}
+/(\x{1234}) \1/utf
+ \N{U+1234} \o{11064}
+ 0: \x{1234} \x{1234}
+ 1: \x{1234}
# Test the full list of Unicode "Pattern White Space" characters that are to
# be ignored by /x. The pattern lines below may show up oddly in text editors
diff --git a/lib/stdlib/test/run_pcre_tests.erl b/lib/stdlib/test/run_pcre_tests.erl
index 544a8d026b..5327d3c9c2 100644
--- a/lib/stdlib/test/run_pcre_tests.erl
+++ b/lib/stdlib/test/run_pcre_tests.erl
@@ -1036,49 +1036,31 @@ multi_esc(Bin, Unicode) ->
{_Cha, Tpl} = multi_hex_esc(Bin, Unicode),
Tpl.
-multi_hex_esc(<<"x{",N,$},Rest/binary>>,Unicode) when ?is_hex_char(N) ->
- Cha = trx(N),
- case Unicode of
- false ->
- {Cha, {<<Cha:8>>,Rest}};
- _ ->
- {Cha, {int_to_utf8(Cha),Rest}}
+
+multi_hex_esc(<<"x{", Rest0/binary>>, Unicode) ->
+ case hex_num(Rest0, 6) of
+ {Cha, <<$}, Rest1/binary>>} ->
+ case {Unicode, Cha < 256} of
+ {false, true} ->
+ {Cha, {<<Cha:8>>, Rest1}};
+ _ ->
+ {Cha, {int_to_utf8(Cha), Rest1}}
+ end;
+ _Error ->
+ {no, no}
end;
-multi_hex_esc(<<"x{",N,O,$},Rest/binary>>,Unicode) when (?is_hex_char(N) and
- ?is_hex_char(O)) ->
- Cha = (trx(N) bsl 4) bor trx(O),
- case Unicode of
- false ->
- {Cha, {<<Cha:8>>,Rest}};
- _ ->
- {Cha, {int_to_utf8(Cha),Rest}}
+multi_hex_esc(<<"N{U+", Rest0/binary>>, Unicode) ->
+ case hex_num(Rest0, 6) of
+ {Cha, <<$}, Rest1/binary>>} ->
+ case {Unicode, Cha < 256} of
+ {false, true} ->
+ {Cha, {<<Cha:8>>, Rest1}};
+ _ ->
+ {Cha, {int_to_utf8(Cha), Rest1}}
+ end;
+ _Error ->
+ {no, no}
end;
-multi_hex_esc(<<"x{",N,O,P,$},Rest/binary>>,_) when (?is_hex_char(N) and
- ?is_hex_char(O) and
- ?is_hex_char(P)) ->
- Cha = (trx(N) bsl 8) bor (trx(O) bsl 4) bor trx(P),
- {Cha, {int_to_utf8(Cha),Rest}};
-multi_hex_esc(<<"x{",N,O,P,Q,$},Rest/binary>>,_) when (?is_hex_char(N) and
- ?is_hex_char(O) and
- ?is_hex_char(P) and
- ?is_hex_char(Q)) ->
- Cha = (trx(N) bsl 12) bor (trx(O) bsl 8) bor (trx(P) bsl 4) bor trx(Q),
- {Cha, {int_to_utf8(Cha),Rest}};
-multi_hex_esc(<<"x{",N,O,P,Q,R,$},Rest/binary>>,_) when (?is_hex_char(N) and
- ?is_hex_char(O) and
- ?is_hex_char(P) and
- ?is_hex_char(Q) and
- ?is_hex_char(R)) ->
- Cha = (trx(N) bsl 16) bor (trx(O) bsl 12) bor (trx(P) bsl 8) bor (trx(Q) bsl 4) bor trx(R),
- {Cha, {int_to_utf8(Cha),Rest}};
-multi_hex_esc(<<"x{",N,O,P,Q,R,S,$},Rest/binary>>,_) when (?is_hex_char(N) and
- ?is_hex_char(O) and
- ?is_hex_char(P) and
- ?is_hex_char(Q) and
- ?is_hex_char(R) and
- ?is_hex_char(S)) ->
- Cha = (trx(N) bsl 20) bor (trx(O) bsl 16) bor (trx(P) bsl 12) bor (trx(Q) bsl 8) bor (trx(R) bsl 4) bor trx(S),
- {Cha, {int_to_utf8(Cha),Rest}};
multi_hex_esc(<<$x,N,O,Rest/binary>>,_) when (?is_hex_char(N) and
?is_hex_char(O)) ->
Cha = (trx(N) bsl 4) bor trx(O),
@@ -1089,6 +1071,20 @@ multi_hex_esc(<<$x,N,Rest/binary>>,_) when ?is_hex_char(N) ->
multi_hex_esc(_,_) ->
{no, no}.
+hex_num(Bin, Maxlen) ->
+ hex_num(Bin, Maxlen, 0, 0).
+
+hex_num(Bin, MaxLen, MaxLen, Acc) ->
+ {Acc, Bin};
+hex_num(<<C, Rest/binary>>, Maxlen, Gotlen, Acc) when ?is_hex_char(C) ->
+ hex_num(Rest, Maxlen, Gotlen+1, (Acc bsl 4) bor trx(C));
+hex_num(Bin, _Maxlen, Gotlen, Acc) when Gotlen > 0 ->
+ {Acc, Bin};
+hex_num(Bin, _, 0, 0) ->
+ {error, "Expected hex number", Bin}.
+
+
+
single_esc($") ->
$";
single_esc(?SPACE) ->
--
2.51.0