File 1489-erts-Fix-re-utf-8-validation-yielding.patch of Package erlang
From 6d8f59dcc8622da2da10b33b8c393f784922aff6 Mon Sep 17 00:00:00 2001
From: Lukas Larsson <lukas@erlang.org>
Date: Wed, 6 Mar 2024 20:47:30 +0100
Subject: [PATCH] erts: Fix re utf-8 validation yielding

When an invalid utf-8 binary was given to re:run/3
and re:run/3 had to yield before encountering the invalid
byte, re:run/3 would view the error as a yield point
and loop indefinitely.

This commit fixes that problem and properly propagates
the invalid utf-8 error to the caller.
---
erts/emulator/beam/erl_bif_re.c | 27 +++++++--
erts/emulator/pcre/pcre_exec.c | 8 ++-
erts/emulator/pcre/pcre_valid_utf8.c | 1 +
lib/stdlib/test/re_SUITE.erl | 82 +++++++++++++++-------------
4 files changed, 74 insertions(+), 44 deletions(-)
diff --git a/erts/emulator/beam/erl_bif_re.c b/erts/emulator/beam/erl_bif_re.c
index 663fa3518f..bc8bbaac1c 100644
--- a/erts/emulator/beam/erl_bif_re.c
+++ b/erts/emulator/beam/erl_bif_re.c
@@ -1475,10 +1475,29 @@ static BIF_RETTYPE re_exec_trap(BIF_ALIST_3)
ASSERT(loop_count != 0xFFFFFFFF);
BUMP_REDS(BIF_P, loop_count / LOOP_FACTOR);
- if (rc == PCRE_ERROR_LOOP_LIMIT) {
- /* Trap */
- BUMP_ALL_REDS(BIF_P);
- BIF_TRAP3(&re_exec_trap_export, BIF_P, BIF_ARG_1, BIF_ARG_2, BIF_ARG_3);
+ if (rc < 0) {
+ switch (rc) {
+ /* No match... */
+ case PCRE_ERROR_NOMATCH:
+ case PCRE_ERROR_MATCHLIMIT:
+ case PCRE_ERROR_RECURSIONLIMIT:
+ break;
+ case PCRE_ERROR_LOOP_LIMIT:
+ /* Trap */
+ BUMP_ALL_REDS(BIF_P);
+ BIF_TRAP3(&re_exec_trap_export, BIF_P, BIF_ARG_1, BIF_ARG_2, BIF_ARG_3);
+ /* Bad utf8 in subject... */
+ case PCRE_ERROR_SHORTUTF8:
+ case PCRE_ERROR_BADUTF8:
+ case PCRE_ERROR_BADUTF8_OFFSET:
+ cleanup_restart_context(restartp);
+ BIF_ERROR(BIF_P, BADARG);
+ default:
+ /* Something unexpected happened... */
+ ASSERT(! "Unexpected erts_pcre_exec() result");
+ cleanup_restart_context(restartp);
+ BIF_ERROR(BIF_P, EXC_INTERNAL_ERROR);
+ }
}
res = build_exec_return(BIF_P, rc, restartp, BIF_ARG_1);
diff --git a/erts/emulator/pcre/pcre_exec.c b/erts/emulator/pcre/pcre_exec.c
index 99c37f0657..7c1b419627 100644
--- a/erts/emulator/pcre/pcre_exec.c
+++ b/erts/emulator/pcre/pcre_exec.c
@@ -6928,11 +6928,13 @@ if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
if (errorcode != 0)
{
#if defined(ERLANG_INTEGRATION)
- if (ystate && ystate->yielded) {
+ if (ystate) {
ERTS_UPDATE_CONSUMED(extra_data, NULL);
SWAPOUT();
- return PCRE_ERROR_LOOP_LIMIT;
- }
+ if (ystate->yielded) {
+ return PCRE_ERROR_LOOP_LIMIT;
+ }
+ }
#endif
if (offsetcount >= 2)
{
diff --git a/erts/emulator/pcre/pcre_valid_utf8.c b/erts/emulator/pcre/pcre_valid_utf8.c
index 1dc1f9ba0c..036efcab18 100644
--- a/erts/emulator/pcre/pcre_valid_utf8.c
+++ b/erts/emulator/pcre/pcre_valid_utf8.c
@@ -131,6 +131,7 @@ else {
if (ystate->yielded) {
p = ystate->p;
length = ystate->length;
+ ystate->yielded = 0;
if (length < 0)
goto restart_length;
else
diff --git a/lib/stdlib/test/re_SUITE.erl b/lib/stdlib/test/re_SUITE.erl
index 1c9919c8de..6171dd2523 100644
--- a/lib/stdlib/test/re_SUITE.erl
+++ b/lib/stdlib/test/re_SUITE.erl
@@ -964,47 +964,55 @@ bad_utf8_subject(Config) when is_list(Config) ->
%% even though subject contained illegal
%% utf8...
+ %% OTP-19015: re:run() ended up in an infinite loop
+ %% if both pattern and subject were binaries and
+ %% subject was long enough to trigger a yield.
+
nomatch = re:run(<<255,255,255>>, <<"a">>, []),
nomatch = re:run(<<255,255,255>>, "a", []),
nomatch = re:run(<<"aaa">>, <<255>>, []),
nomatch = re:run(<<"aaa">>, [255], []),
{match,[{0,1}]} = re:run(<<255,255,255>>, <<255>>, []),
{match,[{0,1}]} = re:run(<<255,255,255>>, [255], []),
- %% Badarg on illegal utf8 in subject as of OTP 23...
- try
- re:run(<<255,255,255>>, <<"a">>, [unicode]),
- error(unexpected)
- catch
- error:badarg ->
- ok
- end,
- try
- re:run(<<255,255,255>>, "a", [unicode]),
- error(unexpected)
- catch
- error:badarg ->
- ok
- end,
- try
- re:run(<<"aaa">>, <<255>>, [unicode]),
- error(unexpected)
- catch
- error:badarg ->
- ok
- end,
- nomatch = re:run(<<"aaa">>, [255], [unicode]),
- try
- re:run(<<255,255,255>>, <<255>>, [unicode]),
- error(unexpected)
- catch
- error:badarg ->
- ok
- end,
- try
- re:run(<<255,255,255>>, [255], [unicode]),
- error(unexpected)
- catch
- error:badarg ->
- ok
- end.
+ [
+ begin
+ %% Badarg on illegal utf8 in subject as of OTP 23...
+ try
+ re:run(<<Prefix/binary, 255,255,255>>, <<"a">>, [unicode]),
+ error(unexpected)
+ catch
+ error:badarg ->
+ ok
+ end,
+ try
+ re:run(<<Prefix/binary, 255,255,255>>, "a", [unicode]),
+ error(unexpected)
+ catch
+ error:badarg ->
+ ok
+ end,
+ try
+ re:run(<<Prefix/binary, "aaa">>, <<255>>, [unicode]),
+ error(unexpected)
+ catch
+ error:badarg ->
+ ok
+ end,
+ nomatch = re:run(<<Prefix/binary, "aaa">>, [255], [unicode]),
+ try
+ re:run(<<Prefix/binary, 255,255,255>>, <<255>>, [unicode]),
+ error(unexpected)
+ catch
+ error:badarg ->
+ ok
+ end,
+ try
+ re:run(<<Prefix/binary, 255,255,255>>, [255], [unicode]),
+ error(unexpected)
+ catch
+ error:badarg ->
+ ok
+ end
+ end || Prefix <- [<<>>, iolist_to_binary(lists:duplicate(100000, $a))]],
+ ok.
--
2.35.3