File 1489-erts-Fix-re-utf-8-validation-yielding.patch of Package erlang

From 6d8f59dcc8622da2da10b33b8c393f784922aff6 Mon Sep 17 00:00:00 2001
From: Lukas Larsson <lukas@erlang.org>
Date: Wed, 6 Mar 2024 20:47:30 +0100
Subject: [PATCH] erts: Fix re utf-8 validation yielding

When an invalid utf-8 binary was given to re:run/3
and re:run/3 had to yield before encountering the invalid
byte, re:run/3 would mistake the validation error for a
yield point and loop indefinitely.

This commit fixes that problem and properly propagates
the invalid utf-8 error to the caller.
---
 erts/emulator/beam/erl_bif_re.c      | 27 +++++++--
 erts/emulator/pcre/pcre_exec.c       |  8 ++-
 erts/emulator/pcre/pcre_valid_utf8.c |  1 +
 lib/stdlib/test/re_SUITE.erl         | 82 +++++++++++++++-------------
 4 files changed, 74 insertions(+), 44 deletions(-)

diff --git a/erts/emulator/beam/erl_bif_re.c b/erts/emulator/beam/erl_bif_re.c
index 663fa3518f..bc8bbaac1c 100644
--- a/erts/emulator/beam/erl_bif_re.c
+++ b/erts/emulator/beam/erl_bif_re.c
@@ -1475,10 +1475,29 @@ static BIF_RETTYPE re_exec_trap(BIF_ALIST_3)
 
     ASSERT(loop_count != 0xFFFFFFFF);
     BUMP_REDS(BIF_P, loop_count / LOOP_FACTOR);
-    if (rc == PCRE_ERROR_LOOP_LIMIT) {
-	/* Trap */
-	BUMP_ALL_REDS(BIF_P);
-	BIF_TRAP3(&re_exec_trap_export, BIF_P, BIF_ARG_1, BIF_ARG_2, BIF_ARG_3);
+    if (rc < 0) {
+        switch (rc) {
+            /* No match... */
+        case PCRE_ERROR_NOMATCH:
+        case PCRE_ERROR_MATCHLIMIT:
+        case PCRE_ERROR_RECURSIONLIMIT:
+            break;
+        case PCRE_ERROR_LOOP_LIMIT:
+            /* Trap */
+            BUMP_ALL_REDS(BIF_P);
+            BIF_TRAP3(&re_exec_trap_export, BIF_P, BIF_ARG_1, BIF_ARG_2, BIF_ARG_3);
+            /* Bad utf8 in subject... */
+        case PCRE_ERROR_SHORTUTF8:
+        case PCRE_ERROR_BADUTF8:
+        case PCRE_ERROR_BADUTF8_OFFSET:
+            cleanup_restart_context(restartp);
+            BIF_ERROR(BIF_P, BADARG);
+        default:
+            /* Something unexpected happened... */
+            ASSERT(! "Unexpected erts_pcre_exec() result");
+            cleanup_restart_context(restartp);
+            BIF_ERROR(BIF_P, EXC_INTERNAL_ERROR);
+        }
     }
     res = build_exec_return(BIF_P, rc, restartp, BIF_ARG_1);
  
diff --git a/erts/emulator/pcre/pcre_exec.c b/erts/emulator/pcre/pcre_exec.c
index 99c37f0657..7c1b419627 100644
--- a/erts/emulator/pcre/pcre_exec.c
+++ b/erts/emulator/pcre/pcre_exec.c
@@ -6928,11 +6928,13 @@ if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
   if (errorcode != 0)
     {
 #if defined(ERLANG_INTEGRATION)
-    if (ystate && ystate->yielded) {
+      if (ystate) {
         ERTS_UPDATE_CONSUMED(extra_data, NULL);
         SWAPOUT();
-        return PCRE_ERROR_LOOP_LIMIT;
-    }
+        if (ystate->yielded) {
+          return PCRE_ERROR_LOOP_LIMIT;
+        }
+      }
 #endif
     if (offsetcount >= 2)
       {
diff --git a/erts/emulator/pcre/pcre_valid_utf8.c b/erts/emulator/pcre/pcre_valid_utf8.c
index 1dc1f9ba0c..036efcab18 100644
--- a/erts/emulator/pcre/pcre_valid_utf8.c
+++ b/erts/emulator/pcre/pcre_valid_utf8.c
@@ -131,6 +131,7 @@ else {
     if (ystate->yielded) {
         p = ystate->p;
         length = ystate->length;
+        ystate->yielded = 0;
         if (length < 0) 
             goto restart_length;
         else
diff --git a/lib/stdlib/test/re_SUITE.erl b/lib/stdlib/test/re_SUITE.erl
index 1c9919c8de..6171dd2523 100644
--- a/lib/stdlib/test/re_SUITE.erl
+++ b/lib/stdlib/test/re_SUITE.erl
@@ -964,47 +964,55 @@ bad_utf8_subject(Config) when is_list(Config) ->
     %% even though subject contained illegal
     %% utf8...
 
+    %% OTP-19015: re:run() ended up in an infinite loop
+    %% if both pattern and subject were binaries and
+    %% subject was long enough to trigger a yield.
+
     nomatch = re:run(<<255,255,255>>, <<"a">>, []),
     nomatch = re:run(<<255,255,255>>, "a", []),
     nomatch = re:run(<<"aaa">>, <<255>>, []),
     nomatch = re:run(<<"aaa">>, [255], []),
     {match,[{0,1}]} = re:run(<<255,255,255>>, <<255>>, []),
     {match,[{0,1}]} = re:run(<<255,255,255>>, [255], []),
-    %% Badarg on illegal utf8 in subject as of OTP 23...
-    try
-        re:run(<<255,255,255>>, <<"a">>, [unicode]),
-        error(unexpected)
-    catch
-        error:badarg ->
-            ok
-    end,
-    try
-        re:run(<<255,255,255>>, "a", [unicode]),
-        error(unexpected)
-    catch
-        error:badarg ->
-            ok
-    end,
-    try
-        re:run(<<"aaa">>, <<255>>, [unicode]),
-        error(unexpected)
-    catch
-        error:badarg ->
-            ok
-    end,
-    nomatch = re:run(<<"aaa">>, [255], [unicode]),
-    try
-        re:run(<<255,255,255>>, <<255>>, [unicode]),
-        error(unexpected)
-    catch
-        error:badarg ->
-            ok
-    end,
-    try
-        re:run(<<255,255,255>>, [255], [unicode]),
-        error(unexpected)
-    catch
-        error:badarg ->
-            ok
-    end.
+    [
+     begin
+         %% Badarg on illegal utf8 in subject as of OTP 23...
+         try
+             re:run(<<Prefix/binary, 255,255,255>>, <<"a">>, [unicode]),
+             error(unexpected)
+         catch
+             error:badarg ->
+                 ok
+         end,
+         try
+             re:run(<<Prefix/binary, 255,255,255>>, "a", [unicode]),
+             error(unexpected)
+         catch
+             error:badarg ->
+                 ok
+         end,
+         try
+             re:run(<<Prefix/binary, "aaa">>, <<255>>, [unicode]),
+             error(unexpected)
+         catch
+             error:badarg ->
+                 ok
+         end,
+         nomatch = re:run(<<Prefix/binary, "aaa">>, [255], [unicode]),
+         try
+             re:run(<<Prefix/binary, 255,255,255>>, <<255>>, [unicode]),
+             error(unexpected)
+         catch
+             error:badarg ->
+                 ok
+         end,
+         try
+             re:run(<<Prefix/binary, 255,255,255>>, [255], [unicode]),
+             error(unexpected)
+         catch
+             error:badarg ->
+                 ok
+         end
+     end || Prefix <- [<<>>, iolist_to_binary(lists:duplicate(100000, $a))]],
+    ok.
 
-- 
2.35.3

openSUSE Build Service is sponsored by