File 6961-erts-crash-dump-improvements.patch of Package erlang

From 63e1d641eaa6bb861702400672acc6fbbbe88bb6 Mon Sep 17 00:00:00 2001
From: Maxim Fedorov <maximfca@gmail.com>
Date: Sun, 24 Oct 2021 18:27:32 -0700
Subject: [PATCH] erts: crash dump improvements

The crash dump slogan tends to be quite long, and limiting it to
200 characters makes it impossible to understand the cause even in
simple cases (like a read-only HOME directory when `erl -name name`
wants to write a .cookie file). Raising the `halt` reason limit to
1023 characters allows a longer slogan.

When the SUSPEND signal is available, the crash dump thread suspends
all other scheduler threads. A scheduler thread may be suspended inside
a `malloc` call after taking the arena mutex, in which case all
subsequent malloc calls will hang. There are a few malloc calls made by
system libraries: one in fdopen (allocating locked_FD struct), and
another in ctime. Moving the code that suspends schedulers to after
these calls avoids this deadlock, and also solves the problem where the
file descriptor cannot be opened and erl_crash_dump_v returns while
keeping schedulers suspended.
---
 erts/emulator/beam/bif.c   |  2 +-
 erts/emulator/beam/break.c | 57 ++++++++++++++++++++------------------
 2 files changed, 31 insertions(+), 28 deletions(-)

diff --git a/erts/emulator/beam/bif.c b/erts/emulator/beam/bif.c
index c81b2a9f48..f9d54070a1 100644
--- a/erts/emulator/beam/bif.c
+++ b/erts/emulator/beam/bif.c
@@ -4271,7 +4271,7 @@ BIF_RETTYPE halt_2(BIF_ALIST_2)
 	erts_exit(ERTS_ABORT_EXIT, "");
     }
     else if (is_list(BIF_ARG_1) || BIF_ARG_1 == NIL) {
-#       define HALT_MSG_SIZE 200
+#       define HALT_MSG_SIZE 1023
         static byte halt_msg[4*HALT_MSG_SIZE+1];
         Sint written;
 
diff --git a/erts/emulator/beam/break.c b/erts/emulator/beam/break.c
index 5527b62211..df4daff230 100644
--- a/erts/emulator/beam/break.c
+++ b/erts/emulator/beam/break.c
@@ -792,33 +792,6 @@ erl_crash_dump_v(char *file, int line, const char* fmt, va_list args)
     LimitedWriterInfo lwi;
     static char* write_buffer;  /* 'static' to avoid a leak warning in valgrind */
 
-    /* Order all managed threads to block, this has to be done
-       first to guarantee that this is the only thread to generate
-       crash dump. */
-    erts_thr_progress_fatal_error_block(&tpd_buf);
-
-#ifdef ERTS_SYS_SUSPEND_SIGNAL
-    /*
-     * We suspend all scheduler threads so that we can dump some
-     * data about the currently running processes and scheduler data.
-     * We have to be very very careful when doing this as the schedulers
-     * could be anywhere.
-     */
-    sys_init_suspend_handler();
-
-    for (i = 0; i < erts_no_schedulers; i++) {
-        erts_tid_t tid = ERTS_SCHEDULER_IX(i)->tid;
-        if (!erts_equal_tids(tid,erts_thr_self()))
-            sys_thr_suspend(tid);
-    }
-
-#endif
-
-    /* Allow us to pass certain places without locking... */
-    erts_atomic32_set_mb(&erts_writing_erl_crash_dump, 1);
-    erts_tsd_set(erts_is_crash_dumping_key, (void *) 1);
-
-
     envsz = sizeof(env);
     /* ERL_CRASH_DUMP_SECONDS not set
      * if we have a heart port, break immediately
@@ -916,6 +889,36 @@ erl_crash_dump_v(char *file, int line, const char* fmt, va_list args)
     time(&now);
     erts_cbprintf(to, to_arg, "=erl_crash_dump:0.5\n%s", ctime(&now));
 
+    /* Order all managed threads to block, this has to be done
+       first to guarantee that this is the only thread to generate
+       crash dump. */
+    erts_thr_progress_fatal_error_block(&tpd_buf);
+
+#ifdef ERTS_SYS_SUSPEND_SIGNAL
+    /*
+     * We suspend all scheduler threads so that we can dump some
+     * data about the currently running processes and scheduler data.
+     * We have to be very very careful when doing this as the schedulers
+     * could be anywhere.
+     * It may happen that a scheduler thread is suspended while holding
+     * the malloc lock. Therefore code running in this thread must not
+     * call malloc after that point, or it will deadlock. ctime and fdopen
+     * both use malloc internally and must be called before suspending.
+     */
+    sys_init_suspend_handler();
+
+    for (i = 0; i < erts_no_schedulers; i++) {
+        erts_tid_t tid = ERTS_SCHEDULER_IX(i)->tid;
+        if (!erts_equal_tids(tid,erts_thr_self()))
+            sys_thr_suspend(tid);
+    }
+
+#endif
+
+    /* Allow us to pass certain places without locking... */
+    erts_atomic32_set_mb(&erts_writing_erl_crash_dump, 1);
+    erts_tsd_set(erts_is_crash_dumping_key, (void *) 1);
+
     if (file != NULL)
        erts_cbprintf(to, to_arg, "The error occurred in file %s, line %d\n", file, line);
 
-- 
2.31.1

openSUSE Build Service is sponsored by