File 6961-erts-crash-dump-improvements.patch of Package erlang
From 63e1d641eaa6bb861702400672acc6fbbbe88bb6 Mon Sep 17 00:00:00 2001
From: Maxim Fedorov <maximfca@gmail.com>
Date: Sun, 24 Oct 2021 18:27:32 -0700
Subject: [PATCH] erts: crash dump improvements
Crash dump slogans tend to be quite long, and limiting them to
200 characters makes it impossible to understand the cause even in
simple cases (such as a read-only HOME directory when `erl -name name`
wants to write a .cookie file). Raising the `halt` reason size limit
(HALT_MSG_SIZE) from 200 to 1023 allows for a longer slogan.
When the SUSPEND signal is available, the crash dump thread suspends all
other scheduler threads. A scheduler thread may be suspended inside a
`malloc` call after taking the arena mutex, in which case all subsequent
malloc calls will hang. There are a few malloc calls made by system
libraries: one in fdopen (allocating a locked_FD struct), and another in
ctime. Moving the code that suspends schedulers after these calls avoids
this deadlock, and also fixes the case where the file descriptor cannot
be opened and erl_crash_dump_v returns leaving the schedulers suspended.
---
erts/emulator/beam/bif.c | 2 +-
erts/emulator/beam/break.c | 57 ++++++++++++++++++++------------------
2 files changed, 31 insertions(+), 28 deletions(-)
diff --git a/erts/emulator/beam/bif.c b/erts/emulator/beam/bif.c
index c81b2a9f48..f9d54070a1 100644
--- a/erts/emulator/beam/bif.c
+++ b/erts/emulator/beam/bif.c
@@ -4271,7 +4271,7 @@ BIF_RETTYPE halt_2(BIF_ALIST_2)
erts_exit(ERTS_ABORT_EXIT, "");
}
else if (is_list(BIF_ARG_1) || BIF_ARG_1 == NIL) {
-# define HALT_MSG_SIZE 200
+# define HALT_MSG_SIZE 1023
static byte halt_msg[4*HALT_MSG_SIZE+1];
Sint written;
diff --git a/erts/emulator/beam/break.c b/erts/emulator/beam/break.c
index 5527b62211..df4daff230 100644
--- a/erts/emulator/beam/break.c
+++ b/erts/emulator/beam/break.c
@@ -792,33 +792,6 @@ erl_crash_dump_v(char *file, int line, const char* fmt, va_list args)
LimitedWriterInfo lwi;
static char* write_buffer; /* 'static' to avoid a leak warning in valgrind */
- /* Order all managed threads to block, this has to be done
- first to guarantee that this is the only thread to generate
- crash dump. */
- erts_thr_progress_fatal_error_block(&tpd_buf);
-
-#ifdef ERTS_SYS_SUSPEND_SIGNAL
- /*
- * We suspend all scheduler threads so that we can dump some
- * data about the currently running processes and scheduler data.
- * We have to be very very careful when doing this as the schedulers
- * could be anywhere.
- */
- sys_init_suspend_handler();
-
- for (i = 0; i < erts_no_schedulers; i++) {
- erts_tid_t tid = ERTS_SCHEDULER_IX(i)->tid;
- if (!erts_equal_tids(tid,erts_thr_self()))
- sys_thr_suspend(tid);
- }
-
-#endif
-
- /* Allow us to pass certain places without locking... */
- erts_atomic32_set_mb(&erts_writing_erl_crash_dump, 1);
- erts_tsd_set(erts_is_crash_dumping_key, (void *) 1);
-
-
envsz = sizeof(env);
/* ERL_CRASH_DUMP_SECONDS not set
* if we have a heart port, break immediately
@@ -916,6 +889,36 @@ erl_crash_dump_v(char *file, int line, const char* fmt, va_list args)
time(&now);
erts_cbprintf(to, to_arg, "=erl_crash_dump:0.5\n%s", ctime(&now));
+ /* Order all managed threads to block, this has to be done
+ first to guarantee that this is the only thread to generate
+ crash dump. */
+ erts_thr_progress_fatal_error_block(&tpd_buf);
+
+#ifdef ERTS_SYS_SUSPEND_SIGNAL
+ /*
+ * We suspend all scheduler threads so that we can dump some
+ * data about the currently running processes and scheduler data.
+ * We have to be very very careful when doing this as the schedulers
+ * could be anywhere.
+ * It may happen that a scheduler thread is suspended while holding
+ * the malloc lock. Therefore code running in this thread must not
+ * call malloc from here on, or it will deadlock. ctime and fdopen
+ * both use malloc internally and must be called before suspending.
+ */
+ sys_init_suspend_handler();
+
+ for (i = 0; i < erts_no_schedulers; i++) {
+ erts_tid_t tid = ERTS_SCHEDULER_IX(i)->tid;
+ if (!erts_equal_tids(tid,erts_thr_self()))
+ sys_thr_suspend(tid);
+ }
+
+#endif
+
+ /* Allow us to pass certain places without locking... */
+ erts_atomic32_set_mb(&erts_writing_erl_crash_dump, 1);
+ erts_tsd_set(erts_is_crash_dumping_key, (void *) 1);
+
if (file != NULL)
erts_cbprintf(to, to_arg, "The error occurred in file %s, line %d\n", file, line);
--
2.31.1