File 1001-jit-Refactor-sigaltstack-2-handling.patch of Package erlang
From 7468f1702cf29c1c69dd9b70d5983f34a504842d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?John=20H=C3=B6gberg?= <john@erlang.org>
Date: Wed, 26 Apr 2023 11:22:40 +0200
Subject: [PATCH 1/2] jit: Refactor sigaltstack(2) handling
We used to attempt to override the C library's signal handler
setup procedure with our own that added the SA_ONSTACK flag, but it
only worked with `GNU libc`, which is not always the libc in use.
As many of our users liked to run Docker images based on `Alpine`,
which uses `musl` instead, they got needlessly bad performance
without knowing it.
Instead, we now explicitly add SA_ONSTACK to our own uses of
sigaction(2) and ignore the library problem altogether because:

1. We don't care about this problem on non-scheduler threads: if a
   library wants to fiddle around with signals on its own threads
   then it doesn't affect us.
2. We don't care about this problem when executing on the runtime
   stack: if a NIF or driver uses signals in a creative manner
   locally during a call, then that's fine as long as they restore
   them before returning to Erlang code.

   A NIF or driver that doesn't do this is misbehaving to begin
   with and we can't shield ourselves against that.
3. If a library that we're statically linked to messes around with
   signals in the initialization phase (think C++ constructors of
   static objects), all of it will happen before `main` runs and
   we'll set things straight in `sys_init_signal_stack`.

   If a dynamically linked library does the same, the same
   restrictions as ordinary NIF/driver calls apply to the
   initialization phase and the library must restore the signals
   before returning.

   If any threads are created in either of these phases, they're
   still not scheduler threads so we don't have to care then
   either.
---
erts/configure.ac | 21 +-
erts/emulator/sys/unix/erl_main.c | 3 +
erts/emulator/sys/unix/sys.c | 22 +-
erts/emulator/sys/unix/sys_signal_stack.c | 240 +++-------------------
4 files changed, 57 insertions(+), 229 deletions(-)
diff --git a/erts/configure.ac b/erts/configure.ac
index 540cc4b3cb..a63d391fb3 100644
--- a/erts/configure.ac
+++ b/erts/configure.ac
@@ -2951,24 +2951,13 @@ AS_IF([test ${enable_jit} != no],
# https://undeadly.org/cgi?action=article;sid=20180310000858
enable_native_stack=no
],
- [win32*],
[
- # Windows never messes with the stack, so it's safe by default.
+ # Windows never messes with the stack under any circumstances (and RSP
+ # can safely be used as a general-purpose register).
+ #
+ # On other platforms we'll wrangle sigaltstack(2) to let signals execute
+ # on an alternate stack.
enable_native_stack=yes
- ],
- [
- # Use the native stack if we can safely redirect OS signals to a
- # different stack.
- AC_MSG_CHECKING([for safe signal delivery])
- AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include <signal.h>]], [[#if defined(__APPLE__) && defined(__MACH__) && !defined(__DARWIN__)
- #define __DARWIN__ 1
- #endif
- #if !(defined(__GLIBC__) || defined(__DARWIN__) || defined(__NetBSD__) || defined(__FreeBSD__) || defined(__sun__))
- #error "Unknown libc. Assume musl, which does not allow safe signals"
- #endif]])],[AC_MSG_RESULT([yes])
- enable_native_stack=yes],[AC_MSG_RESULT([no, disabling native stack in JIT])
- enable_native_stack=no
- ])
])
case "$JIT_ARCH" in
diff --git a/erts/emulator/sys/unix/erl_main.c b/erts/emulator/sys/unix/erl_main.c
index 972b93a505..05af1e6f02 100644
--- a/erts/emulator/sys/unix/erl_main.c
+++ b/erts/emulator/sys/unix/erl_main.c
@@ -27,6 +27,9 @@
int
main(int argc, char **argv)
{
+ /* Must be done before we have a chance to spawn any scheduler threads. */
+ sys_init_signal_stack();
+
erl_start(argc, argv);
return 0;
}
diff --git a/erts/emulator/sys/unix/sys.c b/erts/emulator/sys/unix/sys.c
index 210d7a5543..4f3656c3cb 100644
--- a/erts/emulator/sys/unix/sys.c
+++ b/erts/emulator/sys/unix/sys.c
@@ -256,9 +256,6 @@ erts_sys_pre_init(void)
/* After creation in parent */
eid.thread_create_parent_func = thr_create_cleanup,
- /* Must be done really early. */
- sys_init_signal_stack();
-
#ifdef ERTS_ENABLE_LOCK_COUNT
erts_lcnt_pre_thr_init();
#endif
@@ -341,12 +338,27 @@ erl_sys_init(void)
SIGFUNC sys_signal(int sig, SIGFUNC func)
{
struct sigaction act, oact;
+ int extra_flags = 0;
sigemptyset(&act.sa_mask);
- act.sa_flags = 0;
+
+#if (defined(BEAMASM) && defined(NATIVE_ERLANG_STACK))
+ /* The JIT assumes that signals don't execute on the current stack (as our
+ * Erlang process stacks may be too small to execute a signal handler).
+ *
+ * Make sure the SA_ONSTACK flag is set when needed so that signals execute
+ * on their own signal-specific stack. */
+ if (func != SIG_DFL && func != SIG_IGN) {
+ extra_flags |= SA_ONSTACK;
+ }
+#endif
+
+ act.sa_flags = extra_flags;
act.sa_handler = func;
+
sigaction(sig, &act, &oact);
- return(oact.sa_handler);
+
+ return oact.sa_handler;
}
#undef sigprocmask
diff --git a/erts/emulator/sys/unix/sys_signal_stack.c b/erts/emulator/sys/unix/sys_signal_stack.c
index 3bd7d98a89..9ce88cf358 100644
--- a/erts/emulator/sys/unix/sys_signal_stack.c
+++ b/erts/emulator/sys/unix/sys_signal_stack.c
@@ -31,12 +31,40 @@
* have room for the Unix signal handler.
*
* There is a way to redirect signal handlers to an "alternate" signal stack by
- * using the SA_ONSTACK flag with the sigaction() library call. Unfortunately,
- * this has to be specified explicitly for each signal, and it is difficult to
+ * using the SA_ONSTACK flag with the sigaction(2) system call. Unfortunately,
+ * this has to be specified explicitly for each signal, and it is impossible to
* enforce given the presence of libraries.
*
- * Our solution is to override the C library's signal handler setup procedure
- * with our own which enforces the SA_ONSTACK flag.
+ * We used to attempt to override the C library's signal handler setup
+ * procedure with our own that added the SA_ONSTACK flag, but it only worked
+ * with `GNU libc` which is not always the current libc. As many of our users
+ * liked to run docker images with `Alpine` which uses `musl` instead, they got
+ * needlessly bad performance without knowing it.
+ *
+ * Instead, we now explicitly add SA_ONSTACK to our own uses of sigaction(2)
+ * and ignore the library problem altogether because:
+ *
+ * 1. We don't care about this problem on non-scheduler threads: if a library
+ * wants to fiddle around with signals on its own threads then it doesn't
+ * affect us.
+ * 2. We don't care about this problem when executing on the runtime stack:
+ * if a NIF or driver uses signals in a creative manner locally during a
+ * call, then that's fine as long as they restore them before returning to
+ * Erlang code.
+ *
+ * A NIF or driver that doesn't do this is misbehaving to begin with and
+ * we can't shield ourselves against that.
+ * 3. If a library that we're statically linked to messes around with signals
+ * in the initialization phase (think C++ constructors of static objects),
+ * all of it will happen before `main` runs and we'll set things straight
+ * in `sys_init_signal_stack`.
+ *
+ * If a dynamically linked library does the same, the same restrictions as
+ * ordinary NIF/driver calls apply to the initialization phase and the
+ * library must restore the signals before returning.
+ *
+ * If any threads are created in either of these phases, they're still not
+ * scheduler threads so we don't have to care then either.
*/
#ifdef HAVE_CONFIG_H
@@ -53,208 +81,6 @@
#if (defined(BEAMASM) && defined(NATIVE_ERLANG_STACK))
-#if defined(__GLIBC__) && __GLIBC__ == 2 && (__GLIBC_MINOR__ >= 3)
-/*
- * __libc_sigaction() is the core routine.
- * Without libpthread, sigaction() and __sigaction() are both aliases
- * for __libc_sigaction().
- * libpthread redefines __sigaction() as a non-trivial wrapper around
- * __libc_sigaction(), and makes sigaction() an alias for __sigaction().
- * glibc has internal calls to both sigaction() and __sigaction().
- *
- * Overriding __libc_sigaction() would be ideal, but doing so breaks
- * libpthread (threads hang).
- *
- * Overriding __sigaction(), using dlsym RTLD_NEXT to find glibc's
- * version of __sigaction(), works with glibc-2.2.4 and 2.2.5.
- * Unfortunately, this solution doesn't work with earlier versions,
- * including glibc-2.2.2 and glibc-2.1.92 (2.2 despite its name):
- * 2.2.2 SIGSEGVs in dlsym RTLD_NEXT (known glibc bug), and 2.1.92
- * SIGSEGVs inexplicably in two test cases in the HiPE test suite.
- *
- * Instead we only override sigaction() and call __sigaction()
- * directly. This should work for HiPE/x86 as long as only the Posix
- * signal interface is used, i.e. there are no calls to simulated
- * old BSD or SysV interfaces.
- * glibc's internal calls to __sigaction() appear to be mostly safe.
- * sys_init_signal_stack() fixes some unsafe ones, e.g. the SIGPROF handler.
- */
-#ifndef __USE_GNU
-# define __USE_GNU /* to un-hide RTLD_NEXT */
-#endif
-#define NEXT_SIGACTION "__sigaction"
-#define LIBC_SIGACTION __sigaction
-#define OVERRIDE_SIGACTION
-#endif /* glibc >= 2.3 */
-
-/* Is there no standard identifier for Darwin/MacOSX ? */
-#if defined(__APPLE__) && defined(__MACH__) && !defined(__DARWIN__)
-#define __DARWIN__ 1
-#endif
-
-#if defined(__DARWIN__)
-/*
- * Assumes Mac OS X >= 10.3 (dlsym operations not available in 10.2 and
- * earlier).
- *
- * The code below assumes that is part of the main image (earlier
- * in the load order than libSystem and certainly before any dylib
- * that might use sigaction) -- a standard RTLD_NEXT caveat.
- *
- * _sigaction lives in /usr/lib/libSystem.B.dylib and can be found
- * with the standard dlsym(RTLD_NEXT) call. The proviso on Mac OS X
- * being that the symbol for dlsym doesn't include a leading '_'.
- *
- * The other _sigaction, _sigaction_no_bind I don't understand the purpose
- * of and don't modify.
- */
-#define NEXT_SIGACTION "sigaction"
-#define LIBC_SIGACTION _sigaction
-#undef OVERRIDE_SIGACTION
-#define _NSIG NSIG
-#endif /* __DARWIN__ */
-
-#if defined(__sun__)
-/*
- * Assume Solaris/x86 2.8.
- * There is a number of sigaction() procedures in libc:
- * * sigaction(): weak reference to _sigaction().
- * * _sigaction(): apparently a simple wrapper around __sigaction().
- * * __sigaction(): apparently the procedure doing the actual system call.
- * * _libc_sigaction(): apparently some thread-related wrapper, which ends
- * up calling __sigaction().
- * The threads library redefines sigaction() and _sigaction() to its
- * own wrapper, which checks for and restricts access to threads-related
- * signals. The wrapper appears to eventually call libc's __sigaction().
- *
- * We catch and override _sigaction() since overriding __sigaction()
- * causes fatal errors in some cases.
- *
- * When linked with thread support, there are calls to sigaction() before
- * our init routine has had a chance to find _sigaction()'s address.
- * This forces us to initialise at the first call.
- */
-#define NEXT_SIGACTION "_sigaction"
-#define LIBC_SIGACTION _sigaction
-#define OVERRIDE_SIGACTION
-#define _NSIG NSIG
-#endif /* __sun__ */
-
-#if defined(__FreeBSD__)
-/*
- * This is a copy of Darwin code for FreeBSD.
- * CAVEAT: detailed semantics are not verified yet.
- */
-#define NEXT_SIGACTION "sigaction"
-#define LIBC_SIGACTION _sigaction
-#undef OVERRIDE_SIGACTION
-#define _NSIG NSIG
-#endif /* __FreeBSD__ */
-
-#if defined(__NetBSD__)
-/*
- * Note: This is only stub code to allow the build to succeed.
- * Whether this actually provides the needed overrides for safe
- * signal delivery or not is unknown.
- */
-#undef NEXT_SIGACTION
-#undef OVERRIDE_SIGACTION
-#endif /* __NetBSD__ */
-
-#if !(defined(__GLIBC__) || defined(__DARWIN__) || defined(__NetBSD__) || \
- defined(__FreeBSD__) || defined(__sun__))
-/*
- * Unknown libc -- assume musl, which does not allow safe signals
- */
-#error "beamasm requires a libc that can guarantee that sigaltstack works"
-#endif /* !(__GLIBC__ || __DARWIN__ || __NetBSD__ || __FreeBSD__ || \
- * __sun__) \
- */
-
-#if defined(NEXT_SIGACTION)
-/*
- * Initialize a function pointer to the libc core sigaction routine,
- * to be used by our wrappers.
- */
-#include <dlfcn.h>
-
-static int (*next_sigaction)(int, const struct sigaction *, struct sigaction *);
-
-static void do_init(void) {
- next_sigaction = dlsym(RTLD_NEXT, NEXT_SIGACTION);
-
- if (next_sigaction != 0) {
- return;
- }
-
- perror("dlsym");
- abort();
-}
-
-#define INIT() \
- do { \
- if (!next_sigaction) \
- do_init(); \
- } while (0)
-#else /* !defined(NEXT_SIGACTION) */
-#define INIT() \
- do { \
- } while (0)
-#endif /* !defined(NEXT_SIGACTION) */
-
-#if defined(NEXT_SIGACTION)
-/*
- * This is our wrapper for sigaction(). sigaction() can be called before
- * sys_init_signal_stack() has been executed, especially when threads support
- * has been linked with the executable. Therefore, we must initialise
- * next_sigaction() dynamically, the first time it's needed.
- */
-static int my_sigaction(int signum,
- const struct sigaction *act,
- struct sigaction *oldact) {
- struct sigaction newact;
-
- INIT();
-
- if (act && act->sa_handler != SIG_DFL && act->sa_handler != SIG_IGN &&
- !(act->sa_flags & SA_ONSTACK)) {
- newact = *act;
- newact.sa_flags |= SA_ONSTACK;
- act = &newact;
- }
- return next_sigaction(signum, act, oldact);
-}
-#endif
-
-#if defined(LIBC_SIGACTION)
-
-/*
- * This overrides the C library's core sigaction() procedure, catching
- * all its internal calls.
- */
-extern int LIBC_SIGACTION(int, const struct sigaction *, struct sigaction *);
-
-int LIBC_SIGACTION(int signum,
- const struct sigaction *act,
- struct sigaction *oldact) {
- return my_sigaction(signum, act, oldact);
-}
-
-#endif
-
-#if defined(OVERRIDE_SIGACTION)
-
-/*
- * This catches the application's own sigaction() calls.
- */
-int sigaction(int signum,
- const struct sigaction *act,
- struct sigaction *oldact) {
- return my_sigaction(signum, act, oldact);
-}
-
-#endif
-
/*
* Set alternate signal stack for the invoking thread.
*/
@@ -287,8 +113,6 @@ void sys_init_signal_stack(void) {
struct sigaction sa;
int i;
- INIT();
-
sys_thread_init_signal_stack();
for (i = 1; i < _NSIG; ++i) {
--
2.35.3