File 4431-Revert-erts-Stop-marking-memory-regions-as-discardab.patch of Package erlang
From 06a866734b09d636b905e2a5a3423981144119ca Mon Sep 17 00:00:00 2001
From: Lukas Larsson <lukas@erlang.org>
Date: Wed, 12 Oct 2022 15:44:09 +0200
Subject: [PATCH 1/2] Revert "erts: Stop marking memory regions as discardable
(`madvise(2)`)"
This reverts commit df450823fcdb1657743c94023318abe3731366bb.
---
erts/emulator/beam/erl_alloc_util.c | 177 ++++++++++++++++++++++++++++
erts/emulator/sys/common/erl_mmap.c | 19 +++
erts/emulator/sys/common/erl_mmap.h | 64 ++++++++++
3 files changed, 260 insertions(+)
diff --git a/erts/emulator/beam/erl_alloc_util.c b/erts/emulator/beam/erl_alloc_util.c
index 84d67c3078..7f3a9b420b 100644
--- a/erts/emulator/beam/erl_alloc_util.c
+++ b/erts/emulator/beam/erl_alloc_util.c
@@ -2527,9 +2527,155 @@ mbc_alloc(Allctr_t *allctr, Uint size)
return BLK2UMEM(blk);
}
+typedef struct {
+ char *ptr;
+ UWord size;
+} ErtsMemDiscardRegion;
+
+/* Construct a discard region for the user memory of a free block, letting the
+ * OS reclaim its physical memory when required.
+ *
+ * Note that we're ignoring both the footer and everything that comes before
+ * the minimum block size as the allocator uses those areas to manage the
+ * block. */
+static void ERTS_INLINE
+mem_discard_start(Allctr_t *allocator, Block_t *block,
+ ErtsMemDiscardRegion *out)
+{
+ UWord size = BLK_SZ(block);
+
+ ASSERT(size >= allocator->min_block_size);
+
+ if (size > (allocator->min_block_size + FBLK_FTR_SZ)) {
+ out->size = size - allocator->min_block_size - FBLK_FTR_SZ;
+ } else {
+ out->size = 0;
+ }
+
+ out->ptr = (char*)block + allocator->min_block_size;
+}
+
+/* Expands a discard region into a neighboring free block, allowing us to
+ * discard the block header and first page.
+ *
+ * This is very important in small-allocation scenarios where no single block
+ * is large enough to be discarded on its own. */
+static void ERTS_INLINE
+mem_discard_coalesce(Allctr_t *allocator, Block_t *neighbor,
+ ErtsMemDiscardRegion *region)
+{
+ char *neighbor_start;
+
+ ASSERT(IS_FREE_BLK(neighbor));
+
+ neighbor_start = (char*)neighbor;
+
+ if (region->ptr >= neighbor_start) {
+ char *region_start_page;
+
+ region_start_page = region->ptr - SYS_PAGE_SIZE;
+ region_start_page = (char*)((UWord)region_start_page & ~SYS_PAGE_SZ_MASK);
+
+ /* Expand if our first page begins within the previous free block's
+ * unused data. */
+ if (region_start_page >= (neighbor_start + allocator->min_block_size)) {
+ region->size += (region->ptr - region_start_page) - FBLK_FTR_SZ;
+ region->ptr = region_start_page;
+ }
+ } else {
+ char *region_end_page;
+ UWord neighbor_size;
+
+ ASSERT(region->ptr <= neighbor_start);
+
+ region_end_page = region->ptr + region->size + SYS_PAGE_SIZE;
+ region_end_page = (char*)((UWord)region_end_page & ~SYS_PAGE_SZ_MASK);
+
+ neighbor_size = BLK_SZ(neighbor) - FBLK_FTR_SZ;
+
+ /* Expand if our last page ends anywhere within the next free block,
+ * sans the footer we'll inherit. */
+ if (region_end_page < neighbor_start + neighbor_size) {
+ region->size += region_end_page - (region->ptr + region->size);
+ }
+ }
+}
+
+static void ERTS_INLINE
+mem_discard_finish(Allctr_t *allocator, Block_t *block,
+ ErtsMemDiscardRegion *region)
+{
+#ifdef DEBUG
+ char *block_start, *block_end;
+ UWord block_size;
+
+ block_size = BLK_SZ(block);
+
+ /* Ensure that the region is completely covered by the legal area of the
+ * free block. This must hold even when the region is too small to be
+ * discarded. */
+ if (region->size > 0) {
+ ASSERT(block_size > allocator->min_block_size + FBLK_FTR_SZ);
+
+ block_start = (char*)block + allocator->min_block_size;
+ block_end = (char*)block + block_size - FBLK_FTR_SZ;
+
+ ASSERT(region->size == 0 ||
+ (region->ptr + region->size <= block_end &&
+ region->ptr >= block_start &&
+ region->size <= block_size));
+ }
+#else
+ (void)allocator;
+ (void)block;
+#endif
+
+ if (region->size > SYS_PAGE_SIZE) {
+ UWord align_offset, size;
+ char *ptr;
+
+ align_offset = SYS_PAGE_SIZE - ((UWord)region->ptr & SYS_PAGE_SZ_MASK);
+
+ size = (region->size - align_offset) & ~SYS_PAGE_SZ_MASK;
+ ptr = region->ptr + align_offset;
+
+ if (size > 0) {
+ ASSERT(!((UWord)ptr & SYS_PAGE_SZ_MASK));
+ ASSERT(!(size & SYS_PAGE_SZ_MASK));
+
+ erts_mem_discard(ptr, size);
+ }
+ }
+}
+
+static void
+carrier_mem_discard_free_blocks(Allctr_t *allocator, Carrier_t *carrier)
+{
+ static const int MAX_BLOCKS_TO_DISCARD = 100;
+ Block_t *block;
+ int i;
+
+ block = allocator->first_fblk_in_mbc(allocator, carrier);
+ i = 0;
+
+ while (block != NULL && i < MAX_BLOCKS_TO_DISCARD) {
+ ErtsMemDiscardRegion region;
+
+ ASSERT(IS_FREE_BLK(block));
+
+ mem_discard_start(allocator, block, ®ion);
+ mem_discard_finish(allocator, block, ®ion);
+
+ block = allocator->next_fblk_in_mbc(allocator, carrier, block);
+ i++;
+ }
+}
+
static void
mbc_free(Allctr_t *allctr, ErtsAlcType_t type, void *p, Carrier_t **busy_pcrr_pp)
{
+ ErtsMemDiscardRegion discard_region = {0};
+ int discard;
Uint is_first_blk;
Uint is_last_blk;
Uint blk_sz;
@@ -2545,6 +2691,21 @@ mbc_free(Allctr_t *allctr, ErtsAlcType_t type, void *p, Carrier_t **busy_pcrr_pp
ASSERT(IS_MBC_BLK(blk));
ASSERT(blk_sz >= allctr->min_block_size);
+#ifndef DEBUG
+ /* We want to mark freed blocks as reclaimable to the OS, but it's a fairly
+ * expensive operation which doesn't do much good if we use it again soon
+ * after, so we limit it to deallocations on pooled carriers. */
+ discard = busy_pcrr_pp && *busy_pcrr_pp;
+#else
+ /* Always discard in debug mode, regardless of whether we're in the pool or
+ * not. */
+ discard = 1;
+#endif
+
+ if (discard) {
+ mem_discard_start(allctr, blk, &discard_region);
+ }
+
HARD_CHECK_BLK_CARRIER(allctr, blk);
crr = ABLK_TO_MBC(blk);
@@ -2562,6 +2723,10 @@ mbc_free(Allctr_t *allctr, ErtsAlcType_t type, void *p, Carrier_t **busy_pcrr_pp
blk = PREV_BLK(blk);
(*allctr->unlink_free_block)(allctr, blk);
+ if (discard) {
+ mem_discard_coalesce(allctr, blk, &discard_region);
+ }
+
blk_sz += MBC_FBLK_SZ(blk);
is_first_blk = IS_MBC_FIRST_FBLK(allctr, blk);
SET_MBC_FBLK_SZ(blk, blk_sz);
@@ -2578,6 +2743,10 @@ mbc_free(Allctr_t *allctr, ErtsAlcType_t type, void *p, Carrier_t **busy_pcrr_pp
/* Coalesce with next block... */
(*allctr->unlink_free_block)(allctr, nxt_blk);
+ if (discard) {
+ mem_discard_coalesce(allctr, nxt_blk, &discard_region);
+ }
+
blk_sz += MBC_FBLK_SZ(nxt_blk);
SET_MBC_FBLK_SZ(blk, blk_sz);
@@ -2614,6 +2783,10 @@ mbc_free(Allctr_t *allctr, ErtsAlcType_t type, void *p, Carrier_t **busy_pcrr_pp
(*allctr->link_free_block)(allctr, blk);
HARD_CHECK_BLK_CARRIER(allctr, blk);
+ if (discard) {
+ mem_discard_finish(allctr, blk, &discard_region);
+ }
+
if (busy_pcrr_pp && *busy_pcrr_pp) {
update_pooled_tree(allctr, crr, blk_sz);
} else {
@@ -3758,8 +3931,12 @@ abandon_carrier(Allctr_t *allctr, Carrier_t *crr)
unlink_carrier(&allctr->mbc_list, crr);
allctr->remove_mbc(allctr, crr);
+ /* Mark our free blocks as unused and reclaimable to the OS. */
+ carrier_mem_discard_free_blocks(allctr, crr);
+
cpool_insert(allctr, crr);
+
iallctr = erts_atomic_read_nob(&crr->allctr);
if (allctr == crr->cpool.orig_allctr) {
/* preserve HOMECOMING flag */
diff --git a/erts/emulator/sys/common/erl_mmap.c b/erts/emulator/sys/common/erl_mmap.c
index 6ec21beb08..e3300e3c43 100644
--- a/erts/emulator/sys/common/erl_mmap.c
+++ b/erts/emulator/sys/common/erl_mmap.c
@@ -32,6 +32,25 @@
#include <sys/mman.h>
#endif
+int erts_mem_guard(void *p, UWord size) {
+#if defined(WIN32)
+ DWORD oldProtect;
+ BOOL success;
+
+ success = VirtualProtect((LPVOID*)p,
+ size,
+ PAGE_NOACCESS,
+ &oldProtect);
+
+ return success ? 0 : -1;
+#elif defined(HAVE_SYS_MMAN_H)
+ return mprotect(p, size, PROT_NONE);
+#else
+ errno = ENOTSUP;
+ return -1;
+#endif
+}
+
#if HAVE_ERTS_MMAP
/* #define ERTS_MMAP_OP_RINGBUF_SZ 100 */
diff --git a/erts/emulator/sys/common/erl_mmap.h b/erts/emulator/sys/common/erl_mmap.h
index c13a35516b..e4e9dfb7ae 100644
--- a/erts/emulator/sys/common/erl_mmap.h
+++ b/erts/emulator/sys/common/erl_mmap.h
@@ -181,4 +181,68 @@ void hard_dbg_remove_mseg(void* seg, UWord sz);
#endif /* HAVE_ERTS_MMAP */
+/* Marks the given memory region as permanently inaccessible.
+ *
+ * Returns 0 on success, and -1 on error. */
+int erts_mem_guard(void *p, UWord size);
+
+/* Marks the given memory region as unused without freeing it, letting the OS
+ * reclaim its physical memory with the promise that we'll get it back (without
+ * its contents) the next time it's accessed. */
+ERTS_GLB_INLINE void erts_mem_discard(void *p, UWord size);
+
+#if ERTS_GLB_INLINE_INCL_FUNC_DEF
+
+#ifdef VALGRIND
+ #include <valgrind/memcheck.h>
+
+ ERTS_GLB_INLINE void erts_mem_discard(void *ptr, UWord size) {
+ VALGRIND_MAKE_MEM_UNDEFINED(ptr, size);
+ }
+#elif defined(DEBUG)
+ /* Try to provoke crashes by filling the discard region with garbage. It's
+ * extremely hard to find bugs where we've discarded too much, as the
+ * region often retains its old contents if it's accessed before the OS
+ * reclaims it. */
+ ERTS_GLB_INLINE void erts_mem_discard(void *ptr, UWord size) {
+ static const char pattern[] = "DISCARDED";
+ char *data;
+ int i;
+
+ for(i = 0, data = ptr; i < size; i++) {
+ data[i] = pattern[i % sizeof(pattern)];
+ }
+ }
+#elif defined(HAVE_SYS_MMAN_H) && defined(HAVE_MADVISE) && !(defined(__sun) || defined(__sun__))
+ #include <sys/mman.h>
+
+ ERTS_GLB_INLINE void erts_mem_discard(void *ptr, UWord size) {
+ /* Note that we don't fall back to MADV_DONTNEED since it promises that
+ * the given region will be zeroed on access, which turned out to be
+ * too much of a performance hit. */
+ #ifdef MADV_FREE
+ madvise(ptr, size, MADV_FREE);
+ #else
+ (void)ptr;
+ (void)size;
+ #endif
+ }
+#elif defined(_WIN32)
+ #include <winbase.h>
+
+ /* MEM_RESET is defined on all supported versions of Windows, and has the
+ * same semantics as MADV_FREE. */
+ ERTS_GLB_INLINE void erts_mem_discard(void *ptr, UWord size) {
+ VirtualAlloc(ptr, size, MEM_RESET, PAGE_READWRITE);
+ }
+#else
+ /* Dummy implementation. */
+ ERTS_GLB_INLINE void erts_mem_discard(void *ptr, UWord size) {
+ (void)ptr;
+ (void)size;
+ }
+#endif
+
+#endif /* ERTS_GLB_INLINE_INCL_FUNC_DEF */
+
#endif /* ERL_MMAP_H__ */
--
2.35.3