File makedumpfile-exclude-unnecessary-hugepages.patch of Package makedumpfile

From: Atsushi Kumagai <kumagai-atsushi@mxc.nes.nec.co.jp>
Date: Thu Aug 21 08:55:54 2014 +0900
Subject: [PATCH v4] Exclude unnecessary hugepages.
References: bnc#873232
Patch-mainline: v1.5.7
Git-commit: e8b4f93b3260defe86f5e13ca7536c07f2e32914

There are two types of hugepages in the kernel, and both should be
excluded as user pages.

1. Transparent huge pages (THP)
All THP pages are anonymous pages (at least for now), so we only
need to find out how many pages belong to the corresponding
hugepage. This count can be read from the page->lru.prev of the
second page in the hugepage, as the sketch below illustrates.

2. Hugetlbfs pages
These pages aren't anonymous pages, but they are still user pages,
so they should be excluded as well.
Luckily, such pages can be detected by looking at the page->lru.next
of the second page in the hugepage. This idea comes from the
kernel's PageHuge().
The number of pages can be obtained in the same way as for THP.
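
The following standalone sketch (not part of the patch) shows the idea
behind both checks. struct page_stub, free_huge_page_addr, and the two
helper functions are hypothetical stand-ins for the raw struct page
data read out of the dump and for SYMBOL(free_huge_page) resolved from
the vmcore; the addresses are made up:

    #include <stdio.h>

    /* Minimal stand-ins for the fields this patch reads from the dump. */
    struct list_head_stub { unsigned long next, prev; };
    struct page_stub { unsigned long flags; struct list_head_stub lru; };

    /* Address of the kernel's free_huge_page(), as resolved from the
     * vmcore symbol table; 0 means the symbol was not found. */
    static unsigned long free_huge_page_addr;

    /* For a compound page, the order is stored in lru.prev of the
     * second page, so the hugepage spans (1 << order) base pages. */
    static unsigned long compound_order_of(const struct page_stub *second)
    {
        return second->lru.prev;
    }

    /* hugetlbfs stores its destructor in lru.next of the second page;
     * comparing it against free_huge_page mimics the kernel's PageHuge(). */
    static int is_hugetlb(const struct page_stub *second)
    {
        return free_huge_page_addr != 0
               && second->lru.next == free_huge_page_addr;
    }

    int main(void)
    {
        /* Pretend this is the second page of a 2MB hugetlbfs page
         * (order 9); both values here are hypothetical. */
        struct page_stub second = { 0, { 0xffffffff81170a30UL, 9 } };

        free_huge_page_addr = 0xffffffff81170a30UL;

        printf("order=%lu nr_pages=%lu hugetlb=%d\n",
               compound_order_of(&second),
               1UL << compound_order_of(&second),
               is_hugetlb(&second));
        return 0;
    }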

Changelog:
v4:
  - Cleaned up according to Petr's and Baoquan's comments.
v3:
  - Cleaned up according to Petr's comments.
  - Fixed misdetection of hugetlb pages.
v2:
  - Rebased to "Generic multi-page exclusion".

Signed-off-by: Atsushi Kumagai <kumagai-atsushi@mxc.nes.nec.co.jp>
Acked-by: Petr Tesarik <ptesarik@suse.cz>

---
 makedumpfile.c |   86 +++++++++++++++++++++++++++++++++++++++++++++++----------
 makedumpfile.h |    7 ++++
 2 files changed, 78 insertions(+), 15 deletions(-)

--- a/makedumpfile.c
+++ b/makedumpfile.c
@@ -1177,6 +1177,7 @@ get_symbol_info(void)
 	SYMBOL_INIT(vmemmap_list, "vmemmap_list");
 	SYMBOL_INIT(mmu_psize_defs, "mmu_psize_defs");
 	SYMBOL_INIT(mmu_vmemmap_psize, "mmu_vmemmap_psize");
+	SYMBOL_INIT(free_huge_page, "free_huge_page");
 
 	return TRUE;
 }
@@ -1290,6 +1291,15 @@ get_structure_info(void)
 	ENUM_NUMBER_INIT(PG_slab, "PG_slab");
 	ENUM_NUMBER_INIT(PG_hwpoison, "PG_hwpoison");
 
+	ENUM_NUMBER_INIT(PG_head_mask, "PG_head_mask");
+	if (NUMBER(PG_head_mask) == NOT_FOUND_NUMBER) {
+		ENUM_NUMBER_INIT(PG_head, "PG_head");
+		if (NUMBER(PG_head) == NOT_FOUND_NUMBER)
+			ENUM_NUMBER_INIT(PG_head, "PG_compound");
+		if (NUMBER(PG_head) != NOT_FOUND_NUMBER)
+			NUMBER(PG_head_mask) = 1UL << NUMBER(PG_head);
+	}
+
 	ENUM_TYPE_SIZE_INIT(pageflags, "pageflags");
 
 	TYPEDEF_SIZE_INIT(nodemask_t, "nodemask_t");
@@ -1524,6 +1534,9 @@ get_value_for_old_linux(void)
 		NUMBER(PG_swapcache) = PG_swapcache_ORIGINAL;
 	if (NUMBER(PG_slab) == NOT_FOUND_NUMBER)
 		NUMBER(PG_slab) = PG_slab_ORIGINAL;
+	if (NUMBER(PG_head_mask) == NOT_FOUND_NUMBER)
+		NUMBER(PG_head_mask) = 1L << PG_compound_ORIGINAL;
+
 	/*
 	 * The values from here are for free page filtering based on
 	 * mem_map array. These are minimum effort to cover old
@@ -1691,6 +1704,7 @@ write_vmcoreinfo_data(void)
 	WRITE_SYMBOL("vmemmap_list", vmemmap_list);
 	WRITE_SYMBOL("mmu_psize_defs", mmu_psize_defs);
 	WRITE_SYMBOL("mmu_vmemmap_psize", mmu_vmemmap_psize);
+	WRITE_SYMBOL("free_huge_page", free_huge_page);
 
 	/*
 	 * write the structure size of 1st kernel
@@ -1780,6 +1794,7 @@ write_vmcoreinfo_data(void)
 
 	WRITE_NUMBER("PG_lru", PG_lru);
 	WRITE_NUMBER("PG_private", PG_private);
+	WRITE_NUMBER("PG_head_mask", PG_head_mask);
 	WRITE_NUMBER("PG_swapcache", PG_swapcache);
 	WRITE_NUMBER("PG_buddy", PG_buddy);
 	WRITE_NUMBER("PG_slab", PG_slab);
@@ -2030,6 +2045,7 @@ read_vmcoreinfo(void)
 	READ_SYMBOL("vmemmap_list", vmemmap_list);
 	READ_SYMBOL("mmu_psize_defs", mmu_psize_defs);
 	READ_SYMBOL("mmu_vmemmap_psize", mmu_vmemmap_psize);
+	READ_SYMBOL("free_huge_page", free_huge_page);
 
 	READ_STRUCTURE_SIZE("page", page);
 	READ_STRUCTURE_SIZE("mem_section", mem_section);
@@ -2106,6 +2122,7 @@ read_vmcoreinfo(void)
 
 	READ_NUMBER("PG_lru", PG_lru);
 	READ_NUMBER("PG_private", PG_private);
+	READ_NUMBER("PG_head_mask", PG_head_mask);
 	READ_NUMBER("PG_swapcache", PG_swapcache);
 	READ_NUMBER("PG_slab", PG_slab);
 	READ_NUMBER("PG_buddy", PG_buddy);
@@ -4633,11 +4650,14 @@ __exclude_unnecessary_pages(unsigned lon
     unsigned long long pfn_start, unsigned long long pfn_end, struct cycle *cycle)
 {
 	unsigned long long pfn, pfn_mm, maddr;
+	unsigned long long *pfn_counter;
+	unsigned long long nr_pages;
 	unsigned long long pfn_read_start, pfn_read_end, index_pg;
 	unsigned char page_cache[SIZE(page) * PGMM_CACHED];
 	unsigned char *pcache;
-	unsigned int _count, _mapcount = 0;
+	unsigned int _count, _mapcount = 0, compound_order = 0;
 	unsigned long flags, mapping, private = 0;
+	unsigned long compound_dtor;
 
 	/*
 	 * If a multi-page exclusion is pending, do it first
@@ -4703,11 +4723,36 @@ __exclude_unnecessary_pages(unsigned lon
 		flags   = ULONG(pcache + OFFSET(page.flags));
 		_count  = UINT(pcache + OFFSET(page._count));
 		mapping = ULONG(pcache + OFFSET(page.mapping));
+
+		if ((index_pg < PGMM_CACHED - 1) &&
+		    isCompoundHead(flags)) {
+			compound_order = ULONG(pcache + SIZE(page) + OFFSET(page.lru)
+					       + OFFSET(list_head.prev));
+			compound_dtor = ULONG(pcache + SIZE(page) + OFFSET(page.lru)
+					     + OFFSET(list_head.next));
+
+			if ((compound_order >= sizeof(unsigned long) * 8)
+			    || ((pfn & ((1UL << compound_order) - 1)) != 0)) {
+				/* Invalid order */
+				compound_order = 0;
+			}
+		} else {
+			/*
+			 * The last pfn of the mem_map cache must not be compound page
+			 * since all compound pages are aligned to its page order and
+			 * PGMM_CACHED is a power of 2.
+			 */
+			compound_order = 0;
+			compound_dtor = 0;
+		}
+
 		if (OFFSET(page._mapcount) != NOT_FOUND_STRUCTURE)
 			_mapcount = UINT(pcache + OFFSET(page._mapcount));
 		if (OFFSET(page.private) != NOT_FOUND_STRUCTURE)
 			private = ULONG(pcache + OFFSET(page.private));
 
+		nr_pages = 1 << compound_order;
+		pfn_counter = NULL;
 		/*
 		 * Exclude the free page managed by a buddy
 		 * Use buddy identification of free pages whether cyclic or not.
@@ -4715,12 +4760,8 @@ __exclude_unnecessary_pages(unsigned lon
 		if ((info->dump_level & DL_EXCLUDE_FREE)
 		    && info->page_is_buddy
 		    && info->page_is_buddy(flags, _mapcount, private, _count)) {
-			int nr_pages = 1 << private;
-
-			exclude_range(&pfn_free, pfn, pfn + nr_pages, cycle);
-
-			pfn += nr_pages - 1;
-			mem_map += (nr_pages - 1) * SIZE(page);
+			nr_pages = 1 << private;
+			pfn_counter = &pfn_free;
 		}
 		/*
 		 * Exclude the cache page without the private page.
@@ -4728,8 +4769,7 @@ __exclude_unnecessary_pages(unsigned lon
 		else if ((info->dump_level & DL_EXCLUDE_CACHE)
 		    && (isLRU(flags) || isSwapCache(flags))
 		    && !isPrivate(flags) && !isAnon(mapping)) {
-			if (clear_bit_on_2nd_bitmap_for_kernel(pfn, cycle))
-				pfn_cache++;
+			pfn_counter = &pfn_cache;
 		}
 		/*
 		 * Exclude the cache page with the private page.
@@ -4737,23 +4777,39 @@ __exclude_unnecessary_pages(unsigned lon
 		else if ((info->dump_level & DL_EXCLUDE_CACHE_PRI)
 		    && (isLRU(flags) || isSwapCache(flags))
 		    && !isAnon(mapping)) {
-			if (clear_bit_on_2nd_bitmap_for_kernel(pfn, cycle))
-				pfn_cache_private++;
+			pfn_counter = &pfn_cache_private;
 		}
 		/*
 		 * Exclude the data page of the user process.
+		 *  - anonymous pages
+		 *  - hugetlbfs pages
 		 */
 		else if ((info->dump_level & DL_EXCLUDE_USER_DATA)
-		    && isAnon(mapping)) {
-			if (clear_bit_on_2nd_bitmap_for_kernel(pfn, cycle))
-				pfn_user++;
+			 && (isAnon(mapping) || isHugetlb(compound_dtor))) {
+			pfn_counter = &pfn_user;
 		}
 		/*
 		 * Exclude the hwpoison page.
 		 */
 		else if (isHWPOISON(flags)) {
+			pfn_counter = &pfn_hwpoison;
+		}
+		/*
+		 * Unexcludable page
+		 */
+		else
+			continue;
+
+		/*
+		 * Execute exclusion
+		 */
+		if (nr_pages == 1) {
 			if (clear_bit_on_2nd_bitmap_for_kernel(pfn, cycle))
-				pfn_hwpoison++;
+				(*pfn_counter)++;
+		} else {
+			exclude_range(pfn_counter, pfn, pfn + nr_pages, cycle);
+			pfn += nr_pages - 1;
+			mem_map += (nr_pages - 1) * SIZE(page);
 		}
 	}
 	return TRUE;
--- a/makedumpfile.h
+++ b/makedumpfile.h
@@ -74,6 +74,7 @@ int get_mem_type(void);
 #define PG_lru_ORIGINAL	 	(5)
 #define PG_slab_ORIGINAL	(7)
 #define PG_private_ORIGINAL	(11)	/* Has something at ->private */
+#define PG_compound_ORIGINAL	(14)	/* Is part of a compound page */
 #define PG_swapcache_ORIGINAL	(15)	/* Swap page: swp_entry_t in private */
 
 #define PAGE_BUDDY_MAPCOUNT_VALUE_v2_6_38	(-2)
@@ -148,6 +149,9 @@ test_bit(int nr, unsigned long addr)
 
 #define isLRU(flags)		test_bit(NUMBER(PG_lru), flags)
 #define isPrivate(flags)	test_bit(NUMBER(PG_private), flags)
+#define isCompoundHead(flags)   (!!((flags) & NUMBER(PG_head_mask)))
+#define isHugetlb(dtor)         ((SYMBOL(free_huge_page) != NOT_FOUND_SYMBOL) \
+				 && (SYMBOL(free_huge_page) == dtor))
 #define isSwapCache(flags)	test_bit(NUMBER(PG_swapcache), flags)
 #define isHWPOISON(flags)	(test_bit(NUMBER(PG_hwpoison), flags) \
 				&& (NUMBER(PG_hwpoison) != NOT_FOUND_NUMBER))
@@ -1141,6 +1145,7 @@ struct symbol_table {
 	unsigned long long	node_remap_start_vaddr;
 	unsigned long long	node_remap_end_vaddr;
 	unsigned long long	node_remap_start_pfn;
+	unsigned long long      free_huge_page;
 
 	/*
 	 * for Xen extraction
@@ -1426,6 +1431,8 @@ struct number_table {
 	 */
 	long	PG_lru;
 	long	PG_private;
+	long	PG_head;
+	long	PG_head_mask;
 	long	PG_swapcache;
 	long	PG_buddy;
 	long	PG_slab;