File 20120-x86-srat-check-discontig.patch of Package xen

# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1251208722 -3600
# Node ID 50a5950e6a243f10e0b9cd67b95518e1ef4efe87
# Parent  dc2aebb0e1d08b98a11d2e8a3bf7dc6435338e7f
x86 numa: Fix SRAT check for discontig memory
References: bnc#534146

We currently compare the sum of the pages found in the SRAT table to
the address of the highest memory page found via the e820 table to
validate the SRAT.  This is completely bogus if there's any kind of
discontiguous memory, where the sum of the pages could be much smaller
than the address of the highest page.  I think all that's necessary is
to validate that each usable memory range in the e820 is covered by an
SRAT entry.  This might not be the most efficient way to do it, but
there are usually a relatively small number of entries on each side.

Signed-off-by: Alex Williamson <alex.williamson@hp.com>

# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1251709817 -3600
# Node ID 27b3bd0a47fd8b020ce06567658237f3d25227f9
# Parent  cf95641ba1d0a6a868f161a45a48f8aac9dd54d3
x86/numa: fix c/s 20120 (Fix SRAT check for discontig memory)

That change replaced the (wrong) assumption that the nodes' memory is
contiguous with the similarly wrong assumption that it is always
discontiguous (i.e. that each node has separate E820 table entries).
The code ought to be able to deal with both layouts, though, and I
hope this change makes it so.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Acked-by: Alex Williamson <alex.williamson@hp.com>

--- a/xen/arch/x86/srat.c
+++ b/xen/arch/x86/srat.c
@@ -17,6 +17,7 @@
 #include <xen/nodemask.h>
 #include <xen/acpi.h>
 #include <xen/numa.h>
+#include <asm/e820.h>
 #include <asm/page.h>
 
 static struct acpi_table_slit *acpi_slit;
@@ -217,23 +218,39 @@ acpi_numa_memory_affinity_init(struct ac
 static int nodes_cover_memory(void)
 {
 	int i;
-	u64 pxmram, e820ram;
 
-	pxmram = 0;
-	for_each_node_mask(i, nodes_parsed) {
-		u64 s = nodes[i].start >> PAGE_SHIFT;
-		u64 e = nodes[i].end >> PAGE_SHIFT;
-		pxmram += e - s;
-	}
+	for (i = 0; i < e820.nr_map; i++) {
+		int j, found;
+		unsigned long long start, end;
 
-	e820ram = max_page;
-	/* We seem to lose 3 pages somewhere. Allow a bit of slack. */
-	if ((long)(e820ram - pxmram) >= 1*1024*1024) {
-		printk(KERN_ERR "SRAT: PXMs only cover %"PRIu64"MB of your %"
-			PRIu64"MB e820 RAM. Not used.\n",
-			(pxmram << PAGE_SHIFT) >> 20,
-			(e820ram << PAGE_SHIFT) >> 20);
-		return 0;
+		if (e820.map[i].type != E820_RAM) {
+			continue;
+		}
+
+		start = e820.map[i].addr;
+		end = e820.map[i].addr + e820.map[i].size - 1;
+
+		do {
+			found = 0;
+			for_each_node_mask(j, nodes_parsed)
+				if (start < nodes[j].end
+				    && end > nodes[j].start) {
+					if (start >= nodes[j].start) {
+						start = nodes[j].end;
+						found = 1;
+					}
+					if (end <= nodes[j].end) {
+						end = nodes[j].start;
+						found = 1;
+					}
+				}
+		} while (found && start < end);
+
+		if (start < end) {
+			printk(KERN_ERR "SRAT: No PXM for e820 range: "
+				"%016Lx - %016Lx\n", start, end);
+			return 0;
+		}
 	}
 	return 1;
 }
openSUSE Build Service is sponsored by