File 591c6ae3-x86-mm-fix-unmap-of-large-pages.patch of Package xen.5001

# Commit efa9596e9d167c8fb7d1c4446c10f7ca30453646
# Date 2017-05-17 17:23:15 +0200
# Author Igor Druzhinin <igor.druzhinin@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/mm: fix incorrect unmapping of 2MB and 1GB pages

The same set of functions is used to set as well as to clear
P2M entries, except that for clear operations INVALID_MFN (~0UL)
is passed as a parameter. Unfortunately, when calculating an
appropriate target order for a particular mapping, INVALID_MFN
is not taken into account: being all ones, it makes every mapping
look misaligned, which leads to a 4K page target order being chosen
each time, even for 2MB and 1GB mappings. This eventually breaks
down an EPT structure irreversibly into 4K mappings, which prevents
subsequent high-order mappings of this area.

Signed-off-by: Igor Druzhinin <igor.druzhinin@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Acked-by: George Dunlap <george.dunlap@citrix.com>

# Commit 83520cb4aa39ebeb4eb1a7cac2e85b413e75a336
# Date 2017-06-06 14:32:54 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/NPT: deal with fallout from 2Mb/1Gb unmapping change

Commit efa9596e9d ("x86/mm: fix incorrect unmapping of 2MB and 1GB
pages") left the NPT code untouched, as it has no explicit alignment
check matching the one in the EPT code. However, the now more widespread
storing of INVALID_MFN into PTEs requires adjustments:
- calculations when shattering large pages may spill into the p2m type
  field (converting p2m_populate_on_demand to p2m_grant_map_rw) - use
  OR instead of PLUS,
- the use of plain l{2,3}e_from_pfn() in p2m_pt_set_entry() results in
  all upper (flag) bits being clobbered - introduce and use
  p2m_l{2,3}e_from_pfn(), paralleling the existing L1 variant.

Reported-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Tested-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Acked-by: George Dunlap <george.dunlap@citrix.com>

--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -479,12 +479,15 @@ int p2m_set_entry(struct p2m_domain *p2m
     while ( todo )
     {
         if ( hap_enabled(d) )
-            order = (!((gfn | mfn_x(mfn) | todo) &
-                       ((1ul << PAGE_ORDER_1G) - 1)) &&
+        {
+            unsigned long fn_mask = mfn_x(mfn) != INVALID_MFN ?
+                                    (gfn | mfn_x(mfn) | todo) : (gfn | todo);
+
+            order = (!(fn_mask & ((1ul << PAGE_ORDER_1G) - 1)) &&
                      hap_has_1gb) ? PAGE_ORDER_1G :
-                    (!((gfn | mfn_x(mfn) | todo) &
-                       ((1ul << PAGE_ORDER_2M) - 1)) &&
+                    (!(fn_mask & ((1ul << PAGE_ORDER_2M) - 1)) &&
                      hap_has_2mb) ? PAGE_ORDER_2M : PAGE_ORDER_4K;
+        }
         else
             order = 0;
 
--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -666,6 +666,7 @@ ept_set_entry(struct p2m_domain *p2m, un
     ept_entry_t *table, *ept_entry = NULL;
     unsigned long gfn_remainder = gfn;
     unsigned int i, target = order / EPT_TABLE_ORDER;
+    unsigned long fn_mask = mfn_x(mfn) != INVALID_MFN ? (gfn | mfn_x(mfn)) : gfn;
     int ret, rc = 0;
     bool_t direct_mmio = (p2mt == p2m_mmio_direct);
     uint8_t ipat = 0;
@@ -685,7 +686,7 @@ ept_set_entry(struct p2m_domain *p2m, un
      * 2. gfn not exceeding guest physical address width.
      * 3. passing a valid order.
      */
-    if ( ((gfn | mfn_x(mfn)) & ((1UL << order) - 1)) ||
+    if ( (fn_mask & ((1UL << order) - 1)) ||
          ((u64)gfn >> ((ept_get_wl(ept) + 1) * EPT_TABLE_ORDER)) ||
          (order % EPT_TABLE_ORDER) )
         return -EINVAL;
--- a/xen/arch/x86/mm/p2m-pt.c
+++ b/xen/arch/x86/mm/p2m-pt.c
@@ -47,13 +47,20 @@
 #undef page_to_mfn
 #define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
 
-/* We may store INVALID_MFN in l1 PTEs. We need to clip this
- * to avoid trampling over higher-order bits (NX, p2m type, IOMMU flags).  We
- * seem to not need to unclip on the return path, as callers are concerned only
- * with p2m type in such cases. 
+/*
+ * We may store INVALID_MFN in PTEs.  We need to clip this to avoid trampling
+ * over higher-order bits (NX, p2m type, IOMMU flags).  We seem to not need
+ * to unclip on the read path, as callers are concerned only with p2m type in
+ * such cases.
  */
 #define p2m_l1e_from_pfn(pfn, flags)    \
     l1e_from_pfn((pfn) & (PADDR_MASK >> PAGE_SHIFT), (flags))
+#define p2m_l2e_from_pfn(pfn, flags)    \
+    l2e_from_pfn((pfn) & ((PADDR_MASK & ~(_PAGE_PSE_PAT | 0UL)) \
+                          >> PAGE_SHIFT), (flags) | _PAGE_PSE)
+#define p2m_l3e_from_pfn(pfn, flags)    \
+    l3e_from_pfn((pfn) & ((PADDR_MASK & ~(_PAGE_PSE_PAT | 0UL)) \
+                          >> PAGE_SHIFT), (flags) | _PAGE_PSE)
 
 /* PTE flags for the various types of p2m entry */
 #define P2M_BASE_FLAGS \
@@ -239,7 +246,7 @@ p2m_next_level(struct p2m_domain *p2m, v
         l1_entry = __map_domain_page(pg);
         for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
         {
-            new_entry = l1e_from_pfn(pfn + (i * L1_PAGETABLE_ENTRIES), flags);
+            new_entry = l1e_from_pfn(pfn | (i * L1_PAGETABLE_ENTRIES), flags);
             p2m_add_iommu_flags(&new_entry, 1, IOMMUF_readable|IOMMUF_writable);
             p2m->write_p2m_entry(p2m, gfn, l1_entry + i, new_entry, 2);
         }
@@ -273,7 +280,7 @@ p2m_next_level(struct p2m_domain *p2m, v
         l1_entry = __map_domain_page(pg);
         for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
         {
-            new_entry = l1e_from_pfn(pfn + i, flags);
+            new_entry = l1e_from_pfn(pfn | i, flags);
             p2m_add_iommu_flags(&new_entry, 0, 0);
             p2m->write_p2m_entry(p2m, gfn, l1_entry + i, new_entry, 1);
         }
@@ -578,8 +585,7 @@ p2m_pt_set_entry(struct p2m_domain *p2m,
 
         ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct);
         l3e_content = mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt)
-            ? l3e_from_pfn(mfn_x(mfn),
-                           p2m_type_to_flags(p2mt, mfn, 2) | _PAGE_PSE)
+            ? p2m_l3e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt, mfn, 2))
             : l3e_empty();
         entry_content.l1 = l3e_content.l3;
 
@@ -649,13 +655,9 @@ p2m_pt_set_entry(struct p2m_domain *p2m,
         }
         
         ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct);
-        if ( mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt) )
-            l2e_content = l2e_from_pfn(mfn_x(mfn),
-                                       p2m_type_to_flags(p2mt, mfn, 1) |
-                                       _PAGE_PSE);
-        else
-            l2e_content = l2e_empty();
-        
+        l2e_content = mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt)
+            ? p2m_l2e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt, mfn, 1))
+            : l2e_empty();
         entry_content.l1 = l2e_content.l2;
 
         if ( entry_content.l1 != 0 )
openSUSE Build Service is sponsored by