File 5eda60cb-SVM-split-recalc-NPT-fault-handling.patch of Package xen.15648

# Commit 51ca66c37371b10b378513af126646de22eddb17
# Date 2020-06-05 17:12:11 +0200
# Author Igor Druzhinin <igor.druzhinin@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/svm: do not try to handle recalc NPT faults immediately

A recalculation NPT fault doesn't always require additional handling
in hvm_hap_nested_page_fault(), moreover in general case if there is no
explicit handling done there - the fault is wrongly considered fatal.

This covers a specific case of migration with vGPU assigned which
uses direct MMIO mappings made by XEN_DOMCTL_memory_mapping hypercall:
at a moment log-dirty is enabled globally, recalculation is requested
for the whole guest memory including those mapped MMIO regions
which causes a page fault being raised at the first access to them;
but due to MMIO P2M type not having any explicit handling in
hvm_hap_nested_page_fault() a domain is erroneously crashed with unhandled
SVM violation.

Instead of trying to be opportunistic - use safer approach and handle
P2M recalculation in a separate NPT fault by attempting to retry after
making the necessary adjustments. This is aligned with Intel behavior
where there are separate VMEXITs for recalculation and EPT violations
(faults) and only faults are handled in hvm_hap_nested_page_fault().
Do it by also unifying do_recalc return code with Intel implementation
where returning 1 means P2M was actually changed.

Since there was no case previously where p2m_pt_handle_deferred_changes()
could return a positive value - it's safe to replace ">= 0" with just "== 0"
in VMEXIT_NPF handler. finish_type_change() is also not affected by the
change as being able to deal with >0 return value of p2m->recalc from
EPT implementation.

Signed-off-by: Igor Druzhinin <igor.druzhinin@citrix.com>
Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -2947,9 +2947,10 @@ void svm_vmexit_handler(struct cpu_user_
             v->arch.hvm.svm.cached_insn_len = vmcb->guest_ins_len & 0xf;
         rc = vmcb->exitinfo1 & PFEC_page_present
              ? p2m_pt_handle_deferred_changes(vmcb->exitinfo2) : 0;
-        if ( rc >= 0 )
+        if ( rc == 0 )
+            /* If no recal adjustments were being made - handle this fault */
             svm_do_nested_pgfault(v, regs, vmcb->exitinfo1, vmcb->exitinfo2);
-        else
+        else if ( rc < 0 )
         {
             printk(XENLOG_G_ERR
                    "%pv: Error %d handling NPF (gpa=%08lx ec=%04lx)\n",
--- a/xen/arch/x86/mm/p2m-pt.c
+++ b/xen/arch/x86/mm/p2m-pt.c
@@ -341,6 +341,7 @@ static int do_recalc(struct p2m_domain *
     unsigned int level = 4;
     l1_pgentry_t *pent;
     int err = 0;
+    bool recalc_done = false;
 
     table = map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));
     while ( --level )
@@ -402,6 +403,8 @@ static int do_recalc(struct p2m_domain *
                 clear_recalc(l1, e);
                 err = p2m->write_p2m_entry(p2m, gfn, pent, e, level + 1);
                 ASSERT(!err);
+
+                recalc_done = true;
             }
         }
         unmap_domain_page((void *)((unsigned long)pent & PAGE_MASK));
@@ -448,12 +451,14 @@ static int do_recalc(struct p2m_domain *
             clear_recalc(l1, e);
         err = p2m->write_p2m_entry(p2m, gfn, pent, e, level + 1);
         ASSERT(!err);
+
+        recalc_done = true;
     }
 
  out:
     unmap_domain_page(table);
 
-    return err;
+    return err ?: recalc_done;
 }
 
 int p2m_pt_handle_deferred_changes(uint64_t gpa)
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -1194,7 +1194,7 @@ static int finish_type_change(struct p2m
         rc = p2m->recalc(p2m, gfn);
         /*
          * ept->recalc could return 0/1/-ENOMEM. pt->recalc could return
-         * 0/-ENOMEM/-ENOENT, -ENOENT isn't an error as we are looping
+         * 0/1/-ENOMEM/-ENOENT, -ENOENT isn't an error as we are looping
          * gfn here. If rc is 1 we need to have it 0 for success.
          */
         if ( rc == -ENOENT || rc > 0 )
openSUSE Build Service is sponsored by