File 627549d6-IO-shutdown-race.patch of Package xen.30241
# Commit b7e0d8978810b534725e94a321736496928f00a5
# Date 2022-05-06 17:16:22 +0100
# Author Julien Grall <jgrall@amazon.com>
# Committer Julien Grall <jgrall@amazon.com>
xen: io: Fix race between sending an I/O and domain shutdown
Xen provides hypercalls to shutdown (SCHEDOP_shutdown{,_code}) and
resume a domain (XEN_DOMCTL_resumedomain). They can be used for checkpoint
where the expectation is the domain should continue as nothing happened
afterwards.
hvmemul_do_io() and handle_pio() will act differently if the return
code of hvm_send_ioreq() (resp. hvmemul_do_pio_buffer()) is X86EMUL_RETRY.
In this case, the I/O state will be reset to STATE_IOREQ_NONE (i.e
no I/O is pending) and/or the PC will not be advanced.
If the shutdown request happens right after the I/O was sent to the
IOREQ, then emulation code will end up to re-execute the instruction
and therefore forward again the same I/O (at least when reading IO port).
This would be problem if the access has a side-effect. A dumb example,
is a device implementing a counter which is incremented by one for every
access. When running shutdown/resume in a loop, the value read by the
OS may not be the old value + 1.
Add an extra boolean in the structure hvm_vcpu_io to indicate whether
the I/O was suspended. This is then used in place of checking the domain
is shutting down in hvmemul_do_io() and handle_pio() as they should
act on suspend (i.e. vcpu_start_shutdown_deferral() returns false) rather
than shutdown.
Signed-off-by: Julien Grall <jgrall@amazon.com>
Reviewed-by: Paul Durrant <paul@xen.org>
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -239,6 +239,7 @@ static int hvmemul_do_io(
     ASSERT(p.count);
 
     vio->io_req = p;
+    vio->suspended = false;
 
     rc = hvm_io_intercept(&p);
 
@@ -334,7 +335,7 @@ static int hvmemul_do_io(
         else
         {
             rc = hvm_send_ioreq(s, &p, 0);
-            if ( rc != X86EMUL_RETRY || currd->is_shutting_down )
+            if ( rc != X86EMUL_RETRY || vio->suspended )
                 vio->io_req.state = STATE_IOREQ_NONE;
             else if ( !hvm_ioreq_needs_completion(&vio->io_req) )
                 rc = X86EMUL_OKAY;
--- a/xen/arch/x86/hvm/io.c
+++ b/xen/arch/x86/hvm/io.c
@@ -152,10 +152,11 @@ bool handle_pio(uint16_t port, unsigned
 
     case X86EMUL_RETRY:
         /*
-         * We should not advance RIP/EIP if the domain is shutting down or
-         * if X86EMUL_RETRY has been returned by an internal handler.
+         * We should not advance RIP/EIP if the vio was suspended (e.g.
+         * because the domain is shutting down) or if X86EMUL_RETRY has
+         * been returned by an internal handler.
          */
-        if ( curr->domain->is_shutting_down || !hvm_io_pending(curr) )
+        if ( vio->suspended || !hvm_io_pending(curr) )
             return false;
         break;
 
--- a/xen/arch/x86/hvm/ioreq.c
+++ b/xen/arch/x86/hvm/ioreq.c
@@ -1444,6 +1444,7 @@ int hvm_send_ioreq(struct hvm_ioreq_serv
     struct vcpu *curr = current;
     struct domain *d = curr->domain;
     struct hvm_ioreq_vcpu *sv;
+    struct hvm_vcpu_io *vio = &curr->arch.hvm.hvm_io;
 
     ASSERT(s);
 
@@ -1451,7 +1452,10 @@ int hvm_send_ioreq(struct hvm_ioreq_serv
         return hvm_send_buffered_ioreq(s, proto_p);
 
     if ( unlikely(!vcpu_start_shutdown_deferral(curr)) )
+    {
+        vio->suspended = true;
         return X86EMUL_RETRY;
+    }
 
     list_for_each_entry ( sv,
                           &s->ioreq_vcpu_list,
--- a/xen/include/asm-x86/hvm/vcpu.h
+++ b/xen/include/asm-x86/hvm/vcpu.h
@@ -54,6 +54,11 @@ struct hvm_mmio_cache {
 struct hvm_vcpu_io {
     /* I/O request in flight to device model. */
     enum hvm_io_completion io_completion;
+    /*
+     * Indicate whether the I/O was not handled because the domain
+     * is about to be paused.
+     */
+    bool                   suspended;
     ioreq_t                io_req;
 
     /*