File 18844-shared-page-EOI.patch of Package xen

# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1227878852 0
# Node ID c820bf73a914f643ab48864629c0559e68ceede1
# Parent  8dbf23c89cc6a4fbd7b9063b14e706c065ba1678
x86: add a shared page indicating the need for an EOI notification

To simplify the interface for the guest, once a guest has used this new
(sub-)hypercall, the behavior of PHYSDEVOP_eoi changes so that it also
unmasks the corresponding event channel, avoiding the eventual need for a
second hypercall from the guest.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>

18846:
x86: Fix PHYSDEVOP_pirq_eoi_mfn, which I modified and broke.

Signed-off-by: Keir Fraser <keir.fraser@citrix.com>

18851:
x86: Fix mfn_to_virt() to cast MFN to address size.

Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
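
For illustration only (not part of the patch): a minimal guest-side sketch
of how a kernel might register the shared page introduced here. Only
PHYSDEVOP_pirq_eoi_mfn, struct physdev_pirq_eoi_mfn and its mfn field come
from the interface added below; the hypercall wrapper, virt_to_mfn() and
the allocation calls are assumed Linux-style names.

#include <xen/interface/physdev.h>      /* assumed header path for the ABI */

static unsigned long *pirq_needs_eoi;   /* one page, one bit per PIRQ */

static int register_pirq_eoi_page(void)
{
    struct physdev_pirq_eoi_mfn info;

    /* A zeroed, page-sized bit array to be shared with Xen. */
    pirq_needs_eoi = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
    if ( pirq_needs_eoi == NULL )
        return -ENOMEM;

    /* Hand the page's machine frame number to Xen (virt_to_mfn() assumed). */
    info.mfn = virt_to_mfn(pirq_needs_eoi);
    return HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_mfn, &info);
}

Per the physdev.c hunk below, the hypercall fails with -EINVAL for a bad
MFN, -EBUSY if a page is already registered, and -ENOSPC if the global
mapping cannot be established.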

Index: xen-3.3.1-testing/xen/arch/x86/domain.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/domain.c
+++ xen-3.3.1-testing/xen/arch/x86/domain.c
@@ -1812,6 +1812,13 @@ int domain_relinquish_resources(struct d
             unmap_vcpu_info(v);
         }
 
+        if ( d->arch.pirq_eoi_map != NULL )
+        {
+            unmap_domain_page_global(d->arch.pirq_eoi_map);
+            put_page_and_type(mfn_to_page(d->arch.pirq_eoi_map_mfn));
+            d->arch.pirq_eoi_map = NULL;
+        }
+
         d->arch.relmem = RELMEM_xen;
         /* fallthrough */
 
Index: xen-3.3.1-testing/xen/arch/x86/irq.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/irq.c
+++ xen-3.3.1-testing/xen/arch/x86/irq.c
@@ -18,6 +18,7 @@
 #include <xen/iommu.h>
 #include <asm/msi.h>
 #include <asm/current.h>
+#include <asm/flushtlb.h>
 #include <public/physdev.h>
 
 /* opt_noirqbalance: If true, software IRQ balancing/affinity is disabled. */
@@ -206,16 +207,42 @@ struct pending_eoi {
 static DEFINE_PER_CPU(struct pending_eoi, pending_eoi[NR_VECTORS]);
 #define pending_eoi_sp(p) ((p)[NR_VECTORS-1].vector)
 
+static inline void set_pirq_eoi(struct domain *d, unsigned int irq)
+{
+    if ( d->arch.pirq_eoi_map )
+        set_bit(irq, d->arch.pirq_eoi_map);
+}
+
+static inline void clear_pirq_eoi(struct domain *d, unsigned int irq)
+{
+    if ( d->arch.pirq_eoi_map )
+        clear_bit(irq, d->arch.pirq_eoi_map);
+}
+
+static void _irq_guest_eoi(irq_desc_t *desc)
+{
+    irq_guest_action_t *action = (irq_guest_action_t *)desc->action;
+    unsigned int i, vector = desc - irq_desc;
+
+    if ( !(desc->status & IRQ_GUEST_EOI_PENDING) )
+        return;
+
+    for ( i = 0; i < action->nr_guests; ++i )
+        clear_pirq_eoi(action->guest[i],
+                       domain_vector_to_irq(action->guest[i], vector));
+
+    desc->status &= ~(IRQ_INPROGRESS|IRQ_GUEST_EOI_PENDING);
+    desc->handler->enable(vector);
+}
+
 static struct timer irq_guest_eoi_timer[NR_IRQS];
 static void irq_guest_eoi_timer_fn(void *data)
 {
     irq_desc_t *desc = data;
-    unsigned vector = desc - irq_desc;
     unsigned long flags;
 
     spin_lock_irqsave(&desc->lock, flags);
-    desc->status &= ~IRQ_INPROGRESS;
-    desc->handler->enable(vector);
+    _irq_guest_eoi(desc);
     spin_unlock_irqrestore(&desc->lock, flags);
 }
 
@@ -272,8 +299,22 @@ static void __do_IRQ_guest(int vector)
 
     if ( already_pending == action->nr_guests )
     {
-        desc->handler->disable(vector);
         stop_timer(&irq_guest_eoi_timer[vector]);
+        desc->handler->disable(vector);
+        desc->status |= IRQ_GUEST_EOI_PENDING;
+        for ( i = 0; i < already_pending; ++i )
+        {
+            d = action->guest[i];
+            set_pirq_eoi(d, domain_vector_to_irq(d, vector));
+            /*
+             * Could check here whether the guest unmasked the event by now
+             * (or perhaps just re-issue the send_guest_pirq()), and if it
+             * can now accept the event,
+             * - clear all the pirq_eoi bits we already set,
+             * - re-enable the vector, and
+             * - skip the timer setup below.
+             */
+        }
         init_timer(&irq_guest_eoi_timer[vector],
                    irq_guest_eoi_timer_fn, desc, smp_processor_id());
         set_timer(&irq_guest_eoi_timer[vector], NOW() + MILLISECS(1));
@@ -382,8 +423,12 @@ static void __pirq_guest_eoi(struct doma
     action = (irq_guest_action_t *)desc->action;
     vector = desc - irq_desc;
 
-    ASSERT(!test_bit(irq, d->pirq_mask) ||
-           (action->ack_type != ACKTYPE_NONE));
+    if ( action->ack_type == ACKTYPE_NONE )
+    {
+        ASSERT(!test_bit(irq, d->pirq_mask));
+        stop_timer(&irq_guest_eoi_timer[vector]);
+        _irq_guest_eoi(desc);
+    }
 
     if ( unlikely(!test_and_clear_bit(irq, d->pirq_mask)) ||
          unlikely(--action->in_flight != 0) )
@@ -604,6 +649,11 @@ int pirq_guest_bind(struct vcpu *v, int 
 
     action->guest[action->nr_guests++] = v->domain;
 
+    if ( action->ack_type != ACKTYPE_NONE )
+        set_pirq_eoi(v->domain, irq);
+    else
+        clear_pirq_eoi(v->domain, irq);
+
  unlock_out:
     spin_unlock_irq(&desc->lock);
  out:
Index: xen-3.3.1-testing/xen/arch/x86/physdev.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/physdev.c
+++ xen-3.3.1-testing/xen/arch/x86/physdev.c
@@ -204,10 +204,50 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
         ret = -EFAULT;
         if ( copy_from_guest(&eoi, arg, 1) != 0 )
             break;
+        ret = -EINVAL;
+        if ( eoi.irq < 0 || eoi.irq >= NR_IRQS )
+            break;
+        if ( v->domain->arch.pirq_eoi_map )
+            evtchn_unmask(v->domain->pirq_to_evtchn[eoi.irq]);
         ret = pirq_guest_eoi(v->domain, eoi.irq);
         break;
     }
 
+    case PHYSDEVOP_pirq_eoi_mfn: {
+        struct physdev_pirq_eoi_mfn info;
+
+        BUILD_BUG_ON(NR_IRQS > (PAGE_SIZE * 8));
+
+        ret = -EFAULT;
+        if ( copy_from_guest(&info, arg, 1) != 0 )
+            break;
+
+        ret = -EINVAL;
+        if ( !mfn_valid(info.mfn) ||
+             !get_page_and_type(mfn_to_page(info.mfn), v->domain,
+                                PGT_writable_page) )
+            break;
+
+        if ( cmpxchg(&v->domain->arch.pirq_eoi_map_mfn, 0, info.mfn) != 0 )
+        {
+            put_page_and_type(mfn_to_page(info.mfn));
+            ret = -EBUSY;
+            break;
+        }
+
+        v->domain->arch.pirq_eoi_map = map_domain_page_global(info.mfn);
+        if ( v->domain->arch.pirq_eoi_map == NULL )
+        {
+            v->domain->arch.pirq_eoi_map_mfn = 0;
+            put_page_and_type(mfn_to_page(info.mfn));
+            ret = -ENOSPC;
+            break;
+        }
+
+        ret = 0;
+        break;
+    }
+
     /* Legacy since 0x00030202. */
     case PHYSDEVOP_IRQ_UNMASK_NOTIFY: {
         ret = pirq_guest_unmask(v->domain);
Index: xen-3.3.1-testing/xen/arch/x86/x86_64/physdev.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/x86_64/physdev.c
+++ xen-3.3.1-testing/xen/arch/x86/x86_64/physdev.c
@@ -18,6 +18,9 @@
 #define physdev_eoi                compat_physdev_eoi
 #define physdev_eoi_t              physdev_eoi_compat_t
 
+#define physdev_pirq_eoi_mfn       compat_physdev_pirq_eoi_mfn
+#define physdev_pirq_eoi_mfn_t     physdev_pirq_eoi_mfn_compat_t
+
 #define physdev_set_iobitmap       compat_physdev_set_iobitmap
 #define physdev_set_iobitmap_t     physdev_set_iobitmap_compat_t
 
Index: xen-3.3.1-testing/xen/common/event_channel.c
===================================================================
--- xen-3.3.1-testing.orig/xen/common/event_channel.c
+++ xen-3.3.1-testing/xen/common/event_channel.c
@@ -762,10 +762,9 @@ long evtchn_bind_vcpu(unsigned int port,
 }
 
 
-static long evtchn_unmask(evtchn_unmask_t *unmask)
+int evtchn_unmask(unsigned int port)
 {
     struct domain *d = current->domain;
-    int            port = unmask->port;
     struct vcpu   *v;
 
     spin_lock(&d->event_lock);
@@ -916,7 +915,7 @@ long do_event_channel_op(int cmd, XEN_GU
         struct evtchn_unmask unmask;
         if ( copy_from_guest(&unmask, arg, 1) != 0 )
             return -EFAULT;
-        rc = evtchn_unmask(&unmask);
+        rc = evtchn_unmask(unmask.port);
         break;
     }
 
Index: xen-3.3.1-testing/xen/include/asm-x86/domain.h
===================================================================
--- xen-3.3.1-testing.orig/xen/include/asm-x86/domain.h
+++ xen-3.3.1-testing/xen/include/asm-x86/domain.h
@@ -239,6 +239,10 @@ struct arch_domain
     int vector_pirq[NR_VECTORS];
     int pirq_vector[NR_PIRQS];
 
+    /* Shared page for notifying that explicit PIRQ EOI is required. */
+    unsigned long *pirq_eoi_map;
+    unsigned long pirq_eoi_map_mfn;
+
     /* Pseudophysical e820 map (XENMEM_memory_map).  */
     struct e820entry e820[3];
     unsigned int nr_e820;
Index: xen-3.3.1-testing/xen/include/public/physdev.h
===================================================================
--- xen-3.3.1-testing.orig/xen/include/public/physdev.h
+++ xen-3.3.1-testing/xen/include/public/physdev.h
@@ -41,6 +41,21 @@ typedef struct physdev_eoi physdev_eoi_t
 DEFINE_XEN_GUEST_HANDLE(physdev_eoi_t);
 
 /*
+ * Register a shared page for the hypervisor to indicate whether the guest
+ * must issue PHYSDEVOP_eoi. Once the guest has used this function, the
+ * semantics of PHYSDEVOP_eoi change slightly: the associated event channel
+ * is unmasked automatically as well. The registered page is used as a bit
+ * array indexed by Xen's PIRQ value.
+ */
+#define PHYSDEVOP_pirq_eoi_mfn          17
+struct physdev_pirq_eoi_mfn {
+    /* IN */
+    xen_pfn_t mfn;
+};
+typedef struct physdev_pirq_eoi_mfn physdev_pirq_eoi_mfn_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_pirq_eoi_mfn_t);
+
+/*
  * Query the status of an IRQ line.
  * @arg == pointer to physdev_irq_status_query structure.
  */
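
For illustration only (again, not part of the patch): with the page
registered, a guest's PIRQ EOI path can consult the bit array before
trapping into Xen. The bit-array indexing by PIRQ and the changed
PHYSDEVOP_eoi semantics (implicit event-channel unmask) come from the hunks
above; pirq_needs_eoi refers to the page allocated in the earlier sketch,
and the remaining names are assumed.

extern unsigned long *pirq_needs_eoi;   /* registered in the sketch above */

static void maybe_eoi_pirq(unsigned int pirq)
{
    struct physdev_eoi eoi = { .irq = pirq };

    /* Xen sets the bit for PIRQs that currently require an explicit EOI. */
    if ( pirq_needs_eoi == NULL || !test_bit(pirq, pirq_needs_eoi) )
        return;

    /*
     * With the shared page registered, PHYSDEVOP_eoi also unmasks the
     * bound event channel, so no separate EVTCHNOP_unmask is needed.
     */
    HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
}
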
Index: xen-3.3.1-testing/xen/include/xen/event.h
===================================================================
--- xen-3.3.1-testing.orig/xen/include/xen/event.h
+++ xen-3.3.1-testing/xen/include/xen/event.h
@@ -44,6 +44,9 @@ int evtchn_send(struct domain *d, unsign
 /* Bind a local event-channel port to the specified VCPU. */
 long evtchn_bind_vcpu(unsigned int port, unsigned int vcpu_id);
 
+/* Unmask a local event-channel port. */
+int evtchn_unmask(unsigned int port);
+
 /* Allocate/free a Xen-attached event channel port. */
 int alloc_unbound_xen_event_channel(
     struct vcpu *local_vcpu, domid_t remote_domid);
Index: xen-3.3.1-testing/xen/include/xen/irq.h
===================================================================
--- xen-3.3.1-testing.orig/xen/include/xen/irq.h
+++ xen-3.3.1-testing/xen/include/xen/irq.h
@@ -22,6 +22,7 @@ struct irqaction
 #define IRQ_PENDING	4	/* IRQ pending - replay on enable */
 #define IRQ_REPLAY	8	/* IRQ has been replayed but not acked yet */
 #define IRQ_GUEST       16      /* IRQ is handled by guest OS(es) */
+#define IRQ_GUEST_EOI_PENDING 32 /* IRQ was disabled, pending a guest EOI */
 #define IRQ_PER_CPU     256     /* IRQ is per CPU */
 
 /*