File 557eb55f-gnttab-per-active-entry-locking.patch of Package xen.6117

# Commit b4650e9a96d78b87ccf7deb4f74733ccfcc64db5
# Date 2015-06-15 13:22:07 +0200
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
gnttab: per-active entry locking

Introduce a per-active entry spin lock to protect active entry state
The grant table lock must be locked before acquiring (locking) an
active entry.

This is a step in reducing contention on the grant table lock, but
will only do so once the grant table lock is turned into a read-write
lock.

Based on a patch originally by Matt Wilson <msw@amazon.com>.

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>

Index: xen-4.5.5-testing/docs/misc/grant-tables.txt
===================================================================
--- xen-4.5.5-testing.orig/docs/misc/grant-tables.txt
+++ xen-4.5.5-testing/docs/misc/grant-tables.txt
@@ -63,6 +63,7 @@ is complete.
   act->domid : remote domain being granted rights
   act->frame : machine frame being granted
   act->pin   : used to hold reference counts
+  act->lock  : spinlock used to serialize access to active entry state
 
  Map tracking
  ~~~~~~~~~~~~
@@ -74,7 +75,46 @@ is complete.
  matching map track entry is then removed, as if unmap had been invoked.
  These are not used by the transfer mechanism.
   map->domid         : owner of the mapped frame
-  map->ref_and_flags : grant reference, ro/rw, mapped for host or device access
+  map->ref           : grant reference
+  map->flags         : ro/rw, mapped for host or device access
+
+********************************************************************************
+ Locking
+ ~~~~~~~
+ Xen uses several locks to serialize access to the internal grant table state.
+
+  grant_table->lock          : lock used to prevent readers from accessing
+                               inconsistent grant table state such as current
+                               version, partially initialized active table pages,
+                               etc.
+  active_grant_entry->lock   : spinlock used to serialize modifications to
+                               active entries
+
+ The primary lock for the grant table is a spinlock. All functions
+ that access members of struct grant_table must acquire the lock
+ around critical sections.
+
+ Active entries are obtained by calling active_entry_acquire(gt, ref).
+ This function returns a pointer to the active entry after locking its
+ spinlock. The caller must hold the grant table lock for the gt in
+ question before calling active_entry_acquire(). This is because the
+ grant table can be dynamically extended via gnttab_grow_table() while
+ a domain is running and must be fully initialized. Once all access to
+ the active entry is complete, release the lock by calling
+ active_entry_release(act).
+
+ Summary of rules for locking:
+  active_entry_acquire() and active_entry_release() can only be
+  called when holding the relevant grant table's lock. I.e.:
+    spin_lock(&gt->lock);
+    act = active_entry_acquire(gt, ref);
+    ...
+    active_entry_release(act);
+    spin_unlock(&gt->lock);
+
+ Active entries cannot be acquired while holding the maptrack lock.
+ Multiple active entries can be acquired while holding the grant table
+ lock.
 
 ********************************************************************************
 
Index: xen-4.5.5-testing/xen/common/grant_table.c
===================================================================
--- xen-4.5.5-testing.orig/xen/common/grant_table.c
+++ xen-4.5.5-testing/xen/common/grant_table.c
@@ -157,10 +157,13 @@ struct active_grant_entry {
                                in the page.                           */
     unsigned      length:16; /* For sub-page grants, the length of the
                                 grant.                                */
+    spinlock_t    lock;      /* lock to protect access of this entry.
+                                see docs/misc/grant-tables.txt for
+                                locking protocol                      */
 };
 
 #define ACGNT_PER_PAGE (PAGE_SIZE / sizeof(struct active_grant_entry))
-#define active_entry(t, e) \
+#define _active_entry(t, e) \
     ((t)->active[(e)/ACGNT_PER_PAGE][(e)%ACGNT_PER_PAGE])
 
 static inline void gnttab_flush_tlb(const struct domain *d)
@@ -188,6 +191,24 @@ nr_active_grant_frames(struct grant_tabl
     return num_act_frames_from_sha_frames(nr_grant_frames(gt));
 }
 
+static inline struct active_grant_entry *
+active_entry_acquire(struct grant_table *t, grant_ref_t e)
+{
+    struct active_grant_entry *act;
+
+    ASSERT(spin_is_locked(&t->lock));
+
+    act = &_active_entry(t, e);
+    spin_lock(&act->lock);
+
+    return act;
+}
+
+static inline void active_entry_release(struct active_grant_entry *act)
+{
+    spin_unlock(&act->lock);
+}
+
 /* Check if the page has been paged out, or needs unsharing. 
    If rc == GNTST_okay, *page contains the page struct with a ref taken.
    Caller must do put_page(*page).
@@ -505,7 +526,6 @@ static int grant_map_exists(const struct
                             unsigned long mfn,
                             unsigned int *ref_count)
 {
-    const struct active_grant_entry *act;
     unsigned int ref, max_iter;
     
     ASSERT(spin_is_locked(&rgt->lock));
@@ -514,18 +534,19 @@ static int grant_map_exists(const struct
                    nr_grant_entries(rgt));
     for ( ref = *ref_count; ref < max_iter; ref++ )
     {
-        act = &active_entry(rgt, ref);
+        struct active_grant_entry *act;
+        bool_t exists;
 
-        if ( !act->pin )
-            continue;
+        act = active_entry_acquire(rgt, ref);
 
-        if ( act->domid != ld->domain_id )
-            continue;
+        exists = act->pin
+            && act->domid == ld->domain_id
+            && act->frame == mfn;
 
-        if ( act->frame != mfn )
-            continue;
+        active_entry_release(act);
 
-        return 0;
+        if ( exists )
+            return 0;
     }
 
     if ( ref < nr_grant_entries(rgt) )
@@ -546,13 +567,24 @@ static void mapcount(
 
     *wrc = *rdc = 0;
 
+    /*
+     * Must have the local domain's grant table lock when iterating
+     * over its maptrack entries.
+     */
+    ASSERT(spin_is_locked(&lgt->lock));
+    /*
+     * Must have the remote domain's grant table lock while counting
+     * its active entries.
+     */
+    ASSERT(spin_is_locked(&rd->grant_table->lock));
+
     for ( handle = 0; handle < lgt->maptrack_limit; handle++ )
     {
         map = &maptrack_entry(lgt, handle);
         if ( !(map->flags & (GNTMAP_device_map|GNTMAP_host_map)) ||
              map->domid != rd->domain_id )
             continue;
-        if ( active_entry(rd->grant_table, map->ref).frame == mfn )
+        if ( _active_entry(rd->grant_table, map->ref).frame == mfn )
             (map->flags & GNTMAP_readonly) ? (*rdc)++ : (*wrc)++;
     }
 }
@@ -639,7 +671,7 @@ __gnttab_map_grant_ref(
     if ( unlikely(op->ref >= nr_grant_entries(rgt)))
         PIN_FAIL(unlock_out, GNTST_bad_gntref, "Bad ref (%d).\n", op->ref);
 
-    act = &active_entry(rgt, op->ref);
+    act = active_entry_acquire(rgt, op->ref);
     shah = shared_entry_header(rgt, op->ref);
     if (rgt->gt_version == 1) {
         sha1 = &shared_entry_v1(rgt, op->ref);
@@ -656,7 +688,7 @@ __gnttab_map_grant_ref(
          ((act->domid != ld->domain_id) ||
           (act->pin & 0x80808080U) != 0 ||
           (act->is_sub_page)) )
-        PIN_FAIL(unlock_out, GNTST_general_error,
+        PIN_FAIL(act_release_out, GNTST_general_error,
                  "Bad domain (%d != %d), or risk of counter overflow %08x, or subpage %d\n",
                  act->domid, ld->domain_id, act->pin, act->is_sub_page);
 
@@ -667,7 +699,7 @@ __gnttab_map_grant_ref(
         if ( (rc = _set_status(rgt->gt_version, ld->domain_id,
                                op->flags & GNTMAP_readonly,
                                1, shah, act, status) ) != GNTST_okay )
-             goto unlock_out;
+            goto act_release_out;
 
         if ( !act->pin )
         {
@@ -702,6 +734,7 @@ __gnttab_map_grant_ref(
 
     cache_flags = (shah->flags & (GTF_PAT | GTF_PWT | GTF_PCD) );
 
+    active_entry_release(act);
     spin_unlock(&rgt->lock);
 
     /* pg may be set, with a refcount included, from __get_paged_frame */
@@ -851,7 +884,7 @@ __gnttab_map_grant_ref(
 
     spin_lock(&rgt->lock);
 
-    act = &active_entry(rgt, op->ref);
+    act = active_entry_acquire(rgt, op->ref);
 
     if ( op->flags & GNTMAP_device_map )
         act->pin -= (op->flags & GNTMAP_readonly) ?
@@ -868,6 +901,9 @@ __gnttab_map_grant_ref(
     if ( !act->pin )
         gnttab_clear_flag(_GTF_reading, status);
 
+ act_release_out:
+    active_entry_release(act);
+
  unlock_out:
     spin_unlock(&rgt->lock);
     op->status = rc;
@@ -970,23 +1006,31 @@ __gnttab_unmap_common(
         goto unmap_out;
     }
 
+    op->rd = rd;
+    op->ref = map->ref;
+    act = active_entry_acquire(rgt, map->ref);
+
+    /*
+     * Note that we (ab)use the active entry lock here to protect against
+     * multiple unmaps of the same mapping here. We don't want to hold lgt's
+     * lock, and we only hold rgt's lock for reading (but the latter wouldn't
+     * be the right one anyway). Hence the easiest is to rely on a lock we
+     * hold anyway; see docs/misc/grant-tables.txt's "Locking" section.
+     */
+
     flags = map->flags;
     if ( unlikely(!flags) || unlikely(map->domid != dom) )
     {
         gdprintk(XENLOG_WARNING, "Unstable handle %u\n", op->handle);
         rc = GNTST_bad_handle;
-        goto unmap_out;
+        goto act_release_out;
     }
 
-    op->rd = rd;
-    op->ref = map->ref;
-    act = &active_entry(rgt, map->ref);
-
     op->frame = act->frame;
 
     if ( op->dev_bus_addr &&
          unlikely(op->dev_bus_addr != pfn_to_paddr(act->frame)) )
-        PIN_FAIL(unmap_out, GNTST_general_error,
+        PIN_FAIL(act_release_out, GNTST_general_error,
                  "Bus address doesn't match gntref (%"PRIx64" != %"PRIpaddr")\n",
                  op->dev_bus_addr, pfn_to_paddr(act->frame));
 
@@ -995,7 +1039,7 @@ __gnttab_unmap_common(
         if ( (rc = replace_grant_host_mapping(op->host_addr,
                                               op->frame, op->new_addr, 
                                               flags)) < 0 )
-            goto unmap_out;
+            goto act_release_out;
 
         map->flags &= ~GNTMAP_host_map;
         op->done |= GNTMAP_host_map | (flags & GNTMAP_readonly);
@@ -1013,6 +1057,8 @@ __gnttab_unmap_common(
         put_handle = 1;
     }
 
+ act_release_out:
+    active_entry_release(act);
  unmap_out:
     double_gt_unlock(lgt, rgt);
 
@@ -1068,7 +1114,7 @@ __gnttab_unmap_common_complete(struct gn
     rgt = rd->grant_table;
     spin_lock(&rgt->lock);
 
-    act = &active_entry(rgt, op->ref);
+    act = active_entry_acquire(rgt, op->ref);
     sha = shared_entry_header(rgt, op->ref);
 
     if ( rgt->gt_version == 1 )
@@ -1119,6 +1165,7 @@ __gnttab_unmap_common_complete(struct gn
     if ( act->pin == 0 )
         gnttab_clear_flag(_GTF_reading, status);
 
+    active_entry_release(act);
     spin_unlock(&rgt->lock);
     rcu_unlock_domain(rd);
 }
@@ -1310,7 +1357,7 @@ gnttab_grow_table(struct domain *d, unsi
     /* d's grant table lock must be held by the caller */
 
     struct grant_table *gt = d->grant_table;
-    unsigned int i;
+    unsigned int i, j;
 
     ASSERT(req_nr_frames <= max_grant_frames);
 
@@ -1325,6 +1372,8 @@ gnttab_grow_table(struct domain *d, unsi
         if ( (gt->active[i] = alloc_xenheap_page()) == NULL )
             goto active_alloc_failed;
         clear_page(gt->active[i]);
+        for ( j = 0; j < ACGNT_PER_PAGE; j++ )
+            spin_lock_init(&gt->active[i][j].lock);
     }
 
     /* Shared */
@@ -1812,7 +1861,7 @@ __release_grant_for_copy(
 
     spin_lock(&rgt->lock);
 
-    act = &active_entry(rgt, gref);
+    act = active_entry_acquire(rgt, gref);
     sha = shared_entry_header(rgt, gref);
     r_frame = act->frame;
 
@@ -1845,6 +1894,7 @@ __release_grant_for_copy(
     if ( !act->pin )
         gnttab_clear_flag(_GTF_reading, status);
 
+    active_entry_release(act);
     spin_unlock(&rgt->lock);
 
     if ( td != rd )
@@ -1905,14 +1955,14 @@ __acquire_grant_for_copy(
     spin_lock(&rgt->lock);
 
     if ( rgt->gt_version == 0 )
-        PIN_FAIL(unlock_out, GNTST_general_error,
+        PIN_FAIL(gt_unlock_out, GNTST_general_error,
                  "remote grant table not ready\n");
 
     if ( unlikely(gref >= nr_grant_entries(rgt)) )
-        PIN_FAIL(unlock_out, GNTST_bad_gntref,
+        PIN_FAIL(gt_unlock_out, GNTST_bad_gntref,
                  "Bad grant reference %ld\n", gref);
 
-    act = &active_entry(rgt, gref);
+    act = active_entry_acquire(rgt, gref);
     shah = shared_entry_header(rgt, gref);
     if ( rgt->gt_version == 1 )
     {
@@ -1974,6 +2024,7 @@ __acquire_grant_for_copy(
          * remote table (if rd == td), so we have to drop the lock
          * here and reacquire.
          */
+        active_entry_release(act);
         spin_unlock(&rgt->lock);
 
         rc = __acquire_grant_for_copy(td, trans_gref, rd->domain_id,
@@ -1981,11 +2032,13 @@ __acquire_grant_for_copy(
                                       &trans_page_off, &trans_length, 0);
 
         spin_lock(&rgt->lock);
+        act = active_entry_acquire(rgt, gref);
 
         if ( rc != GNTST_okay )
         {
             __fixup_status_for_copy_pin(act, status);
             rcu_unlock_domain(td);
+            active_entry_release(act);
             spin_unlock(&rgt->lock);
             return rc;
         }
@@ -2007,6 +2060,7 @@ __acquire_grant_for_copy(
             __release_grant_for_copy(td, trans_gref, readonly);
             __fixup_status_for_copy_pin(act, status);
             rcu_unlock_domain(td);
+            active_entry_release(act);
             spin_unlock(&rgt->lock);
             put_page(*page);
             *page = NULL;
@@ -2111,6 +2165,7 @@ __acquire_grant_for_copy(
     *length = act->length;
     *frame = act->frame;
 
+    active_entry_release(act);
     spin_unlock(&rgt->lock);
     return rc;
  
@@ -2123,7 +2178,11 @@ __acquire_grant_for_copy(
         gnttab_clear_flag(_GTF_reading, status);
 
  unlock_out:
+    active_entry_release(act);
+
+ gt_unlock_out:
     spin_unlock(&rgt->lock);
+
     return rc;
 }
 
@@ -2438,7 +2497,6 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARA
     gnttab_set_version_t op;
     struct domain *d = current->domain;
     struct grant_table *gt = d->grant_table;
-    struct active_grant_entry *act;
     grant_entry_v1_t reserved_entries[GNTTAB_NR_RESERVED_ENTRIES];
     long res;
     int i;
@@ -2463,8 +2521,7 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARA
     {
         for ( i = GNTTAB_NR_RESERVED_ENTRIES; i < nr_grant_entries(gt); i++ )
         {
-            act = &active_entry(gt, i);
-            if ( act->pin != 0 )
+            if ( read_atomic(&_active_entry(gt, i).pin) != 0 )
             {
                 gdprintk(XENLOG_WARNING,
                          "tried to change grant table version from %d to %d, but some grant entries still in use\n",
@@ -2672,7 +2729,8 @@ __gnttab_swap_grant_ref(grant_ref_t ref_
 {
     struct domain *d = rcu_lock_current_domain();
     struct grant_table *gt = d->grant_table;
-    struct active_grant_entry *act;
+    struct active_grant_entry *act_a = NULL;
+    struct active_grant_entry *act_b = NULL;
     s16 rc = GNTST_okay;
 
     spin_lock(&gt->lock);
@@ -2686,12 +2744,16 @@ __gnttab_swap_grant_ref(grant_ref_t ref_
     if ( unlikely(ref_b >= nr_grant_entries(d->grant_table)))
         PIN_FAIL(out, GNTST_bad_gntref, "Bad ref-b (%d).\n", ref_b);
 
-    act = &active_entry(gt, ref_a);
-    if ( act->pin )
+    /* Swapping the same ref is a no-op. */
+    if ( ref_a == ref_b )
+        goto out;
+
+    act_a = active_entry_acquire(gt, ref_a);
+    if ( act_a->pin )
         PIN_FAIL(out, GNTST_eagain, "ref a %ld busy\n", (long)ref_a);
 
-    act = &active_entry(gt, ref_b);
-    if ( act->pin )
+    act_b = active_entry_acquire(gt, ref_b);
+    if ( act_b->pin )
         PIN_FAIL(out, GNTST_eagain, "ref b %ld busy\n", (long)ref_b);
 
     if ( gt->gt_version == 1 )
@@ -2718,6 +2780,10 @@ __gnttab_swap_grant_ref(grant_ref_t ref_
     }
 
 out:
+    if ( act_b != NULL )
+        active_entry_release(act_b);
+    if ( act_a != NULL )
+        active_entry_release(act_a);
     spin_unlock(&gt->lock);
 
     rcu_unlock_domain(d);
@@ -3028,7 +3094,7 @@ grant_table_create(
     struct domain *d)
 {
     struct grant_table *t;
-    int                 i;
+    unsigned int i, j;
 
     if ( (t = xzalloc(struct grant_table)) == NULL )
         goto no_mem_0;
@@ -3047,6 +3113,8 @@ grant_table_create(
         if ( (t->active[i] = alloc_xenheap_page()) == NULL )
             goto no_mem_2;
         clear_page(t->active[i]);
+        for ( j = 0; j < ACGNT_PER_PAGE; j++ )
+            spin_lock_init(&t->active[i][j].lock);
     }
 
     /* Tracking of mapped foreign frames table */
@@ -3143,7 +3211,7 @@ gnttab_release_mappings(
         rgt = rd->grant_table;
         spin_lock(&rgt->lock);
 
-        act = &active_entry(rgt, ref);
+        act = active_entry_acquire(rgt, ref);
         sha = shared_entry_header(rgt, ref);
         if (rgt->gt_version == 1)
             status = &sha->flags;
@@ -3203,6 +3271,7 @@ gnttab_release_mappings(
         if ( act->pin == 0 )
             gnttab_clear_flag(_GTF_reading, status);
 
+        active_entry_release(act);
         spin_unlock(&rgt->lock);
 
         rcu_unlock_domain(rd);
@@ -3265,9 +3334,12 @@ static void gnttab_usage_print(struct do
         uint16_t status;
         uint64_t frame;
 
-        act = &active_entry(gt, ref);
+        act = active_entry_acquire(gt, ref);
         if ( !act->pin )
+        {
+            active_entry_release(act);
             continue;
+        }
 
         sha = shared_entry_header(gt, ref);
 
@@ -3297,6 +3369,7 @@ static void gnttab_usage_print(struct do
         printk("[%3d]    %5d 0x%06lx 0x%08x      %5d 0x%06"PRIx64" 0x%02x\n",
                ref, act->domid, act->frame, act->pin,
                sha->domid, frame, status);
+        active_entry_release(act);
     }
 
  out:
openSUSE Build Service is sponsored by