File 557eb55f-gnttab-per-active-entry-locking.patch of Package xen.6117
# Commit b4650e9a96d78b87ccf7deb4f74733ccfcc64db5
# Date 2015-06-15 13:22:07 +0200
# Author David Vrabel <david.vrabel@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
gnttab: per-active entry locking
Introduce a per-active entry spin lock to protect active entry state.
The grant table lock must be locked before acquiring (locking) an
active entry.
This is a step in reducing contention on the grant table lock, but
will only do so once the grant table lock is turned into a read-write
lock.
Based on a patch originally by Matt Wilson <msw@amazon.com>.
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
Index: xen-4.5.5-testing/docs/misc/grant-tables.txt
===================================================================
--- xen-4.5.5-testing.orig/docs/misc/grant-tables.txt
+++ xen-4.5.5-testing/docs/misc/grant-tables.txt
@@ -63,6 +63,7 @@ is complete.
act->domid : remote domain being granted rights
act->frame : machine frame being granted
act->pin : used to hold reference counts
+ act->lock : spinlock used to serialize access to active entry state
Map tracking
~~~~~~~~~~~~
@@ -74,7 +75,46 @@ is complete.
matching map track entry is then removed, as if unmap had been invoked.
These are not used by the transfer mechanism.
map->domid : owner of the mapped frame
- map->ref_and_flags : grant reference, ro/rw, mapped for host or device access
+ map->ref : grant reference
+ map->flags : ro/rw, mapped for host or device access
+
+********************************************************************************
+ Locking
+ ~~~~~~~
+ Xen uses several locks to serialize access to the internal grant table state.
+
+ grant_table->lock : lock used to prevent readers from accessing
+ inconsistent grant table state such as current
+ version, partially initialized active table pages,
+ etc.
+ active_grant_entry->lock : spinlock used to serialize modifications to
+ active entries
+
+ The primary lock for the grant table is a spinlock. All functions
+ that access members of struct grant_table must acquire the lock
+ around critical sections.
+
+ Active entries are obtained by calling active_entry_acquire(gt, ref).
+ This function returns a pointer to the active entry after locking its
+ spinlock. The caller must hold the grant table lock for the gt in
+ question before calling active_entry_acquire(). This is because the
+ grant table can be dynamically extended via gnttab_grow_table() while
+ a domain is running and must be fully initialized. Once all access to
+ the active entry is complete, release the lock by calling
+ active_entry_release(act).
+
+ Summary of rules for locking:
+ active_entry_acquire() and active_entry_release() can only be
+ called when holding the relevant grant table's lock. I.e.:
+ spin_lock(&gt->lock);
+ act = active_entry_acquire(gt, ref);
+ ...
+ active_entry_release(act);
+ spin_unlock(&gt->lock);
+
+ Active entries cannot be acquired while holding the maptrack lock.
+ Multiple active entries can be acquired while holding the grant table
+ lock.
********************************************************************************
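As a worked illustration of the locking rules above, the following sketch shows a hypothetical helper (not part of this patch or of Xen) that reads an active entry's frame while observing the documented ordering: the grant table lock is taken first, then the per-entry lock via active_entry_acquire(), and both are released in reverse order.

    /*
     * Illustrative sketch only -- not part of this patch.  "ref" is
     * assumed to have been range-checked against nr_grant_entries(gt)
     * by the caller.
     */
    static unsigned long read_active_frame(struct grant_table *gt,
                                           grant_ref_t ref)
    {
        struct active_grant_entry *act;
        unsigned long frame;

        spin_lock(&gt->lock);                 /* grant table lock first */
        act = active_entry_acquire(gt, ref);  /* then the per-entry lock */
        frame = act->frame;
        active_entry_release(act);            /* release in reverse order */
        spin_unlock(&gt->lock);

        return frame;
    }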
Index: xen-4.5.5-testing/xen/common/grant_table.c
===================================================================
--- xen-4.5.5-testing.orig/xen/common/grant_table.c
+++ xen-4.5.5-testing/xen/common/grant_table.c
@@ -157,10 +157,13 @@ struct active_grant_entry {
in the page. */
unsigned length:16; /* For sub-page grants, the length of the
grant. */
+ spinlock_t lock; /* lock to protect access of this entry.
+ see docs/misc/grant-tables.txt for
+ locking protocol */
};
#define ACGNT_PER_PAGE (PAGE_SIZE / sizeof(struct active_grant_entry))
-#define active_entry(t, e) \
+#define _active_entry(t, e) \
((t)->active[(e)/ACGNT_PER_PAGE][(e)%ACGNT_PER_PAGE])
static inline void gnttab_flush_tlb(const struct domain *d)
@@ -188,6 +191,24 @@ nr_active_grant_frames(struct grant_tabl
return num_act_frames_from_sha_frames(nr_grant_frames(gt));
}
+static inline struct active_grant_entry *
+active_entry_acquire(struct grant_table *t, grant_ref_t e)
+{
+ struct active_grant_entry *act;
+
+ ASSERT(spin_is_locked(&t->lock));
+
+ act = &_active_entry(t, e);
+ spin_lock(&act->lock);
+
+ return act;
+}
+
+static inline void active_entry_release(struct active_grant_entry *act)
+{
+ spin_unlock(&act->lock);
+}
+
/* Check if the page has been paged out, or needs unsharing.
If rc == GNTST_okay, *page contains the page struct with a ref taken.
Caller must do put_page(*page).
@@ -505,7 +526,6 @@ static int grant_map_exists(const struct
unsigned long mfn,
unsigned int *ref_count)
{
- const struct active_grant_entry *act;
unsigned int ref, max_iter;
ASSERT(spin_is_locked(&rgt->lock));
@@ -514,18 +534,19 @@ static int grant_map_exists(const struct
nr_grant_entries(rgt));
for ( ref = *ref_count; ref < max_iter; ref++ )
{
- act = &active_entry(rgt, ref);
+ struct active_grant_entry *act;
+ bool_t exists;
- if ( !act->pin )
- continue;
+ act = active_entry_acquire(rgt, ref);
- if ( act->domid != ld->domain_id )
- continue;
+ exists = act->pin
+ && act->domid == ld->domain_id
+ && act->frame == mfn;
- if ( act->frame != mfn )
- continue;
+ active_entry_release(act);
- return 0;
+ if ( exists )
+ return 0;
}
if ( ref < nr_grant_entries(rgt) )
@@ -546,13 +567,24 @@ static void mapcount(
*wrc = *rdc = 0;
+ /*
+ * Must have the local domain's grant table lock when iterating
+ * over its maptrack entries.
+ */
+ ASSERT(spin_is_locked(&lgt->lock));
+ /*
+ * Must have the remote domain's grant table lock while counting
+ * its active entries.
+ */
+ ASSERT(spin_is_locked(&rd->grant_table->lock));
+
for ( handle = 0; handle < lgt->maptrack_limit; handle++ )
{
map = &maptrack_entry(lgt, handle);
if ( !(map->flags & (GNTMAP_device_map|GNTMAP_host_map)) ||
map->domid != rd->domain_id )
continue;
- if ( active_entry(rd->grant_table, map->ref).frame == mfn )
+ if ( _active_entry(rd->grant_table, map->ref).frame == mfn )
(map->flags & GNTMAP_readonly) ? (*rdc)++ : (*wrc)++;
}
}
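The two ASSERTs added to mapcount() above encode its calling convention: the local domain's grant table lock and the remote domain's grant table lock must both be held. A minimal caller sketch, assuming the double_gt_lock()/double_gt_unlock() helpers used by the unmap path in this file and mapcount()'s parameter order of (lgt, rd, mfn, &wrc, &rdc):

    /*
     * Sketch only -- not part of this patch.  Count read-only and
     * writable mappings of "mfn" with both grant table locks held,
     * as the ASSERTs in mapcount() require.
     */
    struct grant_table *rgt = rd->grant_table;
    unsigned int wrc, rdc;

    double_gt_lock(lgt, rgt);            /* takes lgt->lock and rgt->lock */
    mapcount(lgt, rd, mfn, &wrc, &rdc);
    double_gt_unlock(lgt, rgt);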
@@ -639,7 +671,7 @@ __gnttab_map_grant_ref(
if ( unlikely(op->ref >= nr_grant_entries(rgt)))
PIN_FAIL(unlock_out, GNTST_bad_gntref, "Bad ref (%d).\n", op->ref);
- act = &active_entry(rgt, op->ref);
+ act = active_entry_acquire(rgt, op->ref);
shah = shared_entry_header(rgt, op->ref);
if (rgt->gt_version == 1) {
sha1 = &shared_entry_v1(rgt, op->ref);
@@ -656,7 +688,7 @@ __gnttab_map_grant_ref(
((act->domid != ld->domain_id) ||
(act->pin & 0x80808080U) != 0 ||
(act->is_sub_page)) )
- PIN_FAIL(unlock_out, GNTST_general_error,
+ PIN_FAIL(act_release_out, GNTST_general_error,
"Bad domain (%d != %d), or risk of counter overflow %08x, or subpage %d\n",
act->domid, ld->domain_id, act->pin, act->is_sub_page);
@@ -667,7 +699,7 @@ __gnttab_map_grant_ref(
if ( (rc = _set_status(rgt->gt_version, ld->domain_id,
op->flags & GNTMAP_readonly,
1, shah, act, status) ) != GNTST_okay )
- goto unlock_out;
+ goto act_release_out;
if ( !act->pin )
{
@@ -702,6 +734,7 @@ __gnttab_map_grant_ref(
cache_flags = (shah->flags & (GTF_PAT | GTF_PWT | GTF_PCD) );
+ active_entry_release(act);
spin_unlock(&rgt->lock);
/* pg may be set, with a refcount included, from __get_paged_frame */
@@ -851,7 +884,7 @@ __gnttab_map_grant_ref(
spin_lock(&rgt->lock);
- act = &active_entry(rgt, op->ref);
+ act = active_entry_acquire(rgt, op->ref);
if ( op->flags & GNTMAP_device_map )
act->pin -= (op->flags & GNTMAP_readonly) ?
@@ -868,6 +901,9 @@ __gnttab_map_grant_ref(
if ( !act->pin )
gnttab_clear_flag(_GTF_reading, status);
+ act_release_out:
+ active_entry_release(act);
+
unlock_out:
spin_unlock(&rgt->lock);
op->status = rc;
@@ -970,23 +1006,31 @@ __gnttab_unmap_common(
goto unmap_out;
}
+ op->rd = rd;
+ op->ref = map->ref;
+ act = active_entry_acquire(rgt, map->ref);
+
+ /*
+ * Note that we (ab)use the active entry lock here to protect against
+ * multiple unmaps of the same mapping. We don't want to hold lgt's
+ * lock, and we only hold rgt's lock for reading (but the latter wouldn't
+ * be the right one anyway). Hence the easiest is to rely on a lock we
+ * hold anyway; see docs/misc/grant-tables.txt's "Locking" section.
+ */
+
flags = map->flags;
if ( unlikely(!flags) || unlikely(map->domid != dom) )
{
gdprintk(XENLOG_WARNING, "Unstable handle %u\n", op->handle);
rc = GNTST_bad_handle;
- goto unmap_out;
+ goto act_release_out;
}
- op->rd = rd;
- op->ref = map->ref;
- act = &active_entry(rgt, map->ref);
-
op->frame = act->frame;
if ( op->dev_bus_addr &&
unlikely(op->dev_bus_addr != pfn_to_paddr(act->frame)) )
- PIN_FAIL(unmap_out, GNTST_general_error,
+ PIN_FAIL(act_release_out, GNTST_general_error,
"Bus address doesn't match gntref (%"PRIx64" != %"PRIpaddr")\n",
op->dev_bus_addr, pfn_to_paddr(act->frame));
@@ -995,7 +1039,7 @@ __gnttab_unmap_common(
if ( (rc = replace_grant_host_mapping(op->host_addr,
op->frame, op->new_addr,
flags)) < 0 )
- goto unmap_out;
+ goto act_release_out;
map->flags &= ~GNTMAP_host_map;
op->done |= GNTMAP_host_map | (flags & GNTMAP_readonly);
@@ -1013,6 +1057,8 @@ __gnttab_unmap_common(
put_handle = 1;
}
+ act_release_out:
+ active_entry_release(act);
unmap_out:
double_gt_unlock(lgt, rgt);
@@ -1068,7 +1114,7 @@ __gnttab_unmap_common_complete(struct gn
rgt = rd->grant_table;
spin_lock(&rgt->lock);
- act = &active_entry(rgt, op->ref);
+ act = active_entry_acquire(rgt, op->ref);
sha = shared_entry_header(rgt, op->ref);
if ( rgt->gt_version == 1 )
@@ -1119,6 +1165,7 @@ __gnttab_unmap_common_complete(struct gn
if ( act->pin == 0 )
gnttab_clear_flag(_GTF_reading, status);
+ active_entry_release(act);
spin_unlock(&rgt->lock);
rcu_unlock_domain(rd);
}
@@ -1310,7 +1357,7 @@ gnttab_grow_table(struct domain *d, unsi
/* d's grant table lock must be held by the caller */
struct grant_table *gt = d->grant_table;
- unsigned int i;
+ unsigned int i, j;
ASSERT(req_nr_frames <= max_grant_frames);
@@ -1325,6 +1372,8 @@ gnttab_grow_table(struct domain *d, unsi
if ( (gt->active[i] = alloc_xenheap_page()) == NULL )
goto active_alloc_failed;
clear_page(gt->active[i]);
+ for ( j = 0; j < ACGNT_PER_PAGE; j++ )
+ spin_lock_init(&gt->active[i][j].lock);
}
/* Shared */
@@ -1812,7 +1861,7 @@ __release_grant_for_copy(
spin_lock(&rgt->lock);
- act = &active_entry(rgt, gref);
+ act = active_entry_acquire(rgt, gref);
sha = shared_entry_header(rgt, gref);
r_frame = act->frame;
@@ -1845,6 +1894,7 @@ __release_grant_for_copy(
if ( !act->pin )
gnttab_clear_flag(_GTF_reading, status);
+ active_entry_release(act);
spin_unlock(&rgt->lock);
if ( td != rd )
@@ -1905,14 +1955,14 @@ __acquire_grant_for_copy(
spin_lock(&rgt->lock);
if ( rgt->gt_version == 0 )
- PIN_FAIL(unlock_out, GNTST_general_error,
+ PIN_FAIL(gt_unlock_out, GNTST_general_error,
"remote grant table not ready\n");
if ( unlikely(gref >= nr_grant_entries(rgt)) )
- PIN_FAIL(unlock_out, GNTST_bad_gntref,
+ PIN_FAIL(gt_unlock_out, GNTST_bad_gntref,
"Bad grant reference %ld\n", gref);
- act = &active_entry(rgt, gref);
+ act = active_entry_acquire(rgt, gref);
shah = shared_entry_header(rgt, gref);
if ( rgt->gt_version == 1 )
{
@@ -1974,6 +2024,7 @@ __acquire_grant_for_copy(
* remote table (if rd == td), so we have to drop the lock
* here and reacquire.
*/
+ active_entry_release(act);
spin_unlock(&rgt->lock);
rc = __acquire_grant_for_copy(td, trans_gref, rd->domain_id,
@@ -1981,11 +2032,13 @@ __acquire_grant_for_copy(
&trans_page_off, &trans_length, 0);
spin_lock(&rgt->lock);
+ act = active_entry_acquire(rgt, gref);
if ( rc != GNTST_okay )
{
__fixup_status_for_copy_pin(act, status);
rcu_unlock_domain(td);
+ active_entry_release(act);
spin_unlock(&rgt->lock);
return rc;
}
@@ -2007,6 +2060,7 @@ __acquire_grant_for_copy(
__release_grant_for_copy(td, trans_gref, readonly);
__fixup_status_for_copy_pin(act, status);
rcu_unlock_domain(td);
+ active_entry_release(act);
spin_unlock(&rgt->lock);
put_page(*page);
*page = NULL;
@@ -2111,6 +2165,7 @@ __acquire_grant_for_copy(
*length = act->length;
*frame = act->frame;
+ active_entry_release(act);
spin_unlock(&rgt->lock);
return rc;
@@ -2123,7 +2178,11 @@ __acquire_grant_for_copy(
gnttab_clear_flag(_GTF_reading, status);
unlock_out:
+ active_entry_release(act);
+
+ gt_unlock_out:
spin_unlock(&rgt->lock);
+
return rc;
}
@@ -2438,7 +2497,6 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARA
gnttab_set_version_t op;
struct domain *d = current->domain;
struct grant_table *gt = d->grant_table;
- struct active_grant_entry *act;
grant_entry_v1_t reserved_entries[GNTTAB_NR_RESERVED_ENTRIES];
long res;
int i;
@@ -2463,8 +2521,7 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARA
{
for ( i = GNTTAB_NR_RESERVED_ENTRIES; i < nr_grant_entries(gt); i++ )
{
- act = &active_entry(gt, i);
- if ( act->pin != 0 )
+ if ( read_atomic(&_active_entry(gt, i).pin) != 0 )
{
gdprintk(XENLOG_WARNING,
"tried to change grant table version from %d to %d, but some grant entries still in use\n",
@@ -2672,7 +2729,8 @@ __gnttab_swap_grant_ref(grant_ref_t ref_
{
struct domain *d = rcu_lock_current_domain();
struct grant_table *gt = d->grant_table;
- struct active_grant_entry *act;
+ struct active_grant_entry *act_a = NULL;
+ struct active_grant_entry *act_b = NULL;
s16 rc = GNTST_okay;
spin_lock(&gt->lock);
@@ -2686,12 +2744,16 @@ __gnttab_swap_grant_ref(grant_ref_t ref_
if ( unlikely(ref_b >= nr_grant_entries(d->grant_table)))
PIN_FAIL(out, GNTST_bad_gntref, "Bad ref-b (%d).\n", ref_b);
- act = &active_entry(gt, ref_a);
- if ( act->pin )
+ /* Swapping the same ref is a no-op. */
+ if ( ref_a == ref_b )
+ goto out;
+
+ act_a = active_entry_acquire(gt, ref_a);
+ if ( act_a->pin )
PIN_FAIL(out, GNTST_eagain, "ref a %ld busy\n", (long)ref_a);
- act = &active_entry(gt, ref_b);
- if ( act->pin )
+ act_b = active_entry_acquire(gt, ref_b);
+ if ( act_b->pin )
PIN_FAIL(out, GNTST_eagain, "ref b %ld busy\n", (long)ref_b);
if ( gt->gt_version == 1 )
@@ -2718,6 +2780,10 @@ __gnttab_swap_grant_ref(grant_ref_t ref_
}
out:
+ if ( act_b != NULL )
+ active_entry_release(act_b);
+ if ( act_a != NULL )
+ active_entry_release(act_a);
spin_unlock(&gt->lock);
rcu_unlock_domain(d);
@@ -3028,7 +3094,7 @@ grant_table_create(
struct domain *d)
{
struct grant_table *t;
- int i;
+ unsigned int i, j;
if ( (t = xzalloc(struct grant_table)) == NULL )
goto no_mem_0;
@@ -3047,6 +3113,8 @@ grant_table_create(
if ( (t->active[i] = alloc_xenheap_page()) == NULL )
goto no_mem_2;
clear_page(t->active[i]);
+ for ( j = 0; j < ACGNT_PER_PAGE; j++ )
+ spin_lock_init(&t->active[i][j].lock);
}
/* Tracking of mapped foreign frames table */
@@ -3143,7 +3211,7 @@ gnttab_release_mappings(
rgt = rd->grant_table;
spin_lock(&rgt->lock);
- act = &active_entry(rgt, ref);
+ act = active_entry_acquire(rgt, ref);
sha = shared_entry_header(rgt, ref);
if (rgt->gt_version == 1)
status = &sha->flags;
@@ -3203,6 +3271,7 @@ gnttab_release_mappings(
if ( act->pin == 0 )
gnttab_clear_flag(_GTF_reading, status);
+ active_entry_release(act);
spin_unlock(&rgt->lock);
rcu_unlock_domain(rd);
@@ -3265,9 +3334,12 @@ static void gnttab_usage_print(struct do
uint16_t status;
uint64_t frame;
- act = &active_entry(gt, ref);
+ act = active_entry_acquire(gt, ref);
if ( !act->pin )
+ {
+ active_entry_release(act);
continue;
+ }
sha = shared_entry_header(gt, ref);
@@ -3297,6 +3369,7 @@ static void gnttab_usage_print(struct do
printk("[%3d] %5d 0x%06lx 0x%08x %5d 0x%06"PRIx64" 0x%02x\n",
ref, act->domid, act->frame, act->pin,
sha->domid, frame, status);
+ active_entry_release(act);
}
out: