File 5aeaeaf0-sched-fix-races-in-vcpu-migration.patch of Package xen.8389
# Commit 9a36de177c16d6423a07ad61f1c7af5274769aae
# Date 2018-05-03 11:56:48 +0100
# Author George Dunlap <george.dunlap@citrix.com>
# Committer George Dunlap <george.dunlap@citrix.com>
xen/schedule: Fix races in vcpu migration
The current sequence to initiate vcpu migration is inefficent and error-prone:
- The initiator sets VPF_migraging with the lock held, then drops the
lock and calls vcpu_sleep_nosync(), which immediately grabs the lock
again
- A number of places unnecessarily check for v->pause_flags in between
those two
- Every call to vcpu_migrate() must be prefaced with
vcpu_sleep_nosync() or introduce a race condition; this code
duplication is error-prone
- In the event that v->is_running is true at the beginning of
vcpu_migrate(), it's almost certain that vcpu_migrate() will end up
being called in context_switch() as well; we might as well simply
let it run there and save the duplicated effort (which will be
non-negligible).
The result is that Credit1 has several races which result in runqueue
<-> v->processor invariants being violated (triggering ASSERTs in
debug builds and strange bugs in production builds).
Instead, introduce vcpu_migrate_start() to initiate the process.
vcpu_migrate_start() is called with the scheduling lock held. It not
only sets VPF_migrating, but also calls vcpu_sleep_nosync_locked()
(which will automatically do nothing if there's nothing to do).
Rename vcpu_migrate() to vcpu_migrate_finish(). Check for v->is_running and
pause_flags & VPF_migrating at the top and return if appropriate.
Then the way to initiate migration is consistently:
* Grab lock
* vcpu_migrate_start()
* Release lock
* vcpu_migrate_finish()
Signed-off-by: George Dunlap <george.dunlap@citrix.com>
Reviewed-by: Dario Faggioli <dfaggioli@suse.com>
Tested-by: Olaf Hering <olaf@aepfle.de>
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -574,13 +574,54 @@ static void vcpu_move_nosched(struct vcp
sched_move_irqs(v);
}
-static void vcpu_migrate(struct vcpu *v)
+/*
+ * Initiating migration
+ *
+ * In order to migrate, we need the vcpu in question to have stopped
+ * running and had SCHED_OP(sleep) called (to take it off any
+ * runqueues, for instance); and if it is currently running, it needs
+ * to be scheduled out. Finally, we need to hold the scheduling locks
+ * for both the processor we're migrating from, and the processor
+ * we're migrating to.
+ *
+ * In order to avoid deadlock while satisfying the final requirement,
+ * we must release any scheduling lock we hold, then try to grab both
+ * locks we want, then double-check to make sure that what we started
+ * to do hasn't been changed in the mean time.
+ *
+ * These steps are encapsulated in the following two functions; they
+ * should be called like this:
+ *
+ * lock = vcpu_schedule_lock_irq(v);
+ * vcpu_migrate_start(v);
+ * vcpu_schedule_unlock_irq(lock, v)
+ * vcpu_migrate_finish(v);
+ *
+ * vcpu_migrate_finish() will do the work now if it can, or simply
+ * return if it can't (because v is still running); in that case
+ * vcpu_migrate_finish() will be called by context_saved().
+ */
+void vcpu_migrate_start(struct vcpu *v)
+{
+ set_bit(_VPF_migrating, &v->pause_flags);
+ vcpu_sleep_nosync_locked(v);
+}
+
+static void vcpu_migrate_finish(struct vcpu *v)
{
unsigned long flags;
unsigned int old_cpu, new_cpu;
spinlock_t *old_lock, *new_lock;
bool_t pick_called = 0;
+ /*
+ * If the vcpu is currently running, this will be handled by
+ * context_saved(); and in any case, if the bit is cleared, then
+ * someone else has already done the work so we don't need to.
+ */
+ if ( v->is_running || !test_bit(_VPF_migrating, &v->pause_flags) )
+ return;
+
old_cpu = new_cpu = v->processor;
for ( ; ; )
{
@@ -660,14 +701,11 @@ void vcpu_force_reschedule(struct vcpu *
spinlock_t *lock = vcpu_schedule_lock_irq(v);
if ( v->is_running )
- set_bit(_VPF_migrating, &v->pause_flags);
+ vcpu_migrate_start(v);
+
vcpu_schedule_unlock_irq(lock, v);
- if ( v->pause_flags & VPF_migrating )
- {
- vcpu_sleep_nosync(v);
- vcpu_migrate(v);
- }
+ vcpu_migrate_finish(v);
}
void restore_vcpu_affinity(struct domain *d)
@@ -819,10 +857,10 @@ int cpu_disable_scheduler(unsigned int c
* * the scheduler will always fine a suitable solution, or
* things would have failed before getting in here.
*/
- set_bit(_VPF_migrating, &v->pause_flags);
+ vcpu_migrate_start(v);
vcpu_schedule_unlock_irqrestore(lock, flags, v);
- vcpu_sleep_nosync(v);
- vcpu_migrate(v);
+
+ vcpu_migrate_finish(v);
/*
* The only caveat, in this case, is that if a vcpu active in
@@ -856,18 +894,14 @@ static int vcpu_set_affinity(
* Always ask the scheduler to re-evaluate placement
* when changing the affinity.
*/
- set_bit(_VPF_migrating, &v->pause_flags);
+ vcpu_migrate_start(v);
}
vcpu_schedule_unlock_irq(lock, v);
domain_update_node_affinity(v->domain);
- if ( v->pause_flags & VPF_migrating )
- {
- vcpu_sleep_nosync(v);
- vcpu_migrate(v);
- }
+ vcpu_migrate_finish(v);
return ret;
}
@@ -1095,7 +1129,6 @@ int vcpu_pin_override(struct vcpu *v, in
{
cpumask_copy(v->cpu_hard_affinity, v->cpu_hard_affinity_saved);
v->affinity_broken = 0;
- set_bit(_VPF_migrating, &v->pause_flags);
ret = 0;
}
}
@@ -1108,20 +1141,18 @@ int vcpu_pin_override(struct vcpu *v, in
cpumask_copy(v->cpu_hard_affinity_saved, v->cpu_hard_affinity);
v->affinity_broken = 1;
cpumask_copy(v->cpu_hard_affinity, cpumask_of(cpu));
- set_bit(_VPF_migrating, &v->pause_flags);
ret = 0;
}
}
+ if ( ret == 0 )
+ vcpu_migrate_start(v);
+
vcpu_schedule_unlock_irq(lock, v);
domain_update_node_affinity(v->domain);
- if ( v->pause_flags & VPF_migrating )
- {
- vcpu_sleep_nosync(v);
- vcpu_migrate(v);
- }
+ vcpu_migrate_finish(v);
return ret;
}
@@ -1508,8 +1539,7 @@ void context_saved(struct vcpu *prev)
SCHED_OP(vcpu_scheduler(prev), context_saved, prev);
- if ( unlikely(prev->pause_flags & VPF_migrating) )
- vcpu_migrate(prev);
+ vcpu_migrate_finish(prev);
}
/* The scheduler timer: force a run through the scheduler */