File 5aeaeaf0-sched-fix-races-in-vcpu-migration.patch of Package xen.23721
commit baf3935a9cb19341b551dbd6cb97c69b8db8dc60
Author: George Dunlap <george.dunlap@citrix.com>
Date: Fri Apr 13 13:06:38 2018 +0200
xen: Refactor migration
The current sequence to initiate vcpu migration is inefficient and error-prone (see the sketch after this list):
- The initiator sets VPF_migrating with the lock held, then drops the
lock and calls vcpu_sleep_nosync(), which immediately grabs the lock
again
- A number of places unnecessarily check for v->pause_flags in between
those two
- Every call to vcpu_migrate() must be prefaced with
vcpu_sleep_nosync() or introduce a race condition; this code
duplication is error-prone
- In the event that v->is_running is true at the beginning of
vcpu_migrate(), it's almost certain that vcpu_migrate() will end up
being called in context_switch() as well; we might as well simply
let it run there and save the duplicated effort (which will be
non-negligible).
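Roughly, the pattern being replaced at each call site looks like this (a
condensed sketch of the old sequence; the hunks below show the real code):
    spinlock_t *lock = vcpu_schedule_lock_irq(v);
    set_bit(_VPF_migrating, &v->pause_flags);  /* flag set with the lock held */
    vcpu_schedule_unlock_irq(lock, v);
    if ( v->pause_flags & VPF_migrating )      /* re-checked after dropping the lock */
    {
        vcpu_sleep_nosync(v);                  /* immediately re-takes the lock */
        vcpu_migrate(v);
    }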
Instead, introduce vcpu_migrate_start() to initiate the process.
vcpu_migrate_start() is called with the scheduling lock held. It not
only sets VPF_migrating, but also calls vcpu_sleep_nosync_locked()
(which will automatically do nothing if there's nothing to do).
Rename vcpu_migrate() to vcpu_migrate_finish(). Check for v->is_running and
pause_flags & VPF_migrating at the top and return if appropriate.
Then the way to initiate migration is consistently (sketched in code after the list):
* Grab lock
* vcpu_migrate_start()
* Release lock
* vcpu_migrate_finish()
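A call site such as vcpu_set_affinity() therefore ends up looking roughly
like this (sketch only; see the hunks below for the exact code):
    spinlock_t *lock = vcpu_schedule_lock_irq(v);
    vcpu_migrate_start(v);         /* sets _VPF_migrating and sleeps the vcpu, lock held */
    vcpu_schedule_unlock_irq(lock, v);
    vcpu_migrate_finish(v);        /* returns early if v is running or the flag is clear */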
Signed-off-by: George Dunlap <george.dunlap@citrix.com>
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index 9594a4a3e4..7771be9dbc 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -562,13 +562,33 @@ static void vcpu_move_nosched(struct vcpu *v, unsigned int new_cpu)
sched_move_irqs(v);
}
-static void vcpu_migrate(struct vcpu *v)
+/*
+ * Initiating migration
+ *
+ * In order to migrate, we need the vcpu in question to have stopped
+ * running and be taken off the runqueues; we also need to hold the lock
+ */
+void vcpu_migrate_start(struct vcpu *v)
+{
+ set_bit(_VPF_migrating, &v->pause_flags);
+ vcpu_sleep_nosync_locked(v);
+}
+
+static void vcpu_migrate_finish(struct vcpu *v)
{
unsigned long flags;
unsigned int old_cpu, new_cpu;
spinlock_t *old_lock, *new_lock;
bool_t pick_called = 0;
+ /*
+ * If the vcpu is currently running, this will be handled by
+ * context_saved(); and in any case, if the bit is cleared, then
+ * someone else has already done the work so we don't need to.
+ */
+ if ( v->is_running || !test_bit(_VPF_migrating, &v->pause_flags) )
+ return;
+
old_cpu = new_cpu = v->processor;
for ( ; ; )
{
@@ -648,14 +668,11 @@ void vcpu_force_reschedule(struct vcpu *v)
spinlock_t *lock = vcpu_schedule_lock_irq(v);
if ( v->is_running )
- set_bit(_VPF_migrating, &v->pause_flags);
+ vcpu_migrate_start(v);
+
vcpu_schedule_unlock_irq(lock, v);
- if ( v->pause_flags & VPF_migrating )
- {
- vcpu_sleep_nosync(v);
- vcpu_migrate(v);
- }
+ vcpu_migrate_finish(v);
}
void restore_vcpu_affinity(struct domain *d)
@@ -693,10 +710,9 @@ void restore_vcpu_affinity(struct domain *d)
if ( v->processor == cpu )
{
- set_bit(_VPF_migrating, &v->pause_flags);
- spin_unlock_irq(lock);;
- vcpu_sleep_nosync(v);
- vcpu_migrate(v);
+ vcpu_migrate_start(v);
+ spin_unlock_irq(lock);
+ vcpu_migrate_finish(v);
}
else
{
@@ -809,10 +826,10 @@ int cpu_disable_scheduler(unsigned int cpu)
* * the scheduler will always find a suitable solution, or
* things would have failed before getting in here.
*/
- set_bit(_VPF_migrating, &v->pause_flags);
+ vcpu_migrate_start(v);
vcpu_schedule_unlock_irqrestore(lock, flags, v);
- vcpu_sleep_nosync(v);
- vcpu_migrate(v);
+
+ vcpu_migrate_finish(v);
/*
* The only caveat, in this case, is that if a vcpu active in
@@ -846,18 +863,14 @@ static int vcpu_set_affinity(
* Always ask the scheduler to re-evaluate placement
* when changing the affinity.
*/
- set_bit(_VPF_migrating, &v->pause_flags);
+ vcpu_migrate_start(v);
}
vcpu_schedule_unlock_irq(lock, v);
domain_update_node_affinity(v->domain);
- if ( v->pause_flags & VPF_migrating )
- {
- vcpu_sleep_nosync(v);
- vcpu_migrate(v);
- }
+ vcpu_migrate_finish(v);
return ret;
}
@@ -1083,7 +1096,6 @@ int vcpu_pin_override(struct vcpu *v, int cpu)
{
cpumask_copy(v->cpu_hard_affinity, v->cpu_hard_affinity_saved);
v->affinity_broken = 0;
- set_bit(_VPF_migrating, &v->pause_flags);
ret = 0;
}
}
@@ -1096,20 +1108,18 @@ int vcpu_pin_override(struct vcpu *v, int cpu)
cpumask_copy(v->cpu_hard_affinity_saved, v->cpu_hard_affinity);
v->affinity_broken = 1;
cpumask_copy(v->cpu_hard_affinity, cpumask_of(cpu));
- set_bit(_VPF_migrating, &v->pause_flags);
ret = 0;
}
}
+ if ( ret == 0 )
+ vcpu_migrate_start(v);
+
vcpu_schedule_unlock_irq(lock, v);
domain_update_node_affinity(v->domain);
- if ( v->pause_flags & VPF_migrating )
- {
- vcpu_sleep_nosync(v);
- vcpu_migrate(v);
- }
+ vcpu_migrate_finish(v);
return ret;
}
@@ -1493,8 +1503,7 @@ void context_saved(struct vcpu *prev)
SCHED_OP(VCPU2OP(prev), context_saved, prev);
- if ( unlikely(prev->pause_flags & VPF_migrating) )
- vcpu_migrate(prev);
+ vcpu_migrate_finish(prev);
}
/* The scheduler timer: force a run through the scheduler */