x86/vpt: fix race when migrating timers between vCPUs

The current vPT code will migrate the emulated timers between vCPUs
(change the pt->vcpu field) while holding only the destination lock,
either from create_periodic_time or from pt_adjust_global_vcpu_target
when the global target is adjusted. Changing the periodic_time vCPU
field in this way creates a race where a third party could grab the
lock in the unlocked region of pt_adjust_global_vcpu_target (or
before create_periodic_time performs the vcpu change) and then
release the lock from a different vCPU, creating a locking imbalance.
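
For illustration (not part of the patch; tm_lock abbreviates
v->arch.hvm_vcpu.tm_lock), one interleaving that triggers the
imbalance with the pre-patch pt_lock()/pt_unlock():

    CPU0: pt_lock(pt)
        v = pt->vcpu;                    /* reads the old vCPU, v0 */
        spin_lock(&v0->tm_lock);
        /* pt->vcpu == v0 still holds, so the retry loop exits */
    CPU1: in the unlocked region of the migration path
          (e.g. pt_adjust_global_vcpu_target)
        spin_lock(&v1->tm_lock);
        pt->vcpu = v1;                   /* holding only v1's lock */
    CPU0: pt_unlock(pt)
        spin_unlock(&pt->vcpu->tm_lock); /* releases v1's lock, which
                                            CPU0 never took; v0's lock
                                            stays held */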

Introduce a per-domain rwlock in order to protect periodic_time
migration between vCPU lists. Taking the lock in read mode prevents
any timer from being migrated to a different vCPU, while taking it in
write mode allows performing migration of timers across vCPUs. The
per-vCPU locks are still used to protect all the other fields of the
periodic_time struct.
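
As a sketch of the resulting discipline (illustrative only; the
actual helper bodies are in the hunks below):

    /* Per-vCPU timer processing takes the lock in read mode: */
    read_lock(&v->domain->arch.hvm_domain.pl_time->pt_migrate);
    spin_lock(&v->arch.hvm_vcpu.tm_lock);
    /* ... pt->vcpu cannot change for any timer of the domain ... */
    spin_unlock(&v->arch.hvm_vcpu.tm_lock);
    read_unlock(&v->domain->arch.hvm_domain.pl_time->pt_migrate);

    /* Timer migration takes it in write mode, excluding all readers: */
    write_lock(&v->domain->arch.hvm_domain.pl_time->pt_migrate);
    pt->vcpu = v;
    write_unlock(&v->domain->arch.hvm_domain.pl_time->pt_migrate);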

Note that such migration shouldn't happen frequently, and hence
there's no performance drop as a result of such locking.

This is XSA-336.

Reported-by: Igor Druzhinin <igor.druzhinin@citrix.com>
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Tested-by: Igor Druzhinin <igor.druzhinin@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>

--- xen-4.10.4-testing.orig/xen/arch/x86/hvm/hvm.c
+++ xen-4.10.4-testing/xen/arch/x86/hvm/hvm.c
@@ -618,6 +618,8 @@ int hvm_domain_initialise(struct domain
/* need link to containing domain */
d->arch.hvm_domain.pl_time->domain = d;
+ rwlock_init(&d->arch.hvm_domain.pl_time->pt_migrate);
+
/* Set the default IO Bitmap. */
if ( is_hardware_domain(d) )
{
--- xen-4.10.4-testing.orig/xen/arch/x86/hvm/vpt.c
+++ xen-4.10.4-testing/xen/arch/x86/hvm/vpt.c
@@ -152,23 +152,32 @@ static int pt_irq_masked(struct periodic
return 1;
}
-static void pt_lock(struct periodic_time *pt)
+static void pt_vcpu_lock(struct vcpu *v)
{
- struct vcpu *v;
+ read_lock(&v->domain->arch.hvm_domain.pl_time->pt_migrate);
+ spin_lock(&v->arch.hvm_vcpu.tm_lock);
+}
- for ( ; ; )
- {
- v = pt->vcpu;
- spin_lock(&v->arch.hvm_vcpu.tm_lock);
- if ( likely(pt->vcpu == v) )
- break;
- spin_unlock(&v->arch.hvm_vcpu.tm_lock);
- }
+static void pt_vcpu_unlock(struct vcpu *v)
+{
+ spin_unlock(&v->arch.hvm_vcpu.tm_lock);
+ read_unlock(&v->domain->arch.hvm_domain.pl_time->pt_migrate);
+}
+
+static void pt_lock(struct periodic_time *pt)
+{
+ /*
+ * We cannot use pt_vcpu_lock here, because we need to acquire the
+ * per-domain lock first and then (re-)fetch the value of pt->vcpu, or
+ * else we might be using a stale value of pt->vcpu.
+ */
+ read_lock(&pt->vcpu->domain->arch.hvm_domain.pl_time->pt_migrate);
+ spin_lock(&pt->vcpu->arch.hvm_vcpu.tm_lock);
}
static void pt_unlock(struct periodic_time *pt)
{
- spin_unlock(&pt->vcpu->arch.hvm_vcpu.tm_lock);
+ pt_vcpu_unlock(pt->vcpu);
}
static void pt_process_missed_ticks(struct periodic_time *pt)
@@ -218,7 +227,7 @@ void pt_save_timer(struct vcpu *v)
if ( v->pause_flags & VPF_blocked )
return;
- spin_lock(&v->arch.hvm_vcpu.tm_lock);
+ pt_vcpu_lock(v);
list_for_each_entry ( pt, head, list )
if ( !pt->do_not_freeze )
@@ -226,7 +235,7 @@ void pt_save_timer(struct vcpu *v)
pt_freeze_time(v);
- spin_unlock(&v->arch.hvm_vcpu.tm_lock);
+ pt_vcpu_unlock(v);
}
void pt_restore_timer(struct vcpu *v)
@@ -234,7 +243,7 @@ void pt_restore_timer(struct vcpu *v)
struct list_head *head = &v->arch.hvm_vcpu.tm_list;
struct periodic_time *pt;
- spin_lock(&v->arch.hvm_vcpu.tm_lock);
+ pt_vcpu_lock(v);
list_for_each_entry ( pt, head, list )
{
@@ -247,7 +256,7 @@ void pt_restore_timer(struct vcpu *v)
pt_thaw_time(v);
- spin_unlock(&v->arch.hvm_vcpu.tm_lock);
+ pt_vcpu_unlock(v);
}
static void pt_timer_fn(void *data)
@@ -272,7 +281,7 @@ int pt_update_irq(struct vcpu *v)
uint64_t max_lag;
int irq, pt_vector = -1;
- spin_lock(&v->arch.hvm_vcpu.tm_lock);
+ pt_vcpu_lock(v);
earliest_pt = NULL;
max_lag = -1ULL;
@@ -300,14 +309,14 @@ int pt_update_irq(struct vcpu *v)
if ( earliest_pt == NULL )
{
- spin_unlock(&v->arch.hvm_vcpu.tm_lock);
+ pt_vcpu_unlock(v);
return -1;
}
earliest_pt->irq_issued = 1;
irq = earliest_pt->irq;
- spin_unlock(&v->arch.hvm_vcpu.tm_lock);
+ pt_vcpu_unlock(v);
switch ( earliest_pt->source )
{
@@ -377,12 +386,12 @@ void pt_intr_post(struct vcpu *v, struct
if ( intack.source == hvm_intsrc_vector )
return;
- spin_lock(&v->arch.hvm_vcpu.tm_lock);
+ pt_vcpu_lock(v);
pt = is_pt_irq(v, intack);
if ( pt == NULL )
{
- spin_unlock(&v->arch.hvm_vcpu.tm_lock);
+ pt_vcpu_unlock(v);
return;
}
@@ -421,7 +430,7 @@ void pt_intr_post(struct vcpu *v, struct
cb = pt->cb;
cb_priv = pt->priv;
- spin_unlock(&v->arch.hvm_vcpu.tm_lock);
+ pt_vcpu_unlock(v);
if ( cb != NULL )
cb(v, cb_priv);
@@ -432,12 +441,12 @@ void pt_migrate(struct vcpu *v)
struct list_head *head = &v->arch.hvm_vcpu.tm_list;
struct periodic_time *pt;
- spin_lock(&v->arch.hvm_vcpu.tm_lock);
+ pt_vcpu_lock(v);
list_for_each_entry ( pt, head, list )
migrate_timer(&pt->timer, v->processor);
- spin_unlock(&v->arch.hvm_vcpu.tm_lock);
+ pt_vcpu_unlock(v);
}
void create_periodic_time(
@@ -455,7 +464,7 @@ void create_periodic_time(
destroy_periodic_time(pt);
- spin_lock(&v->arch.hvm_vcpu.tm_lock);
+ write_lock(&v->domain->arch.hvm_domain.pl_time->pt_migrate);
pt->pending_intr_nr = 0;
pt->do_not_freeze = 0;
@@ -504,7 +513,7 @@ void create_periodic_time(
init_timer(&pt->timer, pt_timer_fn, pt, v->processor);
set_timer(&pt->timer, pt->scheduled);
- spin_unlock(&v->arch.hvm_vcpu.tm_lock);
+ write_unlock(&v->domain->arch.hvm_domain.pl_time->pt_migrate);
}
void destroy_periodic_time(struct periodic_time *pt)
@@ -529,30 +538,20 @@ void destroy_periodic_time(struct period
static void pt_adjust_vcpu(struct periodic_time *pt, struct vcpu *v)
{
- int on_list;
-
ASSERT(pt->source == PTSRC_isa || pt->source == PTSRC_ioapic);
if ( pt->vcpu == NULL )
return;
- pt_lock(pt);
- on_list = pt->on_list;
- if ( pt->on_list )
- list_del(&pt->list);
- pt->on_list = 0;
- pt_unlock(pt);
-
- spin_lock(&v->arch.hvm_vcpu.tm_lock);
+ write_lock(&pt->vcpu->domain->arch.hvm_domain.pl_time->pt_migrate);
pt->vcpu = v;
- if ( on_list )
+ if ( pt->on_list )
{
- pt->on_list = 1;
+ list_del(&pt->list);
list_add(&pt->list, &v->arch.hvm_vcpu.tm_list);
-
migrate_timer(&pt->timer, v->processor);
}
- spin_unlock(&v->arch.hvm_vcpu.tm_lock);
+ write_unlock(&pt->vcpu->domain->arch.hvm_domain.pl_time->pt_migrate);
}
void pt_adjust_global_vcpu_target(struct vcpu *v)
--- xen-4.10.4-testing.orig/xen/include/asm-x86/hvm/vpt.h
+++ xen-4.10.4-testing/xen/include/asm-x86/hvm/vpt.h
@@ -133,6 +133,13 @@ struct pl_time { /* platform time */
struct RTCState vrtc;
struct HPETState vhpet;
struct PMTState vpmt;
+ /*
+ * rwlock to prevent periodic_time vCPU migration. Take the lock in read
+ * mode in order to prevent the vcpu field of periodic_time from changing.
+ * Lock must be taken in write mode when changes to the vcpu field are
+ * performed, as it allows exclusive access to all the timers of a domain.
+ */
+ rwlock_t pt_migrate;
/* guest_time = Xen sys time + stime_offset */
int64_t stime_offset;
/* Ensures monotonicity in appropriate timer modes. */