File 53563f86-x86-svm-enable-TSC-scaling.patch of Package xen.7673
Subject: x86/svm: enable TSC scaling
From: Boris Ostrovsky boris.ostrovsky@oracle.com Tue Apr 22 12:08:06 2014 +0200
Date: Tue Apr 22 12:08:06 2014 +0200:
Git: b95fd03b5f0b66384bd7c190d5861ae68eb98c85
TSC ratio enabling logic is inverted: we want to use it when we
are running in native tsc mode, i.e. when d->arch.vtsc is zero.
Also, since svm_set_tsc_offset()'s calculations now depend
on vtsc's value, we need to call hvm_funcs.set_tsc_offset() after
vtsc changes in tsc_set_info().
In addition, with TSC ratio enabled, svm_set_tsc_offset() will
need to do rdtsc. With that we may end up having TSCs on guest's
processors out of sync. d->arch.hvm_domain.sync_tsc which is set
by the boot processor can now be used by APs as reference TSC
value instead of host's current TSC.
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
Index: xen-4.4.4-testing/xen/arch/x86/hvm/hvm.c
===================================================================
--- xen-4.4.4-testing.orig/xen/arch/x86/hvm/hvm.c
+++ xen-4.4.4-testing/xen/arch/x86/hvm/hvm.c
@@ -268,27 +268,31 @@ void hvm_set_guest_tsc(struct vcpu *v, u
- v->arch.hvm_vcpu.cache_tsc_offset;
v->arch.hvm_vcpu.cache_tsc_offset = delta_tsc;
- hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);
+ hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset, 0);
}
void hvm_set_guest_tsc_adjust(struct vcpu *v, u64 tsc_adjust)
{
v->arch.hvm_vcpu.cache_tsc_offset += tsc_adjust
- v->arch.hvm_vcpu.msr_tsc_adjust;
- hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);
+ hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset, 0);
v->arch.hvm_vcpu.msr_tsc_adjust = tsc_adjust;
}
-u64 hvm_get_guest_tsc(struct vcpu *v)
+u64 hvm_get_guest_tsc_fixed(struct vcpu *v, uint64_t at_tsc)
{
uint64_t tsc;
if ( v->domain->arch.vtsc )
{
- tsc = hvm_get_guest_time(v);
+ tsc = hvm_get_guest_time_fixed(v, at_tsc);
tsc = gtime_to_gtsc(v->domain, tsc);
v->domain->arch.vtsc_kerncount++;
}
+ else if ( at_tsc )
+ {
+ tsc = at_tsc;
+ }
else
{
rdtscll(tsc);
@@ -3730,7 +3734,8 @@ void hvm_vcpu_reset_state(struct vcpu *v
/* Sync AP's TSC with BSP's. */
v->arch.hvm_vcpu.cache_tsc_offset =
v->domain->vcpu[0]->arch.hvm_vcpu.cache_tsc_offset;
- hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);
+ hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset,
+ d->arch.hvm_domain.sync_tsc);
v->arch.hvm_vcpu.msr_tsc_adjust = 0;
Index: xen-4.4.4-testing/xen/arch/x86/hvm/svm/svm.c
===================================================================
--- xen-4.4.4-testing.orig/xen/arch/x86/hvm/svm/svm.c
+++ xen-4.4.4-testing/xen/arch/x86/hvm/svm/svm.c
@@ -689,7 +689,7 @@ static uint64_t svm_get_tsc_offset(uint6
return guest_tsc - offset;
}
-static void svm_set_tsc_offset(struct vcpu *v, u64 offset)
+static void svm_set_tsc_offset(struct vcpu *v, u64 offset, u64 at_tsc)
{
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
struct vmcb_struct *n1vmcb, *n2vmcb;
@@ -697,11 +697,15 @@ static void svm_set_tsc_offset(struct vc
struct domain *d = v->domain;
uint64_t host_tsc, guest_tsc;
- guest_tsc = hvm_get_guest_tsc(v);
+ guest_tsc = hvm_get_guest_tsc_fixed(v, at_tsc);
/* Re-adjust the offset value when TSC_RATIO is available */
- if ( cpu_has_tsc_ratio && d->arch.vtsc ) {
- rdtscll(host_tsc);
+ if ( cpu_has_tsc_ratio && !d->arch.vtsc )
+ {
+ if ( at_tsc )
+ host_tsc = at_tsc;
+ else
+ rdtscll(host_tsc);
offset = svm_get_tsc_offset(host_tsc, guest_tsc, vcpu_tsc_ratio(v));
}
@@ -856,13 +860,13 @@ static int svm_update_lwp_cfg(struct vcp
static inline void svm_tsc_ratio_save(struct vcpu *v)
{
/* Other vcpus might not have vtsc enabled. So disable TSC_RATIO here. */
- if ( cpu_has_tsc_ratio && v->domain->arch.vtsc )
+ if ( cpu_has_tsc_ratio && !v->domain->arch.vtsc )
wrmsrl(MSR_AMD64_TSC_RATIO, DEFAULT_TSC_RATIO);
}
static inline void svm_tsc_ratio_load(struct vcpu *v)
{
- if ( cpu_has_tsc_ratio && v->domain->arch.vtsc )
+ if ( cpu_has_tsc_ratio && !v->domain->arch.vtsc )
wrmsrl(MSR_AMD64_TSC_RATIO, vcpu_tsc_ratio(v));
}
Index: xen-4.4.4-testing/xen/arch/x86/hvm/vmx/vmx.c
===================================================================
--- xen-4.4.4-testing.orig/xen/arch/x86/hvm/vmx/vmx.c
+++ xen-4.4.4-testing/xen/arch/x86/hvm/vmx/vmx.c
@@ -1011,7 +1011,7 @@ static void vmx_handle_cd(struct vcpu *v
}
}
-static void vmx_set_tsc_offset(struct vcpu *v, u64 offset)
+static void vmx_set_tsc_offset(struct vcpu *v, u64 offset, u64 at_tsc)
{
vmx_vmcs_enter(v);
Index: xen-4.4.4-testing/xen/arch/x86/hvm/vmx/vvmx.c
===================================================================
--- xen-4.4.4-testing.orig/xen/arch/x86/hvm/vmx/vvmx.c
+++ xen-4.4.4-testing/xen/arch/x86/hvm/vmx/vvmx.c
@@ -1058,7 +1058,7 @@ static void load_shadow_guest_state(stru
if ( control & VM_ENTRY_LOAD_PERF_GLOBAL_CTRL )
hvm_msr_write_intercept(MSR_CORE_PERF_GLOBAL_CTRL, __get_vvmcs(vvmcs, GUEST_PERF_GLOBAL_CTRL));
- hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);
+ hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset, 0);
vvmcs_to_shadow_bulk(v, ARRAY_SIZE(vmentry_fields), vmentry_fields);
@@ -1259,7 +1259,7 @@ static void load_vvmcs_host_state(struct
if ( control & VM_EXIT_LOAD_PERF_GLOBAL_CTRL )
hvm_msr_write_intercept(MSR_CORE_PERF_GLOBAL_CTRL, __get_vvmcs(vvmcs, HOST_PERF_GLOBAL_CTRL));
- hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);
+ hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset, 0);
__set_vvmcs(vvmcs, VM_ENTRY_INTR_INFO, 0);
}
Index: xen-4.4.4-testing/xen/arch/x86/hvm/vpt.c
===================================================================
--- xen-4.4.4-testing.orig/xen/arch/x86/hvm/vpt.c
+++ xen-4.4.4-testing/xen/arch/x86/hvm/vpt.c
@@ -36,7 +36,7 @@ void hvm_init_guest_time(struct domain *
pl->last_guest_time = 0;
}
-u64 hvm_get_guest_time(struct vcpu *v)
+u64 hvm_get_guest_time_fixed(struct vcpu *v, u64 at_tsc)
{
struct pl_time *pl = &v->domain->arch.hvm_domain.pl_time;
u64 now;
@@ -45,11 +45,15 @@ u64 hvm_get_guest_time(struct vcpu *v)
ASSERT(is_hvm_vcpu(v));
spin_lock(&pl->pl_time_lock);
- now = get_s_time() + pl->stime_offset;
- if ( (int64_t)(now - pl->last_guest_time) > 0 )
- pl->last_guest_time = now;
- else
- now = ++pl->last_guest_time;
+ now = get_s_time_fixed(at_tsc) + pl->stime_offset;
+
+ if ( !at_tsc )
+ {
+ if ( (int64_t)(now - pl->last_guest_time) > 0 )
+ pl->last_guest_time = now;
+ else
+ now = ++pl->last_guest_time;
+ }
spin_unlock(&pl->pl_time_lock);
return now + v->arch.hvm_vcpu.stime_offset;
Index: xen-4.4.4-testing/xen/arch/x86/time.c
===================================================================
--- xen-4.4.4-testing.orig/xen/arch/x86/time.c
+++ xen-4.4.4-testing/xen/arch/x86/time.c
@@ -713,19 +713,27 @@ static unsigned long get_cmos_time(void)
* System Time
***************************************************************************/
-s_time_t get_s_time(void)
+s_time_t get_s_time_fixed(u64 at_tsc)
{
struct cpu_time *t = &this_cpu(cpu_time);
u64 tsc, delta;
s_time_t now;
- rdtscll(tsc);
+ if ( at_tsc )
+ tsc = at_tsc;
+ else
+ rdtscll(tsc);
delta = tsc - t->local_tsc_stamp;
now = t->stime_local_stamp + scale_delta(delta, &t->tsc_scale);
return now;
}
+s_time_t get_s_time(void)
+{
+ return get_s_time_fixed(0); /* at_tsc == 0: get_s_time_fixed() reads the TSC itself */
+}
+
uint64_t tsc_ticks2ns(uint64_t ticks)
{
struct cpu_time *t = &this_cpu(cpu_time);
@@ -1919,7 +1927,24 @@ void tsc_set_info(struct domain *d,
}
d->arch.incarnation = incarnation + 1;
if ( has_hvm_container_domain(d) )
+ {
hvm_set_rdtsc_exiting(d, d->arch.vtsc);
+ if ( d->vcpu && d->vcpu[0] && incarnation == 0 )
+ {
+ /*
+ * set_tsc_offset() is called from hvm_vcpu_initialise() before
+ * tsc_set_info(). New vtsc mode may require recomputing TSC
+ * offset.
+ * We only need to do this for BSP during initial boot. APs will
+ * call set_tsc_offset() later from hvm_vcpu_reset_state() and they
+ * will sync their TSC to BSP's sync_tsc.
+ */
+ rdtscll(d->arch.hvm_domain.sync_tsc);
+ hvm_funcs.set_tsc_offset(d->vcpu[0],
+ d->vcpu[0]->arch.hvm_vcpu.cache_tsc_offset,
+ d->arch.hvm_domain.sync_tsc);
+ }
+ }
}
/* vtsc may incur measurable performance degradation, diagnose with this */
Index: xen-4.4.4-testing/xen/include/asm-x86/hvm/domain.h
===================================================================
--- xen-4.4.4-testing.orig/xen/include/asm-x86/hvm/domain.h
+++ xen-4.4.4-testing/xen/include/asm-x86/hvm/domain.h
@@ -90,6 +90,12 @@ struct hvm_domain {
bool_t qemu_mapcache_invalidate;
bool_t is_s3_suspended;
+ /*
+ * TSC value that VCPUs use to calculate their tsc_offset value.
+ * Used during initialization and save/restore.
+ */
+ uint64_t sync_tsc;
+
union {
struct vmx_domain vmx;
struct svm_domain svm;
Index: xen-4.4.4-testing/xen/include/asm-x86/hvm/hvm.h
===================================================================
--- xen-4.4.4-testing.orig/xen/include/asm-x86/hvm/hvm.h
+++ xen-4.4.4-testing/xen/include/asm-x86/hvm/hvm.h
@@ -133,7 +133,7 @@ struct hvm_function_table {
int (*get_guest_pat)(struct vcpu *v, u64 *);
int (*set_guest_pat)(struct vcpu *v, u64);
- void (*set_tsc_offset)(struct vcpu *v, u64 offset);
+ void (*set_tsc_offset)(struct vcpu *v, u64 offset, u64 at_tsc);
void (*inject_trap)(struct hvm_trap *trap);
@@ -229,11 +229,13 @@ void hvm_get_guest_pat(struct vcpu *v, u
int hvm_set_guest_pat(struct vcpu *v, u64 guest_pat);
void hvm_set_guest_tsc(struct vcpu *v, u64 guest_tsc);
-u64 hvm_get_guest_tsc(struct vcpu *v);
+u64 hvm_get_guest_tsc_fixed(struct vcpu *v, u64 at_tsc);
+#define hvm_get_guest_tsc(v) hvm_get_guest_tsc_fixed(v, 0)
void hvm_init_guest_time(struct domain *d);
void hvm_set_guest_time(struct vcpu *v, u64 guest_time);
-u64 hvm_get_guest_time(struct vcpu *v);
+u64 hvm_get_guest_time_fixed(struct vcpu *v, u64 at_tsc);
+#define hvm_get_guest_time(v) hvm_get_guest_time_fixed(v, 0)
int vmsi_deliver(
struct domain *d, int vector,
Index: xen-4.4.4-testing/xen/include/asm-x86/msr.h
===================================================================
--- xen-4.4.4-testing.orig/xen/include/asm-x86/msr.h
+++ xen-4.4.4-testing/xen/include/asm-x86/msr.h
@@ -78,9 +78,9 @@ static inline int wrmsr_safe(unsigned in
__asm__ __volatile__("rdtsc" : "=a" (low) : : "edx")
#define rdtscll(val) do { \
- unsigned int a,d; \
- asm volatile("rdtsc" : "=a" (a), "=d" (d)); \
- (val) = ((unsigned long)a) | (((unsigned long)d)<<32); \
+ unsigned int _eax, _edx; \
+ asm volatile("rdtsc" : "=a" (_eax), "=d" (_edx)); \
+ (val) = ((unsigned long)_eax) | (((unsigned long)_edx)<<32); \
} while(0)
#define __write_tsc(val) wrmsrl(MSR_IA32_TSC, val)
Index: xen-4.4.4-testing/xen/include/xen/time.h
===================================================================
--- xen-4.4.4-testing.orig/xen/include/xen/time.h
+++ xen-4.4.4-testing/xen/include/xen/time.h
@@ -32,6 +32,7 @@ struct vcpu;
typedef s64 s_time_t;
#define PRI_stime PRId64
+s_time_t get_s_time_fixed(u64 at_tsc); /* at_tsc == 0: use the current TSC */
s_time_t get_s_time(void);
unsigned long get_localtime(struct domain *d);
uint64_t get_localtime_us(struct domain *d);