File 5a6b36cd-3-x86-migrate-MSR_SPEC_CTRL.patch of Package xen.11319
# Commit 0cf2a4eb769302b7d7d7835540e7b2f15006df30
# Date 2018-01-26 14:10:21 +0000
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Andrew Cooper <andrew.cooper3@citrix.com>
x86/migrate: Move MSR_SPEC_CTRL on migrate
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Wei Liu <wei.liu2@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
--- a/tools/libxc/xc_domain_save.c
+++ b/tools/libxc/xc_domain_save.c
@@ -1974,6 +1974,26 @@ int xc_domain_save(xc_interface *xch, in
goto out;
}
+ /* Check there are no PV MSRs in use. */
+ domctl.cmd = XEN_DOMCTL_get_vcpu_msrs;
+ domctl.domain = dom;
+ memset(&domctl.u, 0, sizeof(domctl.u));
+ domctl.u.vcpu_msrs.vcpu = i;
+ domctl.u.vcpu_msrs.msr_count = 0;
+ set_xen_guest_handle_raw(domctl.u.vcpu_msrs.msrs, (void*)1);
+
+ if ( xc_domctl(xch, &domctl) < 0 )
+ {
+ if ( errno == ENOBUFS )
+ {
+ errno = EOPNOTSUPP;
+ PERROR("Unable to migrate PV guest using MSRs (yet)");
+ }
+ else
+ PERROR("Error querying maximum number of MSRs for VCPU%d", i);
+ goto out;
+ }
+
/* Start to fetch CPU eXtended States */
/* Get buffer size first */
domctl.cmd = XEN_DOMCTL_getvcpuextstate;
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -1275,6 +1275,112 @@ long arch_do_domctl(
}
break;
+ case XEN_DOMCTL_get_vcpu_msrs:
+ case XEN_DOMCTL_set_vcpu_msrs:
+ {
+ struct xen_domctl_vcpu_msrs *vmsrs = &domctl->u.vcpu_msrs;
+ struct xen_domctl_vcpu_msr msr;
+ struct vcpu *v;
+ unsigned int i, nr_msrs = 0, edx, dummy;
+ bool_t has_ibrsb;
+
+ ret = -ESRCH;
+ if ( (vmsrs->vcpu >= d->max_vcpus) ||
+ ((v = d->vcpu[vmsrs->vcpu]) == NULL) )
+ break;
+
+ ret = -EINVAL;
+ if ( (v == current) || /* no vcpu_pause() */
+ !is_pv_domain(d) )
+ break;
+
+ domain_cpuid(d, 7, 0, &dummy, &dummy, &dummy, &edx);
+ has_ibrsb = !!(edx & cpufeat_mask(X86_FEATURE_IBRSB));
+ nr_msrs += has_ibrsb;
+
+ if ( domctl->cmd == XEN_DOMCTL_get_vcpu_msrs )
+ {
+ ret = 0; copyback = 1;
+
+ /* NULL guest handle is a request for max size. */
+ if ( guest_handle_is_null(vmsrs->msrs) )
+ vmsrs->msr_count = nr_msrs;
+ else
+ {
+ i = 0;
+
+ vcpu_pause(v);
+
+ if ( has_ibrsb && v->arch.spec_ctrl )
+ {
+ if ( i < vmsrs->msr_count && !ret )
+ {
+ msr.index = MSR_SPEC_CTRL;
+ msr.reserved = 0;
+ msr.value = v->arch.spec_ctrl;
+ if ( copy_to_guest_offset(vmsrs->msrs, i, &msr, 1) )
+ ret = -EFAULT;
+ }
+ ++i;
+ }
+
+ vcpu_unpause(v);
+
+ if ( i > vmsrs->msr_count && !ret )
+ ret = -ENOBUFS;
+ vmsrs->msr_count = i;
+ }
+ }
+ else
+ {
+ ret = -EINVAL;
+ if ( vmsrs->msr_count > nr_msrs )
+ break;
+
+ vcpu_pause(v);
+
+ for ( i = 0; i < vmsrs->msr_count; ++i )
+ {
+ ret = -EFAULT;
+ if ( copy_from_guest_offset(&msr, vmsrs->msrs, i, 1) )
+ break;
+
+ ret = -EINVAL;
+ if ( msr.reserved )
+ break;
+
+ switch ( msr.index )
+ {
+ case MSR_SPEC_CTRL:
+ if ( !boot_cpu_has(X86_FEATURE_IBRSB) )
+ break; /* MSR available? */
+
+ /*
+ * Note: SPEC_CTRL_STIBP is specified as safe to use (i.e.
+ * ignored) when STIBP isn't enumerated in hardware.
+ */
+
+ if ( msr.value & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP) )
+ break;
+ v->arch.spec_ctrl = msr.value;
+ continue;
+ }
+ break;
+ }
+
+ vcpu_unpause(v);
+
+ if ( i == vmsrs->msr_count )
+ ret = 0;
+ else
+ {
+ vmsrs->msr_count = i;
+ copyback = 1;
+ }
+ }
+ }
+ break;
+
default:
ret = iommu_do_domctl(domctl, d, u_domctl);
break;
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -1218,10 +1218,123 @@ static int hvm_load_cpu_xsave_states(str
return 0;
}
-/* We need variable length data chunk for xsave area, hence customized
- * declaration other than HVM_REGISTER_SAVE_RESTORE.
+#define HVM_CPU_MSR_SIZE(cnt) offsetof(struct hvm_msr, msr[cnt])
+static unsigned int __read_mostly msr_count_max;
+
+/*
+ * Save the per-vcpu MSR records of domain @d into the HVM save context @h.
+ *
+ * For every vcpu an entry sized for msr_count_max MSRs is started,
+ * hvm_funcs.save_msr (when provided) fills it in, and the entry is then
+ * either trimmed to the number of MSRs actually recorded or removed again
+ * when none were recorded.
+ *
+ * Returns 0 on success, 1 if _hvm_init_entry() fails for any vcpu.
+ */
+static int hvm_save_cpu_msrs(struct domain *d, hvm_domain_context_t *h)
+{
+    struct vcpu *v;
+
+    for_each_vcpu ( d, v )
+    {
+        /*
+         * Taken before _hvm_init_entry() is called, i.e. at the position
+         * where this entry's descriptor is about to be written.  Named
+         * 'desc' (as in hvm_load_cpu_msrs()) so that it does not shadow the
+         * domain parameter 'd'.
+         */
+        struct hvm_save_descriptor *desc = _p(&h->data[h->cur]);
+        struct hvm_msr *ctxt;
+        unsigned int i;
+
+        if ( _hvm_init_entry(h, CPU_MSR_CODE, v->vcpu_id,
+                             HVM_CPU_MSR_SIZE(msr_count_max)) )
+            return 1;
+        ctxt = (struct hvm_msr *)&h->data[h->cur];
+        ctxt->count = 0;
+
+        if ( hvm_funcs.save_msr )
+            hvm_funcs.save_msr(v, ctxt);
+
+        /* The reserved field must be zero in the saved stream. */
+        for ( i = 0; i < ctxt->count; ++i )
+            ctxt->msr[i]._rsvd = 0;
+
+        if ( ctxt->count )
+        {
+            /* Rewrite length to indicate how much space we actually used. */
+            desc->length = HVM_CPU_MSR_SIZE(ctxt->count);
+            h->cur += HVM_CPU_MSR_SIZE(ctxt->count);
+        }
+        else
+            /* or rewind and remove the descriptor from the stream. */
+            h->cur -= sizeof(struct hvm_save_descriptor);
+    }
+
+    return 0;
+}
+
+/*
+ * Restore one vcpu's MSR record from the HVM save context @h into domain @d.
+ *
+ * The entry is variable length, so it is validated by hand: the descriptor
+ * and its payload must fit in the remaining data (-ENODATA), the payload
+ * must hold at least one MSR (-EINVAL), its length must match the embedded
+ * count and every _rsvd field must be zero (-EOPNOTSUPP).
+ *
+ * hvm_funcs.load_msr (when provided) then consumes the MSRs it knows; any
+ * entry whose _rsvd is still zero afterwards is reported as -ENXIO.
+ *
+ * Returns 0 on success or a negative errno value.
+ */
+static int hvm_load_cpu_msrs(struct domain *d, hvm_domain_context_t *h)
+{
+ unsigned int i, vcpuid = hvm_load_instance(h);
+ struct vcpu *v;
+ const struct hvm_save_descriptor *desc;
+ struct hvm_msr *ctxt;
+ int err = 0;
+
+ if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL )
+ {
+ dprintk(XENLOG_G_ERR, "HVM restore: dom%d has no vcpu%u\n",
+ d->domain_id, vcpuid);
+ return -EINVAL;
+ }
+
+ /* Customized checking for entry since our entry is of variable length */
+ desc = (struct hvm_save_descriptor *)&h->data[h->cur];
+ if ( sizeof (*desc) > h->size - h->cur)
+ {
+ printk(XENLOG_G_WARNING
+ "HVM%d.%d restore: not enough data left to read MSR descriptor\n",
+ d->domain_id, vcpuid);
+ return -ENODATA;
+ }
+ if ( desc->length + sizeof (*desc) > h->size - h->cur)
+ {
+ printk(XENLOG_G_WARNING
+ "HVM%d.%d restore: not enough data left to read %u MSR bytes\n",
+ d->domain_id, vcpuid, desc->length);
+ return -ENODATA;
+ }
+ if ( desc->length < HVM_CPU_MSR_SIZE(1) )
+ {
+ printk(XENLOG_G_WARNING
+ "HVM%d.%d restore mismatch: MSR length %u < %zu\n",
+ d->domain_id, vcpuid, desc->length, HVM_CPU_MSR_SIZE(1));
+ return -EINVAL;
+ }
+
+ /*
+ * Step over the descriptor and the payload.  Note the cursor is advanced
+ * before the remaining checks, so the entry is consumed even if they fail.
+ */
+ h->cur += sizeof(*desc);
+ ctxt = (struct hvm_msr *)&h->data[h->cur];
+ h->cur += desc->length;
+
+ if ( desc->length != HVM_CPU_MSR_SIZE(ctxt->count) )
+ {
+ printk(XENLOG_G_WARNING
+ "HVM%d.%d restore mismatch: MSR length %u != %zu\n",
+ d->domain_id, vcpuid, desc->length,
+ HVM_CPU_MSR_SIZE(ctxt->count));
+ return -EOPNOTSUPP;
+ }
+
+ /* _rsvd must arrive as zero; it is reused below as a "handled" marker. */
+ for ( i = 0; i < ctxt->count; ++i )
+ if ( ctxt->msr[i]._rsvd )
+ return -EOPNOTSUPP;
+ /* Checking finished */
+
+ if ( hvm_funcs.load_msr )
+ err = hvm_funcs.load_msr(v, ctxt);
+
+ /* Reject any MSR that the load_msr hook did not mark as handled. */
+ for ( i = 0; !err && i < ctxt->count; ++i )
+ {
+ switch ( ctxt->msr[i].index )
+ {
+ default:
+ if ( !ctxt->msr[i]._rsvd )
+ err = -ENXIO;
+ break;
+ }
+ }
+
+ return err;
+}
+
+/* We need variable length data chunks for XSAVE area and MSRs, hence
+ * a custom declaration rather than HVM_REGISTER_SAVE_RESTORE.
*/
-static int __init __hvm_register_CPU_XSAVE_save_and_restore(void)
+static int __init hvm_register_CPU_save_and_restore(void)
{
hvm_register_savevm(CPU_XSAVE_CODE,
"CPU_XSAVE",
@@ -1230,9 +1343,22 @@ static int __init __hvm_register_CPU_XSA
HVM_CPU_XSAVE_SIZE(xfeature_mask) +
sizeof(struct hvm_save_descriptor),
HVMSR_PER_VCPU);
+
+ if ( hvm_funcs.init_msr )
+ msr_count_max += hvm_funcs.init_msr();
+
+ if ( msr_count_max )
+ hvm_register_savevm(CPU_MSR_CODE,
+ "CPU_MSR",
+ hvm_save_cpu_msrs,
+ hvm_load_cpu_msrs,
+ HVM_CPU_MSR_SIZE(msr_count_max) +
+ sizeof(struct hvm_save_descriptor),
+ HVMSR_PER_VCPU);
+
return 0;
}
-__initcall(__hvm_register_CPU_XSAVE_save_and_restore);
+__initcall(hvm_register_CPU_save_and_restore);
int hvm_vcpu_initialise(struct vcpu *v)
{
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -583,6 +583,65 @@ static int vmx_load_vmcs_ctxt(struct vcp
return 0;
}
+/*
+ * Report how many MSRs the VMX save/restore hooks may emit per vcpu:
+ * 1 when the boot CPU has X86_FEATURE_IBRSB, 0 otherwise.
+ */
+static unsigned int __init vmx_init_msr(void)
+{
+ return !!boot_cpu_has(X86_FEATURE_IBRSB);
+}
+
+/*
+ * Append the MSRs of vcpu @v that need saving to @ctxt, bumping ctxt->count
+ * for each one added.  MSR_SPEC_CTRL is recorded only when the domain's
+ * CPUID (input 7, sub-input 0, EDX) advertises IBRSB and the vcpu's
+ * spec_ctrl value is non-zero.
+ */
+static void vmx_save_msr(struct vcpu *v, struct hvm_msr *ctxt)
+{
+ uint32_t edx, dummy;
+
+ vmx_vmcs_enter(v);
+
+ domain_cpuid(v->domain, 7, 0, &dummy, &dummy, &dummy, &edx);
+ if ( (edx & cpufeat_mask(X86_FEATURE_IBRSB)) && v->arch.spec_ctrl )
+ {
+ ctxt->msr[ctxt->count].index = MSR_SPEC_CTRL;
+ ctxt->msr[ctxt->count++].val = v->arch.spec_ctrl;
+ }
+
+ vmx_vmcs_exit(v);
+}
+
+/*
+ * Load the MSRs in @ctxt that this code recognises into vcpu @v.
+ *
+ * Each entry that is loaded successfully has its _rsvd field set to 1 as a
+ * "handled" marker; entries with an unrecognised index are skipped and left
+ * unmarked.  Processing stops at the first error.
+ *
+ * Returns 0 on success, or -ENXIO if MSR_SPEC_CTRL is supplied but the boot
+ * CPU lacks X86_FEATURE_IBRSB or the value has bits set other than
+ * SPEC_CTRL_IBRS and SPEC_CTRL_STIBP.
+ */
+static int vmx_load_msr(struct vcpu *v, struct hvm_msr *ctxt)
+{
+ unsigned int i;
+ int err = 0;
+
+ vmx_vmcs_enter(v);
+
+ for ( i = 0; i < ctxt->count; ++i )
+ {
+ switch ( ctxt->msr[i].index )
+ {
+ case MSR_SPEC_CTRL:
+ if ( !boot_cpu_has(X86_FEATURE_IBRSB) )
+ err = -ENXIO; /* MSR available? */
+ /*
+ * Note: SPEC_CTRL_STIBP is specified as safe to use (i.e.
+ * ignored) when STIBP isn't enumerated in hardware.
+ */
+ else if ( ctxt->msr[i].val &
+ ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP) )
+ err = -ENXIO;
+ else
+ v->arch.spec_ctrl = ctxt->msr[i].val;
+ break;
+
+ default:
+ /* Not ours: skip without marking the entry as handled. */
+ continue;
+ }
+ if ( err )
+ break;
+ /* Mark the entry as handled. */
+ ctxt->msr[i]._rsvd = 1;
+ }
+
+ vmx_vmcs_exit(v);
+
+ return err;
+}
+
static void vmx_fpu_enter(struct vcpu *v)
{
vcpu_restore_fpu_lazy(v);
@@ -1663,6 +1722,9 @@ static struct hvm_function_table __initd
.vcpu_destroy = vmx_vcpu_destroy,
.save_cpu_ctxt = vmx_save_vmcs_ctxt,
.load_cpu_ctxt = vmx_load_vmcs_ctxt,
+ .init_msr = vmx_init_msr,
+ .save_msr = vmx_save_msr,
+ .load_msr = vmx_load_msr,
.get_interrupt_shadow = vmx_get_interrupt_shadow,
.set_interrupt_shadow = vmx_set_interrupt_shadow,
.guest_x86_mode = vmx_guest_x86_mode,
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -109,6 +109,10 @@ struct hvm_function_table {
void (*save_cpu_ctxt)(struct vcpu *v, struct hvm_hw_cpu *ctxt);
int (*load_cpu_ctxt)(struct vcpu *v, struct hvm_hw_cpu *ctxt);
+ unsigned int (*init_msr)(void);
+ void (*save_msr)(struct vcpu *, struct hvm_msr *);
+ int (*load_msr)(struct vcpu *, struct hvm_msr *);
+
/* Examine specifics of the guest state. */
unsigned int (*get_interrupt_shadow)(struct vcpu *v);
void (*set_interrupt_shadow)(struct vcpu *v, unsigned int intr_shadow);
--- a/xen/include/public/arch-x86/hvm/save.h
+++ b/xen/include/public/arch-x86/hvm/save.h
@@ -592,9 +592,26 @@ struct hvm_tsc_adjust {
DECLARE_HVM_SAVE_TYPE(TSC_ADJUST, 19, struct hvm_tsc_adjust);
+/*
+ * Variable-length per-vcpu MSR save record: 'count' entries follow in
+ * 'msr'.  The trailing array is declared in whichever form the compiler
+ * supports (C99 flexible array member, GNU zero-length array, or a
+ * one-element array as a last resort).
+ */
+struct hvm_msr {
+ uint32_t count;
+ struct hvm_one_msr {
+ uint32_t index;
+ uint32_t _rsvd; /* Zero in the saved stream. */
+ uint64_t val;
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+ } msr[];
+#elif defined(__GNUC__)
+ } msr[0];
+#else
+ } msr[1 /* variable size */];
+#endif
+};
+
+#define CPU_MSR_CODE 20
+
/*
* Largest type-code in use
*/
-#define HVM_SAVE_CODE_MAX 19
+#define HVM_SAVE_CODE_MAX 20
#endif /* __XEN_PUBLIC_HVM_SAVE_X86_H__ */
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -898,6 +898,41 @@ struct xen_domctl_cacheflush {
typedef struct xen_domctl_cacheflush xen_domctl_cacheflush_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_cacheflush_t);
+#if defined(__i386__) || defined(__x86_64__)
+/* One MSR index/value pair as exchanged by XEN_DOMCTL_{get,set}_vcpu_msrs. */
+struct xen_domctl_vcpu_msr {
+ uint32_t index;
+ uint32_t reserved; /* Must be zero. */
+ uint64_aligned_t value;
+};
+typedef struct xen_domctl_vcpu_msr xen_domctl_vcpu_msr_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpu_msr_t);
+
+/*
+ * XEN_DOMCTL_set_vcpu_msrs / XEN_DOMCTL_get_vcpu_msrs.
+ *
+ * Input:
+ * - A NULL 'msrs' guest handle is a request for the maximum 'msr_count'.
+ * - Otherwise, 'msr_count' is the number of entries in 'msrs'.
+ *
+ * Output for get:
+ * - If 'msr_count' is less than the number Xen needs to write, -ENOBUFS shall
+ * be returned and 'msr_count' updated to reflect the intended number.
+ * - On success, 'msr_count' shall indicate the number of MSRs written, which
+ * may be less than the maximum if some are not currently used by the vcpu.
+ *
+ * Output for set:
+ * - If Xen encounters an error with a specific MSR, -EINVAL shall be returned
+ * and 'msr_count' shall be set to the offending index, to aid debugging.
+ */
+struct xen_domctl_vcpu_msrs {
+ uint32_t vcpu; /* IN */
+ uint32_t msr_count; /* IN/OUT */
+ XEN_GUEST_HANDLE_64(xen_domctl_vcpu_msr_t) msrs; /* IN/OUT */
+};
+typedef struct xen_domctl_vcpu_msrs xen_domctl_vcpu_msrs_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpu_msrs_t);
+#endif
+
struct xen_domctl {
uint32_t cmd;
#define XEN_DOMCTL_createdomain 1
@@ -968,6 +1003,8 @@ struct xen_domctl {
#define XEN_DOMCTL_getnodeaffinity 69
#define XEN_DOMCTL_set_max_evtchn 70
#define XEN_DOMCTL_cacheflush 71
+#define XEN_DOMCTL_get_vcpu_msrs 72
+#define XEN_DOMCTL_set_vcpu_msrs 73
#define XEN_DOMCTL_gdbsx_guestmemio 1000
#define XEN_DOMCTL_gdbsx_pausevcpu 1001
#define XEN_DOMCTL_gdbsx_unpausevcpu 1002
@@ -1019,6 +1056,7 @@ struct xen_domctl {
#if defined(__i386__) || defined(__x86_64__)
struct xen_domctl_cpuid cpuid;
struct xen_domctl_vcpuextstate vcpuextstate;
+ struct xen_domctl_vcpu_msrs vcpu_msrs;
#endif
struct xen_domctl_set_access_required access_required;
struct xen_domctl_audit_p2m audit_p2m;