File 5a6b36cd-3-x86-migrate-MSR_SPEC_CTRL.patch of Package xen.11319

# Commit 0cf2a4eb769302b7d7d7835540e7b2f15006df30
# Date 2018-01-26 14:10:21 +0000
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Andrew Cooper <andrew.cooper3@citrix.com>
x86/migrate: Move MSR_SPEC_CTRL on migrate
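
Migrating MSR_SPEC_CTRL state requires two mechanisms, both introduced
by this patch:

 * For HVM guests, a new variable-length CPU_MSR save record (type 20)
   in the HVM context, produced and consumed via new init_msr(),
   save_msr() and load_msr() hooks in hvm_function_table, with a VMX
   implementation covering MSR_SPEC_CTRL.

 * For PV guests, a pair of domctls, XEN_DOMCTL_get_vcpu_msrs (72) and
   XEN_DOMCTL_set_vcpu_msrs (73), exposing a vcpu's MSR state to the
   toolstack.

Full PV MSR migration support is not implemented yet, so
xc_domain_save() uses the get domctl only to detect MSRs in use, and
fails the migration cleanly with EOPNOTSUPP rather than silently
losing the state.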

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Wei Liu <wei.liu2@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
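
For illustration only (not part of the applied diff): a minimal sketch
of the "NULL guest handle requests the maximum msr_count" convention
documented in the xen_domctl_vcpu_msrs comment below.  The standalone
helper and its name are hypothetical; xc_domctl() and the guest-handle
macro are the same calls the xc_domain_save() hunk itself uses.

    /* Hypothetical helper: ask Xen for vcpu 0's maximum MSR count. */
    static int get_max_vcpu_msr_count(xc_interface *xch, uint32_t dom)
    {
        struct xen_domctl domctl;

        memset(&domctl, 0, sizeof(domctl));
        domctl.cmd = XEN_DOMCTL_get_vcpu_msrs;
        domctl.domain = dom;
        domctl.u.vcpu_msrs.vcpu = 0;
        domctl.u.vcpu_msrs.msr_count = 0;
        /* NULL handle => Xen writes the maximum into msr_count. */
        set_xen_guest_handle_raw(domctl.u.vcpu_msrs.msrs, NULL);

        if ( xc_domctl(xch, &domctl) < 0 )
            return -1;

        return domctl.u.vcpu_msrs.msr_count;
    }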

--- a/tools/libxc/xc_domain_save.c
+++ b/tools/libxc/xc_domain_save.c
@@ -1974,6 +1974,26 @@ int xc_domain_save(xc_interface *xch, in
             goto out;
         }
 
+        /* Check there are no PV MSRs in use. */
+        domctl.cmd = XEN_DOMCTL_get_vcpu_msrs;
+        domctl.domain = dom;
+        memset(&domctl.u, 0, sizeof(domctl.u));
+        domctl.u.vcpu_msrs.vcpu = i;
+        domctl.u.vcpu_msrs.msr_count = 0;
+        set_xen_guest_handle_raw(domctl.u.vcpu_msrs.msrs, (void*)1);
+
+        if ( xc_domctl(xch, &domctl) < 0 )
+        {
+            if ( errno == ENOBUFS )
+            {
+                errno = EOPNOTSUPP;
+                PERROR("Unable to migrate PV guest using MSRs (yet)");
+            }
+            else
+                PERROR("Error querying maximum number of MSRs for VCPU%d", i);
+            goto out;
+        }
+
         /* Start to fetch CPU eXtended States */
         /* Get buffer size first */
         domctl.cmd = XEN_DOMCTL_getvcpuextstate;
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -1275,6 +1275,112 @@ long arch_do_domctl(
     }
     break;
 
+    case XEN_DOMCTL_get_vcpu_msrs:
+    case XEN_DOMCTL_set_vcpu_msrs:
+    {
+        struct xen_domctl_vcpu_msrs *vmsrs = &domctl->u.vcpu_msrs;
+        struct xen_domctl_vcpu_msr msr;
+        struct vcpu *v;
+        unsigned int i, nr_msrs = 0, edx, dummy;
+        bool_t has_ibrsb;
+
+        ret = -ESRCH;
+        if ( (vmsrs->vcpu >= d->max_vcpus) ||
+             ((v = d->vcpu[vmsrs->vcpu]) == NULL) )
+            break;
+
+        ret = -EINVAL;
+        if ( (v == current) || /* no vcpu_pause() */
+             !is_pv_domain(d) )
+            break;
+
+        domain_cpuid(d, 7, 0, &dummy, &dummy, &dummy, &edx);
+        has_ibrsb = !!(edx & cpufeat_mask(X86_FEATURE_IBRSB));
+        nr_msrs += has_ibrsb;
+
+        if ( domctl->cmd == XEN_DOMCTL_get_vcpu_msrs )
+        {
+            ret = 0; copyback = 1;
+
+            /* NULL guest handle is a request for max size. */
+            if ( guest_handle_is_null(vmsrs->msrs) )
+                vmsrs->msr_count = nr_msrs;
+            else
+            {
+                i = 0;
+
+                vcpu_pause(v);
+
+                if ( has_ibrsb && v->arch.spec_ctrl )
+                {
+                    if ( i < vmsrs->msr_count && !ret )
+                    {
+                        msr.index = MSR_SPEC_CTRL;
+                        msr.reserved = 0;
+                        msr.value = v->arch.spec_ctrl;
+                        if ( copy_to_guest_offset(vmsrs->msrs, i, &msr, 1) )
+                            ret = -EFAULT;
+                    }
+                    ++i;
+                }
+
+                vcpu_unpause(v);
+
+                if ( i > vmsrs->msr_count && !ret )
+                    ret = -ENOBUFS;
+                vmsrs->msr_count = i;
+            }
+        }
+        else
+        {
+            ret = -EINVAL;
+            if ( vmsrs->msr_count > nr_msrs )
+                break;
+
+            vcpu_pause(v);
+
+            for ( i = 0; i < vmsrs->msr_count; ++i )
+            {
+                ret = -EFAULT;
+                if ( copy_from_guest_offset(&msr, vmsrs->msrs, i, 1) )
+                    break;
+
+                ret = -EINVAL;
+                if ( msr.reserved )
+                    break;
+
+                switch ( msr.index )
+                {
+                case MSR_SPEC_CTRL:
+                    if ( !boot_cpu_has(X86_FEATURE_IBRSB) )
+                        break; /* MSR available? */
+
+                    /*
+                     * Note: SPEC_CTRL_STIBP is specified as safe to use (i.e.
+                     * ignored) when STIBP isn't enumerated in hardware.
+                     */
+
+                    if ( msr.value & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP) )
+                        break;
+                    v->arch.spec_ctrl = msr.value;
+                    continue;
+                }
+                break;
+            }
+
+            vcpu_unpause(v);
+
+            if ( i == vmsrs->msr_count )
+                ret = 0;
+            else
+            {
+                vmsrs->msr_count = i;
+                copyback = 1;
+            }
+        }
+    }
+    break;
+
     default:
         ret = iommu_do_domctl(domctl, d, u_domctl);
         break;
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -1218,10 +1218,123 @@ static int hvm_load_cpu_xsave_states(str
     return 0;
 }
 
-/* We need variable length data chunk for xsave area, hence customized
- * declaration other than HVM_REGISTER_SAVE_RESTORE.
+#define HVM_CPU_MSR_SIZE(cnt) offsetof(struct hvm_msr, msr[cnt])
+static unsigned int __read_mostly msr_count_max;
+
+static int hvm_save_cpu_msrs(struct domain *d, hvm_domain_context_t *h)
+{
+    struct vcpu *v;
+
+    for_each_vcpu ( d, v )
+    {
+        struct hvm_save_descriptor *d = _p(&h->data[h->cur]);
+        struct hvm_msr *ctxt;
+        unsigned int i;
+
+        if ( _hvm_init_entry(h, CPU_MSR_CODE, v->vcpu_id,
+                             HVM_CPU_MSR_SIZE(msr_count_max)) )
+            return 1;
+        ctxt = (struct hvm_msr *)&h->data[h->cur];
+        ctxt->count = 0;
+
+        if ( hvm_funcs.save_msr )
+            hvm_funcs.save_msr(v, ctxt);
+
+        for ( i = 0; i < ctxt->count; ++i )
+            ctxt->msr[i]._rsvd = 0;
+
+        if ( ctxt->count )
+        {
+            /* Rewrite length to indicate how much space we actually used. */
+            d->length = HVM_CPU_MSR_SIZE(ctxt->count);
+            h->cur += HVM_CPU_MSR_SIZE(ctxt->count);
+        }
+        else
+            /* or rewind and remove the descriptor from the stream. */
+            h->cur -= sizeof(struct hvm_save_descriptor);
+    }
+
+    return 0;
+}
+
+static int hvm_load_cpu_msrs(struct domain *d, hvm_domain_context_t *h)
+{
+    unsigned int i, vcpuid = hvm_load_instance(h);
+    struct vcpu *v;
+    const struct hvm_save_descriptor *desc;
+    struct hvm_msr *ctxt;
+    int err = 0;
+
+    if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL )
+    {
+        dprintk(XENLOG_G_ERR, "HVM restore: dom%d has no vcpu%u\n",
+                d->domain_id, vcpuid);
+        return -EINVAL;
+    }
+
+    /* Customized checking for entry since our entry is of variable length */
+    desc = (struct hvm_save_descriptor *)&h->data[h->cur];
+    if ( sizeof (*desc) > h->size - h->cur)
+    {
+        printk(XENLOG_G_WARNING
+               "HVM%d.%d restore: not enough data left to read MSR descriptor\n",
+               d->domain_id, vcpuid);
+        return -ENODATA;
+    }
+    if ( desc->length + sizeof (*desc) > h->size - h->cur)
+    {
+        printk(XENLOG_G_WARNING
+               "HVM%d.%d restore: not enough data left to read %u MSR bytes\n",
+               d->domain_id, vcpuid, desc->length);
+        return -ENODATA;
+    }
+    if ( desc->length < HVM_CPU_MSR_SIZE(1) )
+    {
+        printk(XENLOG_G_WARNING
+               "HVM%d.%d restore mismatch: MSR length %u < %zu\n",
+               d->domain_id, vcpuid, desc->length, HVM_CPU_MSR_SIZE(1));
+        return -EINVAL;
+    }
+
+    h->cur += sizeof(*desc);
+    ctxt = (struct hvm_msr *)&h->data[h->cur];
+    h->cur += desc->length;
+
+    if ( desc->length != HVM_CPU_MSR_SIZE(ctxt->count) )
+    {
+        printk(XENLOG_G_WARNING
+               "HVM%d.%d restore mismatch: MSR length %u != %zu\n",
+               d->domain_id, vcpuid, desc->length,
+               HVM_CPU_MSR_SIZE(ctxt->count));
+        return -EOPNOTSUPP;
+    }
+
+    for ( i = 0; i < ctxt->count; ++i )
+        if ( ctxt->msr[i]._rsvd )
+            return -EOPNOTSUPP;
+    /* Checking finished */
+
+    if ( hvm_funcs.load_msr )
+        err = hvm_funcs.load_msr(v, ctxt);
+
+    for ( i = 0; !err && i < ctxt->count; ++i )
+    {
+        switch ( ctxt->msr[i].index )
+        {
+        default:
+            if ( !ctxt->msr[i]._rsvd )
+                err = -ENXIO;
+            break;
+        }
+    }
+
+    return err;
+}
+
+/* We need variable length data chunks for XSAVE area and MSRs, hence
+ * a custom declaration rather than HVM_REGISTER_SAVE_RESTORE.
  */
-static int __init __hvm_register_CPU_XSAVE_save_and_restore(void)
+static int __init hvm_register_CPU_save_and_restore(void)
 {
     hvm_register_savevm(CPU_XSAVE_CODE,
                         "CPU_XSAVE",
@@ -1230,9 +1343,22 @@ static int __init __hvm_register_CPU_XSA
                         HVM_CPU_XSAVE_SIZE(xfeature_mask) +
                             sizeof(struct hvm_save_descriptor),
                         HVMSR_PER_VCPU);
+
+    if ( hvm_funcs.init_msr )
+        msr_count_max += hvm_funcs.init_msr();
+
+    if ( msr_count_max )
+        hvm_register_savevm(CPU_MSR_CODE,
+                            "CPU_MSR",
+                            hvm_save_cpu_msrs,
+                            hvm_load_cpu_msrs,
+                            HVM_CPU_MSR_SIZE(msr_count_max) +
+                                sizeof(struct hvm_save_descriptor),
+                            HVMSR_PER_VCPU);
+
     return 0;
 }
-__initcall(__hvm_register_CPU_XSAVE_save_and_restore);
+__initcall(hvm_register_CPU_save_and_restore);
 
 int hvm_vcpu_initialise(struct vcpu *v)
 {
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -583,6 +583,65 @@ static int vmx_load_vmcs_ctxt(struct vcp
     return 0;
 }
 
+static unsigned int __init vmx_init_msr(void)
+{
+    return !!boot_cpu_has(X86_FEATURE_IBRSB);
+}
+
+static void vmx_save_msr(struct vcpu *v, struct hvm_msr *ctxt)
+{
+    uint32_t edx, dummy;
+
+    vmx_vmcs_enter(v);
+
+    domain_cpuid(v->domain, 7, 0, &dummy, &dummy, &dummy, &edx);
+    if ( (edx & cpufeat_mask(X86_FEATURE_IBRSB)) && v->arch.spec_ctrl )
+    {
+        ctxt->msr[ctxt->count].index = MSR_SPEC_CTRL;
+        ctxt->msr[ctxt->count++].val = v->arch.spec_ctrl;
+    }
+
+    vmx_vmcs_exit(v);
+}
+
+static int vmx_load_msr(struct vcpu *v, struct hvm_msr *ctxt)
+{
+    unsigned int i;
+    int err = 0;
+
+    vmx_vmcs_enter(v);
+
+    for ( i = 0; i < ctxt->count; ++i )
+    {
+        switch ( ctxt->msr[i].index )
+        {
+        case MSR_SPEC_CTRL:
+            if ( !boot_cpu_has(X86_FEATURE_IBRSB) )
+                err = -ENXIO; /* MSR available? */
+            /*
+             * Note: SPEC_CTRL_STIBP is specified as safe to use (i.e.
+             * ignored) when STIBP isn't enumerated in hardware.
+             */
+            else if ( ctxt->msr[i].val &
+                      ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP) )
+                err = -ENXIO;
+            else
+                v->arch.spec_ctrl = ctxt->msr[i].val;
+            break;
+
+        default:
+            continue;
+        }
+        if ( err )
+            break;
+        ctxt->msr[i]._rsvd = 1;
+    }
+
+    vmx_vmcs_exit(v);
+
+    return err;
+}
+
 static void vmx_fpu_enter(struct vcpu *v)
 {
     vcpu_restore_fpu_lazy(v);
@@ -1663,6 +1722,9 @@ static struct hvm_function_table __initd
     .vcpu_destroy         = vmx_vcpu_destroy,
     .save_cpu_ctxt        = vmx_save_vmcs_ctxt,
     .load_cpu_ctxt        = vmx_load_vmcs_ctxt,
+    .init_msr             = vmx_init_msr,
+    .save_msr             = vmx_save_msr,
+    .load_msr             = vmx_load_msr,
     .get_interrupt_shadow = vmx_get_interrupt_shadow,
     .set_interrupt_shadow = vmx_set_interrupt_shadow,
     .guest_x86_mode       = vmx_guest_x86_mode,
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -109,6 +109,10 @@ struct hvm_function_table {
     void (*save_cpu_ctxt)(struct vcpu *v, struct hvm_hw_cpu *ctxt);
     int (*load_cpu_ctxt)(struct vcpu *v, struct hvm_hw_cpu *ctxt);
 
+    unsigned int (*init_msr)(void);
+    void (*save_msr)(struct vcpu *, struct hvm_msr *);
+    int (*load_msr)(struct vcpu *, struct hvm_msr *);
+
     /* Examine specifics of the guest state. */
     unsigned int (*get_interrupt_shadow)(struct vcpu *v);
     void (*set_interrupt_shadow)(struct vcpu *v, unsigned int intr_shadow);
--- a/xen/include/public/arch-x86/hvm/save.h
+++ b/xen/include/public/arch-x86/hvm/save.h
@@ -592,9 +592,26 @@ struct hvm_tsc_adjust {
 
 DECLARE_HVM_SAVE_TYPE(TSC_ADJUST, 19, struct hvm_tsc_adjust);
 
+struct hvm_msr {
+    uint32_t count;
+    struct hvm_one_msr {
+        uint32_t index;
+        uint32_t _rsvd;
+        uint64_t val;
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+    } msr[];
+#elif defined(__GNUC__)
+    } msr[0];
+#else
+    } msr[1 /* variable size */];
+#endif
+};
+
+#define CPU_MSR_CODE  20
+
 /* 
  * Largest type-code in use
  */
-#define HVM_SAVE_CODE_MAX 19
+#define HVM_SAVE_CODE_MAX 20
 
 #endif /* __XEN_PUBLIC_HVM_SAVE_X86_H__ */
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -898,6 +898,41 @@ struct xen_domctl_cacheflush {
 typedef struct xen_domctl_cacheflush xen_domctl_cacheflush_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_cacheflush_t);
 
+#if defined(__i386__) || defined(__x86_64__)
+struct xen_domctl_vcpu_msr {
+    uint32_t         index;
+    uint32_t         reserved;
+    uint64_aligned_t value;
+};
+typedef struct xen_domctl_vcpu_msr xen_domctl_vcpu_msr_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpu_msr_t);
+
+/*
+ * XEN_DOMCTL_set_vcpu_msrs / XEN_DOMCTL_get_vcpu_msrs.
+ *
+ * Input:
+ * - A NULL 'msrs' guest handle is a request for the maximum 'msr_count'.
+ * - Otherwise, 'msr_count' is the number of entries in 'msrs'.
+ *
+ * Output for get:
+ * - If 'msr_count' is less than the number Xen needs to write, -ENOBUFS shall
+ *   be returned and 'msr_count' updated to reflect the intended number.
+ * - On success, 'msr_count' shall indicate the number of MSRs written, which
+ *   may be less than the maximum if some are not currently used by the vcpu.
+ *
+ * Output for set:
+ * - If Xen encounters an error with a specific MSR, -EINVAL shall be returned
+ *   and 'msr_count' shall be set to the offending index, to aid debugging.
+ */
+struct xen_domctl_vcpu_msrs {
+    uint32_t vcpu;                                   /* IN     */
+    uint32_t msr_count;                              /* IN/OUT */
+    XEN_GUEST_HANDLE_64(xen_domctl_vcpu_msr_t) msrs; /* IN/OUT */
+};
+typedef struct xen_domctl_vcpu_msrs xen_domctl_vcpu_msrs_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpu_msrs_t);
+#endif
+
 struct xen_domctl {
     uint32_t cmd;
 #define XEN_DOMCTL_createdomain                   1
@@ -968,6 +1003,8 @@ struct xen_domctl {
 #define XEN_DOMCTL_getnodeaffinity               69
 #define XEN_DOMCTL_set_max_evtchn                70
 #define XEN_DOMCTL_cacheflush                    71
+#define XEN_DOMCTL_get_vcpu_msrs                 72
+#define XEN_DOMCTL_set_vcpu_msrs                 73
 #define XEN_DOMCTL_gdbsx_guestmemio            1000
 #define XEN_DOMCTL_gdbsx_pausevcpu             1001
 #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
@@ -1019,6 +1056,7 @@ struct xen_domctl {
 #if defined(__i386__) || defined(__x86_64__)
         struct xen_domctl_cpuid             cpuid;
         struct xen_domctl_vcpuextstate      vcpuextstate;
+        struct xen_domctl_vcpu_msrs         vcpu_msrs;
 #endif
         struct xen_domctl_set_access_required access_required;
         struct xen_domctl_audit_p2m         audit_p2m;