File xsa297-2.patch of Package xen.23721

x86/spec-ctrl: Infrastructure to use VERW to flush pipeline buffers

Three synthetic features are introduced, as we need individual control of
each, depending on circumstances.  A later change will enable them at
appropriate points.

The verw_sel field doesn't strictly need to live in struct cpu_info.  It lives
there because there is a convenient hole it can fill, and it reduces the
complexity of the SPEC_CTRL_EXIT_TO_{PV,HVM} assembly by avoiding the need for
any temporary stack maintenance.

This is part of XSA-297, CVE-2018-12126, CVE-2018-12127, CVE-2018-12130, CVE-2019-11091.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/x86_64/asm-offsets.c
+++ b/xen/arch/x86/x86_64/asm-offsets.c
@@ -137,6 +137,7 @@ void __dummy__(void)
 
     OFFSET(CPUINFO_guest_cpu_user_regs, struct cpu_info, guest_cpu_user_regs);
     OFFSET(CPUINFO_processor_id, struct cpu_info, processor_id);
+    OFFSET(CPUINFO_verw_sel, struct cpu_info, verw_sel);
     OFFSET(CPUINFO_current_vcpu, struct cpu_info, current_vcpu);
     OFFSET(CPUINFO_cr4, struct cpu_info, cr4);
     OFFSET(CPUINFO_xen_cr3, struct cpu_info, xen_cr3);
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -28,6 +28,9 @@ XEN_CPUFEATURE(SC_RSB_HVM,      (FSCAPIN
 XEN_CPUFEATURE(NO_XPTI,         (FSCAPINTS+0)*32+ 18) /* XPTI mitigation not in use */
 XEN_CPUFEATURE(SC_MSR_IDLE,     (FSCAPINTS+0)*32+ 19) /* (SC_MSR_PV || SC_MSR_HVM) && default_xen_spec_ctrl */
 XEN_CPUFEATURE(MFENCE_RDTSC,    (FSCAPINTS+0)*32+ 20) /* MFENCE synchronizes RDTSC */
+XEN_CPUFEATURE(SC_VERW_PV,      (FSCAPINTS+0)*32+ 21) /* VERW used by Xen for PV */
+XEN_CPUFEATURE(SC_VERW_HVM,     (FSCAPINTS+0)*32+ 22) /* VERW used by Xen for HVM */
+XEN_CPUFEATURE(SC_VERW_IDLE,    (FSCAPINTS+0)*32+ 23) /* VERW used by Xen for idle */
 
 #define NCAPINTS (FSCAPINTS + 1) /* N 32-bit words worth of info */
 
--- a/xen/include/asm-x86/current.h
+++ b/xen/include/asm-x86/current.h
@@ -39,6 +39,7 @@ struct vcpu;
 struct cpu_info {
     struct cpu_user_regs guest_cpu_user_regs;
     unsigned int processor_id;
+    unsigned int verw_sel;
     struct vcpu *current_vcpu;
     unsigned long per_cpu_offset;
     unsigned long cr4;
--- a/xen/include/asm-x86/spec_ctrl.h
+++ b/xen/include/asm-x86/spec_ctrl.h
@@ -53,6 +53,13 @@ static inline void init_shadow_spec_ctrl
     info->shadow_spec_ctrl = 0;
     info->xen_spec_ctrl = default_xen_spec_ctrl;
     info->spec_ctrl_flags = default_spec_ctrl_flags;
+
+    /*
+     * For least latency, the VERW selector should be a writeable data
+     * descriptor resident in the cache.  __HYPERVISOR_DS32 shares a cache
+     * line with __HYPERVISOR_CS, so is expected to be very cache-hot.
+     */
+    info->verw_sel = __HYPERVISOR_DS32;
 }
 
 /* WARNING! `ret`, `call *`, `jmp *` not safe after this call. */
@@ -74,6 +81,23 @@ static always_inline void spec_ctrl_ente
                       "a" (val), "c" (MSR_SPEC_CTRL), "d" (0),
                       "i" (X86_FEATURE_SC_MSR_IDLE));
     barrier();
+
+    /*
+     * Microarchitectural Store Buffer Data Sampling:
+     *
+     * On vulnerable systems, store buffer entries are statically partitioned
+     * between active threads.  When entering idle, our store buffer entries
+     * are re-partitioned to allow the other threads to use them.
+     *
+     * Flush the buffers to ensure that no sensitive data of ours can be
+     * leaked by a sibling after it gets our store buffer entries.
+     *
+     * Note: VERW must be encoded with a memory operand, as it is only that
+     * form which causes a flush.
+     */
+    alternative_input(ASM_NOP8, "verw %[sel]", %c[feat],
+                      [sel] "m" (info->verw_sel),
+                      [feat] "i" (X86_FEATURE_SC_VERW_IDLE));
 }
 
 /* WARNING! `ret`, `call *`, `jmp *` not safe before this call. */
@@ -93,6 +117,17 @@ static always_inline void spec_ctrl_exit
                       "a" (val), "c" (MSR_SPEC_CTRL), "d" (0),
                       "i" (X86_FEATURE_SC_MSR_IDLE));
     barrier();
+
+    /*
+     * Microarchitectural Store Buffer Data Sampling:
+     *
+     * On vulnerable systems, store buffer entries are statically partitioned
+     * between active threads.  When exiting idle, the other threads' store
+     * buffer entries are re-partitioned to give us some.
+     *
+     * We now have store buffer entries with stale data from sibling threads.
+     * A flush, if necessary, will be performed on the return-to-guest path.
+     */
 }
 
 #endif /* !__X86_SPEC_CTRL_H__ */
--- a/xen/include/asm-x86/spec_ctrl_asm.h
+++ b/xen/include/asm-x86/spec_ctrl_asm.h
@@ -247,12 +247,18 @@
 /* Use when exiting to PV guest context. */
 #define SPEC_CTRL_EXIT_TO_PV                                            \
     ALTERNATIVE __stringify(ASM_NOP24),                                 \
-        DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV
+        DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV;              \
+    ALTERNATIVE __stringify(ASM_NOP8),                                  \
+        __stringify(verw CPUINFO_verw_sel(%rsp)),                       \
+        X86_FEATURE_SC_VERW_PV
 
 /* Use when exiting to HVM guest context. */
 #define SPEC_CTRL_EXIT_TO_HVM                                           \
     ALTERNATIVE __stringify(ASM_NOP24),                                 \
-        DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_HVM
+        DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_HVM;             \
+    ALTERNATIVE __stringify(ASM_NOP8),                                  \
+        __stringify(verw CPUINFO_verw_sel(%rsp)),                       \
+        X86_FEATURE_SC_VERW_HVM
 
 /*
  * Use in IST interrupt/exception context.  May interrupt Xen or PV context.
openSUSE Build Service is sponsored by