File xsa456-0j.patch of Package xen.36362
# Commit 94896de1a98c4289fe6fef9e16ef99fc6ef2efc4
# Date 2024-04-09 16:37:30 +0100
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Andrew Cooper <andrew.cooper3@citrix.com>
x86/spec-ctrl: Rework conditional safety for SPEC_CTRL_ENTRY_*
Right now, we have a mix of safety strategies in different blocks, making the
logic fragile and hard to follow.
Start addressing this by having a safety LFENCE at the end of the blocks,
which can be patched out if other safety criteria are met.  This will allow us
to simplify the sub-blocks.  For SPEC_CTRL_ENTRY_FROM_IST, simply leave an
LFENCE unconditionally at the end; the IST path is not a fast-path by any
stretch of the imagination.
For SPEC_CTRL_ENTRY_FROM_INTR, the existing description was incorrect.  The
IRET #GP path is non-fatal but can occur with the guest's choice of
MSR_SPEC_CTRL.  It is safe to skip the flush/barrier-like protections when
interrupting Xen, but we must run DO_SPEC_CTRL_ENTRY irrespective.
This will skip RSB stuffing which was previously unconditional even when
interrupting Xen.
AFAICT, this is a missing cleanup from commit 3fffaf9c13e9 ("x86/entry: Avoid
using alternatives in NMI/#MC paths") where we split the IST entry path out of
the main INTR entry path.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Roger Pau Monné <roger.pau@citrix.com>
--- a/xen/arch/x86/hvm/vmx/entry.S
+++ b/xen/arch/x86/hvm/vmx/entry.S
@@ -43,6 +43,7 @@ ENTRY(vmx_asm_vmexit_handler)
             wrmsr
         .endm
         ALTERNATIVE "", restore_spec_ctrl, X86_FEATURE_SC_MSR_HVM
+        ALTERNATIVE "lfence", "", X86_SPEC_NO_LFENCE_ENTRY_VMX
         /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
 
         /* Hardware clears MSR_DEBUGCTL on VMExit.  Reinstate it if debugging Xen. */
--- a/xen/arch/x86/spec_ctrl.c
+++ b/xen/arch/x86/spec_ctrl.c
@@ -2041,6 +2041,58 @@ void __init init_speculation_mitigations
     print_details(thunk);
 
     /*
+     * With the alternative blocks now chosen, see if we need any other
+     * adjustments for safety.
+     *
+     * We compile the LFENCE in, and patch it out if it's not needed.
+     *
+     * Notes:
+     *  - SPEC_CTRL_ENTRY_FROM_SVM doesn't need an LFENCE because it has an
+     *    unconditional STGI.
+     *  - SPEC_CTRL_ENTRY_FROM_IST handles its own safety, without the use of
+     *    alternatives.
+     *  - DO_OVERWRITE_RSB has conditional branches in it, but it's an inline
+     *    sequence.  It is considered safe for uarch reasons.
+     */
+    {
+        /*
+         * SPEC_CTRL_ENTRY_FROM_PV conditional safety
+         *
+         * DO_SPEC_CTRL_ENTRY (X86_FEATURE_SC_MSR_PV if used) is an
+         * unconditional WRMSR as the last action.
+         *
+         * If we have it, or we're not using any prior conditional mitigation,
+         * then it's safe to drop the LFENCE.
+         */
+        if ( boot_cpu_has(X86_FEATURE_SC_MSR_PV) ||
+             !boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) )
+            setup_force_cpu_cap(X86_SPEC_NO_LFENCE_ENTRY_PV);
+
+        /*
+         * SPEC_CTRL_ENTRY_FROM_INTR conditional safety
+         *
+         * DO_SPEC_CTRL_ENTRY (X86_FEATURE_SC_MSR_PV if used) is an
+         * unconditional WRMSR as the last action.
+         *
+         * If we have it, or we have no protections active in the block that
+         * is skipped when interrupting guest context, then it's safe to drop
+         * the LFENCE.
+         */
+        if ( boot_cpu_has(X86_FEATURE_SC_MSR_PV) ||
+             (!boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) &&
+              !boot_cpu_has(X86_FEATURE_SC_RSB_PV)) )
+            setup_force_cpu_cap(X86_SPEC_NO_LFENCE_ENTRY_INTR);
+
+        /*
+         * SPEC_CTRL_ENTRY_FROM_VMX conditional safety
+         *
+         * Currently there are no safety actions with conditional branches, so
+         * no need for the extra safety LFENCE.
+         */
+        setup_force_cpu_cap(X86_SPEC_NO_LFENCE_ENTRY_VMX);
+    }
+
+    /*
      * If MSR_SPEC_CTRL is available, apply Xen's default setting and discard
      * any firmware settings.  For performance reasons, when safe to do so, we
      * delay applying non-zero settings until after dom0 has been constructed.
--- a/xen/include/asm-x86/cpufeatures.h
+++ b/xen/include/asm-x86/cpufeatures.h
@@ -51,5 +51,9 @@ XEN_CPUFEATURE(IBPB_ENTRY_HVM,    X86_SY
 #define X86_BUG_CLFLUSH_MFENCE    X86_BUG( 2) /* MFENCE needed to serialise CLFLUSH */
 #define X86_BUG_IBPB_NO_RET       X86_BUG( 3) /* IBPB doesn't flush the RSB/RAS */
 
+#define X86_SPEC_NO_LFENCE_ENTRY_PV X86_BUG(16) /* (No) safety LFENCE for SPEC_CTRL_ENTRY_PV. */
+#define X86_SPEC_NO_LFENCE_ENTRY_INTR X86_BUG(17) /* (No) safety LFENCE for SPEC_CTRL_ENTRY_INTR. */
+#define X86_SPEC_NO_LFENCE_ENTRY_VMX X86_BUG(18) /* (No) safety LFENCE for SPEC_CTRL_ENTRY_VMX. */
+
 /* Total number of capability words, inc synth and bug words. */
 #define NCAPINTS (FSCAPINTS + X86_NR_SYNTH + X86_NR_BUG) /* N 32-bit words worth of info */
--- a/xen/include/asm-x86/spec_ctrl_asm.h
+++ b/xen/include/asm-x86/spec_ctrl_asm.h
@@ -273,25 +273,37 @@
 
     ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=0),         \
         X86_FEATURE_SC_MSR_PV
+
+    ALTERNATIVE "lfence", "", X86_SPEC_NO_LFENCE_ENTRY_PV
 .endm
 
 /*
  * Used after an exception or maskable interrupt, hitting Xen or PV context.
- * There will either be a guest speculation context, or (barring fatal
- * exceptions) a well-formed Xen speculation context.
+ * There will either be a guest speculation context, or a well-formed Xen
+ * speculation context, with the exception of one case.  IRET #GP handling may
+ * have a guest choice of MSR_SPEC_CTRL.
+ *
+ * Therefore, we can skip the flush/barrier-like protections when hitting Xen,
+ * but we must still run the mode-based protections.
  */
 .macro SPEC_CTRL_ENTRY_FROM_INTR
 /*
  * Requires %rsp=regs, %r14=stack_end, %rdx=0
  * Clobbers %rax, %rcx, %rdx
  */
+    testb $3, UREGS_cs(%rsp)
+    jz .L\@_skip
+
     ALTERNATIVE "", __stringify(DO_SPEC_CTRL_COND_IBPB maybexen=1),     \
         X86_FEATURE_IBPB_ENTRY_PV
 
     ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV
 
+.L\@_skip:
     ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=1),         \
         X86_FEATURE_SC_MSR_PV
+
+    ALTERNATIVE "lfence", "", X86_SPEC_NO_LFENCE_ENTRY_INTR
 .endm
 
 /*
@@ -360,18 +372,9 @@
     movzbl STACK_CPUINFO_FIELD(xen_spec_ctrl)(%r14), %eax
     wrmsr
 
-    /* Opencoded UNLIKELY_START() with no condition. */
-UNLIKELY_DISPATCH_LABEL(\@_serialise):
-    .subsection 1
-    /*
-     * In the case that we might need to set SPEC_CTRL.IBRS for safety, we
-     * need to ensure that an attacker can't poison the `jz .L\@_skip_wrmsr`
-     * to speculate around the WRMSR.  As a result, we need a dispatch
-     * serialising instruction in the else clause.
-     */
 .L\@_skip_msr_spec_ctrl:
+
     lfence
-    UNLIKELY_END(\@_serialise)
 .endm
 
 /*