File xsa435-3.patch of Package xen.31136
From e926771de8f4c56838b17193f8c84bd3ae37e958 Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Wed, 4 Jan 2023 16:32:44 +0000
Subject: x86/spec-ctrl: Mitigate Gather Data Sampling
This is part of XSA-435 / CVE-2022-40982
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
--- a/docs/misc/xen-command-line.pandoc
+++ b/docs/misc/xen-command-line.pandoc
@@ -1970,7 +1970,7 @@ By default SSBD will be mitigated at run
 > `= List of [ <bool>, xen=<bool>, {pv,hvm}=<bool>,
 >              {msr-sc,rsb,md-clear,ibpb-entry}=<bool>|{pv,hvm}=<bool>,
 >              bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,psfd,
->              eager-fpu,l1d-flush,srb-lock,unpriv-mmio}=<bool> ]`
+>              eager-fpu,l1d-flush,srb-lock,unpriv-mmio,gds-mit}=<bool> ]`
 
 Controls for speculative execution sidechannel mitigations.  By default, Xen
 will pick the most appropriate mitigations based on compiled in support,
@@ -2077,6 +2077,14 @@ and/or `SRBDS_CTRL` functionality availa
 release to mitigate cross-domain leakage of data via the MMIO Stale Data
 vulnerabilities.
 
+On all hardware, the `gds-mit=` option can be used to force or prevent Xen
+from mitigating the GDS (Gather Data Sampling) vulnerability.  By default, Xen
+will mitigate GDS on hardware believed to be vulnerable.  On hardware
+supporting GDS_CTRL (requires the August 2023 microcode), and where firmware
+has elected not to lock the configuration, Xen will use GDS_CTRL to mitigate
+GDS with.  Otherwise, Xen will mitigate by disabling AVX, which blocks the use
+of the AVX2 Gather instructions.
+
 ### sync_console
 > `= <boolean>`
 
--- a/xen/arch/x86/spec_ctrl.c
+++ b/xen/arch/x86/spec_ctrl.c
@@ -76,6 +76,7 @@ static bool __initdata cpu_has_bug_mds;
 static int8_t __initdata opt_srb_lock = -1;
 static bool __initdata opt_unpriv_mmio;
 static bool __read_mostly opt_fb_clear_mmio;
+static int8_t __initdata opt_gds_mit = -1;
 
 static int __init parse_spec_ctrl(const char *s)
 {
@@ -128,6 +129,7 @@ static int __init parse_spec_ctrl(const
             opt_l1d_flush = 0;
             opt_srb_lock = 0;
             opt_unpriv_mmio = false;
+            opt_gds_mit = 0;
         }
         else if ( val > 0 )
             rc = -EINVAL;
@@ -281,6 +283,8 @@ static int __init parse_spec_ctrl(const
             opt_srb_lock = val;
         else if ( (val = parse_boolean("unpriv-mmio", s, ss)) >= 0 )
             opt_unpriv_mmio = val;
+        else if ( (val = parse_boolean("gds-mit", s, ss)) >= 0 )
+            opt_gds_mit = val;
         else
             rc = -EINVAL;
 
@@ -1245,6 +1249,158 @@ static __init void mds_calculations(void
     }
 }
 
+static bool __init cpu_has_gds(void)
+{
+    /*
+     * Any part advertising GDS_NO should be not vulnerable to GDS.  This
+     * includes cases where the hypervisor is mitigating behind our backs, or
+     * has synthesized GDS_NO on older parts for levelling purposes.
+     */
+    if ( cpu_has_gds_no )
+        return false;
+
+    /*
+     * On real hardware the GDS_CTRL control only exists on parts vulnerable
+     * to GDS and with up-to-date microcode.  It might also be virtualised by
+     * an aware hypervisor, meaning "somewhere you might migrate to is
+     * vulnerable".
+     */
+    if ( cpu_has_gds_ctrl )
+        return true;
+
+    /*
+     * An attacker requires the use of the AVX2 GATHER instructions to leak
+     * data with GDS.  However, the only way to block those instructions is to
+     * prevent XCR0[2] from being set, which is original AVX.  A hypervisor
+     * might do this as a stopgap mitigation.
+     */
+    if ( !cpu_has_avx )
+        return false;
+
+    /*
+     * GDS affects the Core line from Skylake up to but not including Golden
+     * Cove (Alder Lake, Sapphire Rapids).  Broadwell and older, and the Atom
+     * line, and all hybrid parts are unaffected.
+     */
+    switch ( boot_cpu_data.x86_model )
+    {
+    case 0x55: /* Skylake/Cascade Lake/Cooper Lake SP */
+    case 0x6a: /* Ice Lake SP */
+    case 0x6c: /* Ice Lake D */
+    case 0x7e: /* Ice Lake U/Y */
+    case 0x8c: /* Tiger Lake U */
+    case 0x8d: /* Tiger Lake H */
+    case 0x8e: /* Amber/Kaby/Coffee/Whiskey/Comet lake U/Y */
+    case 0x9e: /* Kaby/Coffee lake H/S/Xeon */
+    case 0xa5: /* Comet Lake H/S */
+    case 0xa6: /* Comet Lake U */
+    case 0xa7: /* Rocket Lake */
+        return true;
+
+    default:
+        /*
+         * If we've got here and are virtualised, we're most likely under a
+         * hypervisor unaware of GDS at which point we've lost.  Err on the
+         * safe side.
+         */
+        return cpu_has_hypervisor;
+    }
+}
+
+static void __init gds_calculations(void)
+{
+    bool cpu_has_bug_gds, mitigated = false;
+
+    /* GDS is only known to affect Intel Family 6 processors at this time. */
+    if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
+         boot_cpu_data.x86 != 6 )
+        return;
+
+    cpu_has_bug_gds = cpu_has_gds();
+
+    /*
+     * If we've got GDS_CTRL, we're either native with up-to-date microcode on
+     * a GDS-vulnerable part, or virtualised under a GDS-aware hypervisor.
+     */
+    if ( cpu_has_gds_ctrl )
+    {
+        bool locked;
+        uint64_t opt_ctrl;
+
+        if ( cpu_has_gds_no )
+        {
+            /*
+             * We don't expect to ever see GDS_CTL and GDS_NO set together.
+             * Complain loudly, and forgo playing with other features.
+             */
+            printk(XENLOG_ERR
+                   "FIRMWARE BUG: CPU %02x-%02x-%02x, ucode 0x%08x: GDS_CTRL && GDS_NO\n",
+                   boot_cpu_data.x86, boot_cpu_data.x86_model,
+                   boot_cpu_data.x86_mask, this_cpu(ucode_cpu_info).cpu_sig.rev);
+            return add_taint(TAINT_CPU_OUT_OF_SPEC);
+        }
+
+        rdmsrl(MSR_MCU_OPT_CTRL, opt_ctrl);
+
+        mitigated = !(opt_ctrl & MCU_OPT_CTRL_GDS_MIT_DIS);
+        locked    =   opt_ctrl & MCU_OPT_CTRL_GDS_MIT_LOCK;
+
+        /*
+         * Firmware will lock the GDS mitigation if e.g. SGX is active.
+         * Alternatively, a hypervisor might virtualise GDS_CTRL as locked.
+         * Warn if the mitigiation is locked and the user requested the
+         * opposite configuration.
+         */
+        if ( locked )
+        {
+            if ( opt_gds_mit >= 0 && opt_gds_mit != mitigated )
+                printk(XENLOG_WARNING
+                       "GDS_MIT locked by firwmare - ignoring spec-ctrl=gds-mit setting\n");
+            opt_gds_mit = mitigated;
+        }
+        else if ( opt_gds_mit == -1 )
+            opt_gds_mit = cpu_has_bug_gds; /* Mitigate GDS by default */
+
+        /*
+         * Latch our choice of GDS_MIT for all CPUs to pick up.  If LOCK is
+         * set, we latch the same value as it currently holds.
+         */
+        set_in_mcu_opt_ctrl(MCU_OPT_CTRL_GDS_MIT_DIS,
+                            opt_gds_mit ? 0 : MCU_OPT_CTRL_GDS_MIT_DIS);
+        mitigated = opt_gds_mit;
+    }
+    else if ( opt_gds_mit == -1 )
+        opt_gds_mit = cpu_has_bug_gds; /* Mitigate GDS by default */
+
+    /*
+     * If we think we're not on vulnerable hardware, or we've mitigated GDS,
+     * synthesize GDS_NO.  This is mostly for the benefit of guests, to inform
+     * them not to panic.
+     */
+    if ( !cpu_has_bug_gds || mitigated )
+        return setup_force_cpu_cap(X86_FEATURE_GDS_NO);
+
+    /*
+     * If all else has failed, mitigate by disabling AVX.  This prevents
+     * guests from enabling %xcr0.ymm, thereby blocking the use of VGATHER
+     * instructions.
+     *
+     * There's at least one affected CPU not expected to recieve a microcode
+     * update, and this is the only remaining mitigation.
+     *
+     * If we're virtualised, this prevents our guests attacking each other,
+     * but it doesn't stop the outer hypervisor's guests attacking us.  Leave
+     * a note to this effect.
+     */
+    if ( cpu_has_avx && opt_gds_mit )
+    {
+        setup_clear_cpu_cap(X86_FEATURE_AVX);
+        printk(XENLOG_WARNING "Mitigating GDS by disabling AVX%s\n",
+               cpu_has_hypervisor ?
+               " while virtualised - protections are best-effort" : "");
+    }
+}
+
 void spec_ctrl_init_domain(struct domain *d)
 {
     bool pv = is_pv_domain(d);
@@ -1630,6 +1786,8 @@ void __init init_speculation_mitigations
                             opt_srb_lock ? 0 : MCU_OPT_CTRL_RNGDS_MITG_DIS);
     }
 
+    gds_calculations();
+
     print_details(thunk);
 
     /*