File xsa297-3.patch of Package xen

# Commit b01017a0a8ae283e714584f6dae0dc5f5a86456c
# Date 2019-04-18 14:31:59 +0100
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Andrew Cooper <andrew.cooper3@citrix.com>
x86/spec-ctrl: Introduce options to control VERW flushing

The Microarchitectural Data Sampling vulnerability is split into categories
with subtly different properties:

 * Microarchitectural Load Port Data Sampling
 * Microarchitectural Store Buffer Data Sampling
 * Microarchitectural Fill Buffer Data Sampling

All structures are flushed by the new enhanced VERW functionality, but the
conditions under which flushing is necessary vary.

For this concise overview of the issues and default logic, the abbreviations
SP (Store Port), FB (Fill Buffer), LP (Load Port) and HT (Hyperthreading) are
used for brevity:

 * Vulnerable hardware is divided into two categories - parts with SP only,
   and parts with LP or FB in addition to SP.

 * SP only has an HT interaction when the thread goes idle, due to the static
   partitioning of resources.  LP and FB have HT interactions at all points,
   due to the competitive sharing of resources.  All issues potentially leak
   data across the return-to-guest transition.

 * The microcode which implements VERW flushing also extends MSR_FLUSH_CMD, so
   we don't need to do both on the HVM return-to-guest path.  However, the
   Knights range of processors are immune to L1TF (therefore have no
   MSR_FLUSH_CMD), but are vulnerable to MDS, so do require VERW on the HVM
   path.

Note that we deliberately support mds=1 even without MD_CLEAR in case the
microcode has been updated but the feature bit not exposed.

This is part of XSA-297 / CVE-2018-12126 / CVE-2018-12127 / CVE-2018-12130.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>

--- a/docs/misc/xen-command-line.pandoc
+++ b/docs/misc/xen-command-line.pandoc
@@ -1895,7 +1895,7 @@ not be able to control the state of the
 By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`).
 
 ### spec-ctrl (x86)
-> `= List of [ <bool>, xen=<bool>, {pv,hvm,msr-sc,rsb}=<bool>,
+> `= List of [ <bool>, xen=<bool>, {pv,hvm,msr-sc,rsb,mds}=<bool>,
 >              bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,eager-fpu,
 >              l1d-flush}=<bool> ]`
 
@@ -1919,9 +1919,9 @@ in place for guests to use.
 
 Use of a positive boolean value for either of these options is invalid.
 
-The booleans `pv=`, `hvm=`, `msr-sc=` and `rsb=` offer fine grained control
-over the alternative blocks used by Xen.  These impact Xen's ability to
-protect itself, and Xen's ability to virtualise support for guests to use.
+The booleans `pv=`, `hvm=`, `msr-sc=`, `rsb=` and `mds=` offer fine grained
+control over the alternative blocks used by Xen.  These impact Xen's ability
+to protect itself, and Xen's ability to virtualise support for guests to use.
 
 * `pv=` and `hvm=` offer control over all suboptions for PV and HVM guests
   respectively.
@@ -1930,6 +1930,8 @@ protect itself, and Xen's ability to vir
   guests and if disabled, guests will be unable to use IBRS/STIBP/SSBD/etc.
 * `rsb=` offers control over whether to overwrite the Return Stack Buffer /
   Return Address Stack on entry to Xen.
+* `mds=` offers control over whether to VERW to flush microarchitectural
+  buffers on idle and exit from Xen.
 
 If Xen was compiled with INDIRECT_THUNK support, `bti-thunk=` can be used to
 select which of the thunks gets patched into the `__x86_indirect_thunk_%reg`
--- a/xen/arch/x86/spec_ctrl.c
+++ b/xen/arch/x86/spec_ctrl.c
@@ -35,6 +35,8 @@ static bool __initdata opt_msr_sc_pv = t
 static bool __initdata opt_msr_sc_hvm = true;
 static bool __initdata opt_rsb_pv = true;
 static bool __initdata opt_rsb_hvm = true;
+static int8_t __initdata opt_mds_pv = -1;
+static int8_t __initdata opt_mds_hvm = -1;
 
 /* Cmdline controls for Xen's speculative settings. */
 static enum ind_thunk {
@@ -59,6 +61,9 @@ paddr_t __read_mostly l1tf_addr_mask, __
 static bool __initdata cpu_has_bug_l1tf;
 static unsigned int __initdata l1d_maxphysaddr;
 
+static bool __initdata cpu_has_bug_mds; /* MLPDS and/or MFBDS. */
+static bool __initdata cpu_has_bug_msbds_only;
+
 static int __init parse_spec_ctrl(const char *s)
 {
     const char *ss;
@@ -94,6 +99,8 @@ static int __init parse_spec_ctrl(const
         disable_common:
             opt_rsb_pv = false;
             opt_rsb_hvm = false;
+            opt_mds_pv = 0;
+            opt_mds_hvm = 0;
 
             opt_thunk = THUNK_JMP;
             opt_ibrs = 0;
@@ -116,11 +123,13 @@ static int __init parse_spec_ctrl(const
         {
             opt_msr_sc_pv = val;
             opt_rsb_pv = val;
+            opt_mds_pv = val;
         }
         else if ( (val = parse_boolean("hvm", s, ss)) >= 0 )
         {
             opt_msr_sc_hvm = val;
             opt_rsb_hvm = val;
+            opt_mds_hvm = val;
         }
         else if ( (val = parse_boolean("msr-sc", s, ss)) >= 0 )
         {
@@ -132,6 +141,11 @@ static int __init parse_spec_ctrl(const
             opt_rsb_pv = val;
             opt_rsb_hvm = val;
         }
+        else if ( (val = parse_boolean("mds", s, ss)) >= 0 )
+        {
+            opt_mds_pv = val;
+            opt_mds_hvm = val;
+        }
 
         /* Xen's speculative sidechannel mitigation settings. */
         else if ( !strncmp(s, "bti-thunk=", 10) )
@@ -317,7 +331,7 @@ static void __init print_details(enum in
                "\n");
 
     /* Settings for Xen's protection, irrespective of guests. */
-    printk("  Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s, Other:%s%s\n",
+    printk("  Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s, Other:%s%s%s\n",
            thunk == THUNK_NONE      ? "N/A" :
            thunk == THUNK_RETPOLINE ? "RETPOLINE" :
            thunk == THUNK_LFENCE    ? "LFENCE" :
@@ -327,7 +341,8 @@ static void __init print_details(enum in
            !boot_cpu_has(X86_FEATURE_SSBD)           ? "" :
            (default_xen_spec_ctrl & SPEC_CTRL_SSBD)  ? " SSBD+" : " SSBD-",
            opt_ibpb                                  ? " IBPB"  : "",
-           opt_l1d_flush                             ? " L1D_FLUSH" : "");
+           opt_l1d_flush                             ? " L1D_FLUSH" : "",
+           opt_mds_pv || opt_mds_hvm                 ? " VERW"  : "");
 
     /* L1TF diagnostics, printed if vulnerable or PV shadowing is in use. */
     if ( cpu_has_bug_l1tf || opt_pv_l1tf_hwdom || opt_pv_l1tf_domu )
@@ -737,6 +752,107 @@ static __init void l1tf_calculations(uin
                                             : (3ul << (paddr_bits - 2))));
 }
 
+/* Calculate whether this CPU is vulnerable to MDS. */
+static __init void mds_calculations(uint64_t caps)
+{
+    /* MDS is only known to affect Intel Family 6 processors at this time. */
+    if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
+         boot_cpu_data.x86 != 6 )
+        return;
+
+    /* Any processor advertising MDS_NO should be not vulnerable to MDS. */
+    if ( caps & ARCH_CAPS_MDS_NO )
+        return;
+
+    switch ( boot_cpu_data.x86_model )
+    {
+        /*
+         * Core processors since at least Nehalem are vulnerable.
+         */
+    case 0x1f: /* Auburndale / Havendale */
+    case 0x1e: /* Nehalem */
+    case 0x1a: /* Nehalem EP */
+    case 0x2e: /* Nehalem EX */
+    case 0x25: /* Westmere */
+    case 0x2c: /* Westmere EP */
+    case 0x2f: /* Westmere EX */
+    case 0x2a: /* SandyBridge */
+    case 0x2d: /* SandyBridge EP/EX */
+    case 0x3a: /* IvyBridge */
+    case 0x3e: /* IvyBridge EP/EX */
+    case 0x3c: /* Haswell */
+    case 0x3f: /* Haswell EX/EP */
+    case 0x45: /* Haswell D */
+    case 0x46: /* Haswell H */
+    case 0x3d: /* Broadwell */
+    case 0x47: /* Broadwell H */
+    case 0x4f: /* Broadwell EP/EX */
+    case 0x56: /* Broadwell D */
+    case 0x4e: /* Skylake M */
+    case 0x5e: /* Skylake D */
+        cpu_has_bug_mds = true;
+        break;
+
+        /*
+         * Some Core processors have per-stepping vulnerability.
+         */
+    case 0x55: /* Skylake-X / Cascade Lake */
+        if ( boot_cpu_data.x86_mask <= 5 )
+            cpu_has_bug_mds = true;
+        break;
+
+    case 0x8e: /* Kaby / Coffee / Whiskey Lake M */
+        if ( boot_cpu_data.x86_mask <= 0xb )
+            cpu_has_bug_mds = true;
+        break;
+
+    case 0x9e: /* Kaby / Coffee / Whiskey Lake D */
+        if ( boot_cpu_data.x86_mask <= 0xc )
+            cpu_has_bug_mds = true;
+        break;
+
+        /*
+         * Very old and very new Atom processors are not vulnerable.
+         */
+    case 0x1c: /* Pineview */
+    case 0x26: /* Lincroft */
+    case 0x27: /* Penwell */
+    case 0x35: /* Cloverview */
+    case 0x36: /* Cedarview */
+    case 0x7a: /* Goldmont */
+        break;
+
+        /*
+         * Middling Atom processors are vulnerable to just the Store Buffer
+         * aspect.
+         */
+    case 0x37: /* Baytrail / Valleyview (Silvermont) */
+    case 0x4a: /* Merrifield */
+    case 0x4c: /* Cherrytrail / Brasswell */
+    case 0x4d: /* Avaton / Rangely (Silvermont) */
+    case 0x5a: /* Moorefield */
+    case 0x5d:
+    case 0x65:
+    case 0x6e:
+    case 0x75:
+        /*
+         * Knights processors (which are based on the Silvermont/Airmont
+         * microarchitecture) are similarly only affected by the Store Buffer
+         * aspect.
+         */
+    case 0x57: /* Knights Landing */
+    case 0x85: /* Knights Mill */
+        cpu_has_bug_msbds_only = true;
+        break;
+
+    default:
+        printk("Unrecognised CPU model %#x - assuming vulnerable to MDS\n",
+               boot_cpu_data.x86_model);
+        cpu_has_bug_mds = true;
+        break;
+    }
+}
+
 void __init init_speculation_mitigations(void)
 {
     enum ind_thunk thunk = THUNK_DEFAULT;
@@ -924,6 +1040,47 @@ void __init init_speculation_mitigations
             "enabled.  Please assess your configuration and choose an\n"
             "explicit 'smt=<bool>' setting.  See XSA-273.\n");
 
+    mds_calculations(caps);
+
+    /*
+     * By default, enable PV and HVM mitigations on MDS-vulnerable hardware.
+     * This will only be a token effort for MLPDS/MFBDS when HT is enabled,
+     * but it is somewhat better than nothing.
+     */
+    if ( opt_mds_pv == -1 )
+        opt_mds_pv = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) &&
+                      boot_cpu_has(X86_FEATURE_MD_CLEAR));
+    if ( opt_mds_hvm == -1 )
+        opt_mds_hvm = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) &&
+                       boot_cpu_has(X86_FEATURE_MD_CLEAR));
+
+    /*
+     * Enable MDS defences as applicable.  The PV blocks need using all the
+     * time, and the Idle blocks need using if either PV or HVM defences are
+     * used.
+     *
+     * HVM is more complicated.  The MD_CLEAR microcode extends L1D_FLUSH with
+     * equivelent semantics to avoid needing to perform both flushes on the
+     * HVM path.  The HVM blocks don't need activating if our hypervisor told
+     * us it was handling L1D_FLUSH, or we are using L1D_FLUSH ourselves.
+     */
+    if ( opt_mds_pv )
+        setup_force_cpu_cap(X86_FEATURE_SC_VERW_PV);
+    if ( opt_mds_pv || opt_mds_hvm )
+        setup_force_cpu_cap(X86_FEATURE_SC_VERW_IDLE);
+    if ( opt_mds_hvm && !(caps & ARCH_CAPS_SKIP_L1DFL) && !opt_l1d_flush )
+        setup_force_cpu_cap(X86_FEATURE_SC_VERW_HVM);
+
+    /*
+     * Warn the user if they are on MLPDS/MFBDS-vulnerable hardware with HT
+     * active and no explicit SMT choice.
+     */
+    if ( opt_smt == -1 && cpu_has_bug_mds && hw_smt_enabled )
+        warning_add(
+            "Booted on MLPDS/MFBDS-vulnerable hardware with SMT/Hyperthreading\n"
+            "enabled.  Mitigations will not be fully effective.  Please\n"
+            "choose an explicit smt=<bool> setting.  See XSA-297.\n");
+
     print_details(thunk, caps);
 
     /*
openSUSE Build Service is sponsored by