File 5d516531-x86-xpti-dont-leak-TSS-adjacent-data.patch of Package xen.15650
# Commit 7888440625617693487495a7842e6a991ead2647
# Date 2019-08-12 14:10:09 +0100
# Author Jan Beulich <JBeulich@suse.com>
# Committer Andrew Cooper <andrew.cooper3@citrix.com>
x86/xpti: Don't leak TSS-adjacent percpu data via Meltdown
The XPTI work restricted the visibility of most of memory, but missed a few
aspects when it came to the TSS.
Given that the TSS is just an object in percpu data, the 4k mapping for it
created in setup_cpu_root_pgt() maps adjacent percpu data, making it all
leakable via Meltdown, even when XPTI is in use.
Furthermore, no care is taken to check that the TSS doesn't cross a page
boundary. As it turns out, struct tss_struct is aligned on its size which
does prevent it straddling a page boundary.
Rework the TSS types while making this change. Rename tss_struct to tss64, to
mirror the existing tss32 structure we have in HVM's Task Switch logic. Drop
tss64's alignment and __cacheline_filler[] field.
Introduce tss_page which contains a single tss64 and keeps the rest of the
page clear, so no adjacent data can be leaked. Move the definition from
setup.c to traps.c, which is a more appropriate place for it to live.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -721,7 +721,7 @@ void load_system_tables(void)
unsigned long stack_bottom = get_stack_bottom(),
stack_top = stack_bottom & ~(STACK_SIZE - 1);
- struct tss_struct *tss = &this_cpu(init_tss);
+ struct tss64 *tss = &this_cpu(tss_page).tss;
struct desc_struct *gdt =
this_cpu(gdt_table) - FIRST_RESERVED_GDT_ENTRY;
struct desc_struct *compat_gdt =
@@ -736,7 +736,7 @@ void load_system_tables(void)
.limit = (IDT_ENTRIES * sizeof(idt_entry_t)) - 1,
};
- *tss = (struct tss_struct){
+ *tss = (struct tss64){
/* Main stack for interrupts/exceptions. */
.rsp0 = stack_bottom,
@@ -761,16 +761,12 @@ void load_system_tables(void)
.bitmap = IOBMP_INVALID_OFFSET,
};
- _set_tssldt_desc(
- gdt + TSS_ENTRY,
- (unsigned long)tss,
- offsetof(struct tss_struct, __cacheline_filler) - 1,
- SYS_DESC_tss_avail);
- _set_tssldt_desc(
- compat_gdt + TSS_ENTRY,
- (unsigned long)tss,
- offsetof(struct tss_struct, __cacheline_filler) - 1,
- SYS_DESC_tss_busy);
+ BUILD_BUG_ON(sizeof(*tss) <= 0x67); /* Mandated by the architecture. */
+
+ _set_tssldt_desc(gdt + TSS_ENTRY, (unsigned long)tss,
+ sizeof(*tss) - 1, SYS_DESC_tss_avail);
+ _set_tssldt_desc(compat_gdt + TSS_ENTRY, (unsigned long)tss,
+ sizeof(*tss) - 1, SYS_DESC_tss_busy);
asm volatile ("lgdt %0" : : "m" (gdtr) );
asm volatile ("lidt %0" : : "m" (idtr) );
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -808,7 +808,7 @@ static void vmx_set_host_env(struct vcpu
__vmwrite(HOST_IDTR_BASE, (unsigned long)idt_tables[cpu]);
__vmwrite(HOST_TR_SELECTOR, TSS_ENTRY << 3);
- __vmwrite(HOST_TR_BASE, (unsigned long)&per_cpu(init_tss, cpu));
+ __vmwrite(HOST_TR_BASE, (unsigned long)&per_cpu(tss_page, cpu).tss);
__vmwrite(HOST_SYSENTER_ESP, get_stack_bottom());
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -15,7 +15,6 @@
#include <xen/domain_page.h>
#include <xen/version.h>
#include <xen/gdbstub.h>
-#include <xen/percpu.h>
#include <xen/hypercall.h>
#include <xen/keyhandler.h>
#include <xen/numa.h>
@@ -100,8 +99,6 @@ unsigned long __read_mostly xen_phys_sta
unsigned long __read_mostly xen_virt_end;
-DEFINE_PER_CPU(struct tss_struct, init_tss);
-
char __section(".bss.stack_aligned") __aligned(STACK_SIZE)
cpu0_stack[STACK_SIZE];
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -825,7 +825,11 @@ static int setup_cpu_root_pgt(unsigned i
if ( !rc )
rc = clone_mapping(idt_tables[cpu], rpt);
if ( !rc )
- rc = clone_mapping(&per_cpu(init_tss, cpu), rpt);
+ {
+ BUILD_BUG_ON(sizeof(this_cpu(tss_page)) != PAGE_SIZE);
+
+ rc = clone_mapping(&per_cpu(tss_page, cpu).tss, rpt);
+ }
if ( !rc )
rc = clone_mapping((void *)per_cpu(stubs.addr, cpu), rpt);
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -106,6 +106,12 @@ idt_entry_t __section(".bss.page_aligned
/* Pointer to the IDT of every CPU. */
idt_entry_t *idt_tables[NR_CPUS] __read_mostly;
+/*
+ * The TSS is smaller than a page, but we give it a full page to avoid
+ * adjacent per-cpu data leaking via Meltdown when XPTI is in use.
+ */
+DEFINE_PER_CPU_PAGE_ALIGNED(struct tss_page, tss_page);
+
void (*ioemul_handle_quirk)(
u8 opcode, char *io_emul_stub, struct cpu_user_regs *regs);
@@ -529,7 +535,7 @@ void show_stack_overflow(unsigned int cp
printk("Valid stack range: %p-%p, sp=%p, tss.rsp0=%p\n",
(void *)esp_top, (void *)esp_bottom, (void *)esp,
- (void *)per_cpu(init_tss, cpu).rsp0);
+ (void *)per_cpu(tss_page, cpu).tss.rsp0);
/*
* Trigger overflow trace if %esp is anywhere within the guard page, or
@@ -1901,7 +1907,7 @@ static void __init set_intr_gate(unsigne
void load_TR(void)
{
- struct tss_struct *tss = &this_cpu(init_tss);
+ struct tss64 *tss = &this_cpu(tss_page).tss;
struct desc_ptr old_gdt, tss_gdt = {
.base = (long)(this_cpu(gdt_table) - FIRST_RESERVED_GDT_ENTRY),
.limit = LAST_RESERVED_GDT_BYTE
@@ -1909,14 +1915,10 @@ void load_TR(void)
_set_tssldt_desc(
this_cpu(gdt_table) + TSS_ENTRY - FIRST_RESERVED_GDT_ENTRY,
- (unsigned long)tss,
- offsetof(struct tss_struct, __cacheline_filler) - 1,
- SYS_DESC_tss_avail);
+ (unsigned long)tss, sizeof(*tss) - 1, SYS_DESC_tss_avail);
_set_tssldt_desc(
this_cpu(compat_gdt_table) + TSS_ENTRY - FIRST_RESERVED_GDT_ENTRY,
- (unsigned long)tss,
- offsetof(struct tss_struct, __cacheline_filler) - 1,
- SYS_DESC_tss_busy);
+ (unsigned long)tss, sizeof(*tss) - 1, SYS_DESC_tss_busy);
/* Switch to non-compat GDT (which has B bit clear) to execute LTR. */
asm volatile (
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -458,7 +458,7 @@ static always_inline void __mwait(unsign
#define IOBMP_BYTES 8192
#define IOBMP_INVALID_OFFSET 0x8000
-struct __packed __cacheline_aligned tss_struct {
+struct __packed tss64 {
uint32_t :32;
uint64_t rsp0, rsp1, rsp2;
uint64_t :64;
@@ -469,9 +469,11 @@ struct __packed __cacheline_aligned tss_
uint64_t ist[7];
uint64_t :64;
uint16_t :16, bitmap;
- /* Pads the TSS to be cacheline-aligned (total size is 0x80). */
- uint8_t __cacheline_filler[24];
};
+struct tss_page {
+ struct tss64 __aligned(PAGE_SIZE) tss;
+};
+DECLARE_PER_CPU(struct tss_page, tss_page);
#define IST_NONE 0UL
#define IST_DF 1UL
@@ -496,7 +498,6 @@ static always_inline void set_ist(idt_en
extern idt_entry_t idt_table[];
extern idt_entry_t *idt_tables[];
-DECLARE_PER_CPU(struct tss_struct, init_tss);
DECLARE_PER_CPU(root_pgentry_t *, root_pgt);
extern void init_int80_direct_trap(struct vcpu *v);