File 5a856a2b-x86-xpti-hide-almost-all-of-Xen-image-mappings.patch of Package xen

# Commit 422588e88511d17984544c0f017a927de3315290
# Date 2018-02-15 11:08:27 +0000
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Andrew Cooper <andrew.cooper3@citrix.com>
x86/xpti: Hide almost all of .text and all .data/.rodata/.bss mappings

The current XPTI implementation isolates the directmap (and therefore a lot of
guest data), but a large quantity of CPU0's state (including its stack)
remains visible.

Furthermore, an attacker able to read .text is in a vastly superior position
to normal when it comes to fingerprinting Xen for known vulnerabilities, or
scanning for ROP/Spectre gadgets.

Collect together the entrypoints in .text.entry (currently 3x4k frames, but
can almost certainly be slimmed down), and create a common mapping which is
inserted into each per-cpu shadow.  The stubs are also inserted into this
mapping by pointing at the in-use L2.  This allows stubs allocated later (SMP
boot, or CPU hotplug) to work without further changes to the common mappings.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>

# Commit d1d6fc97d66cf56847fc0bcc2ddc370707c22378
# Date 2018-03-06 16:46:27 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/xpti: really hide almost all of Xen image

Commit 422588e885 ("x86/xpti: Hide almost all of .text and all
.data/.rodata/.bss mappings") carefully limited the Xen image cloning to
just entry code, but then overwrote the just allocated and populated L3
entry with the normal one again covering both Xen image and stubs.

Drop the respective code in favor of an explicit clone_mapping()
invocation. This in turn now requires setup_cpu_root_pgt() to run after
stub setup in all cases. Additionally, with (almost) no unintended
mappings left, the BSP's IDT now also needs to be page aligned.

The moving ahead of cleanup_cpu_root_pgt() is not strictly necessary
for functionality, but things are more logical this way, and we retain
cleanup being done in the inverse order of setup.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>

# Commit 044fedfaa29b5d5774196e3fc7d955a48bfceac4
# Date 2018-03-09 15:42:24 +0000
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Andrew Cooper <andrew.cooper3@citrix.com>
x86/traps: Put idt_table[] back into .bss

c/s d1d6fc97d "x86/xpti: really hide almost all of Xen image" accidentally
moved idt_table[] from .bss to .data by virtue of using the page_aligned
section.  We also have .bss.page_aligned, so use that.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Wei Liu <wei.liu2@citrix.com>

--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -1897,9 +1897,6 @@ mode.
 Override default selection of whether to isolate 64-bit PV guest page
 tables.
 
-** WARNING: Not yet a complete isolation implementation, but better than
-nothing. **
-
 ### xsave
 > `= <boolean>`
 
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -644,13 +644,24 @@ static int clone_mapping(const void *ptr
 {
     unsigned long linear = (unsigned long)ptr, pfn;
     unsigned int flags;
-    l3_pgentry_t *pl3e = l4e_to_l3e(idle_pg_table[root_table_offset(linear)]) +
-                         l3_table_offset(linear);
+    l3_pgentry_t *pl3e;
     l2_pgentry_t *pl2e;
     l1_pgentry_t *pl1e;
 
-    if ( linear < DIRECTMAP_VIRT_START )
-        return 0;
+    /*
+     * Sanity check 'linear'.  We only allow cloning from the Xen virtual
+     * range, and in particular, only from the directmap and .text ranges.
+     */
+    if ( root_table_offset(linear) > ROOT_PAGETABLE_LAST_XEN_SLOT ||
+         root_table_offset(linear) < ROOT_PAGETABLE_FIRST_XEN_SLOT )
+        return -EINVAL;
+
+    if ( linear < XEN_VIRT_START ||
+         (linear >= XEN_VIRT_END && linear < DIRECTMAP_VIRT_START) )
+        return -EINVAL;
+
+    pl3e = l4e_to_l3e(idle_pg_table[root_table_offset(linear)]) +
+        l3_table_offset(linear);
 
     flags = l3e_get_flags(*pl3e);
     ASSERT(flags & _PAGE_PRESENT);
@@ -742,6 +753,10 @@ static __read_mostly int8_t opt_xpti = -
 boolean_param("xpti", opt_xpti);
 DEFINE_PER_CPU(root_pgentry_t *, root_pgt);
 
+static root_pgentry_t common_pgt;
+
+extern const char _stextentry[], _etextentry[];
+
 static int setup_cpu_root_pgt(unsigned int cpu)
 {
     root_pgentry_t *rpt;
@@ -762,8 +777,23 @@ static int setup_cpu_root_pgt(unsigned i
         idle_pg_table[root_table_offset(RO_MPT_VIRT_START)];
     /* SH_LINEAR_PT inserted together with guest mappings. */
     /* PERDOMAIN inserted during context switch. */
-    rpt[root_table_offset(XEN_VIRT_START)] =
-        idle_pg_table[root_table_offset(XEN_VIRT_START)];
+
+    /* One-time setup of common_pgt, which maps .text.entry and the stubs. */
+    if ( unlikely(!root_get_intpte(common_pgt)) )
+    {
+        const char *ptr;
+
+        for ( rc = 0, ptr = _stextentry;
+              !rc && ptr < _etextentry; ptr += PAGE_SIZE )
+            rc = clone_mapping(ptr, rpt);
+
+        if ( rc )
+            return rc;
+
+        common_pgt = rpt[root_table_offset(XEN_VIRT_START)];
+    }
+
+    rpt[root_table_offset(XEN_VIRT_START)] = common_pgt;
 
     /* Install direct map page table entries for stack, IDT, and TSS. */
     for ( off = rc = 0; !rc && off < STACK_SIZE; off += PAGE_SIZE )
@@ -773,6 +803,8 @@ static int setup_cpu_root_pgt(unsigned i
         rc = clone_mapping(idt_tables[cpu], rpt);
     if ( !rc )
         rc = clone_mapping(&per_cpu(init_tss, cpu), rpt);
+    if ( !rc )
+        rc = clone_mapping((void *)per_cpu(stubs.addr, cpu), rpt);
 
     return rc;
 }
@@ -781,6 +813,7 @@ static void cleanup_cpu_root_pgt(unsigne
 {
     root_pgentry_t *rpt = per_cpu(root_pgt, cpu);
     unsigned int r;
+    unsigned long stub_linear = per_cpu(stubs.addr, cpu);
 
     if ( !rpt )
         return;
@@ -825,6 +858,16 @@ static void cleanup_cpu_root_pgt(unsigne
     }
 
     free_xen_pagetable(rpt);
+
+    /* Also zap this CPU's stub mapping, reached via the shared common_pgt. */
+    if ( stub_linear )
+    {
+        l3_pgentry_t *l3t = l4e_to_l3e(common_pgt);
+        l2_pgentry_t *l2t = l3e_to_l2e(l3t[l3_table_offset(stub_linear)]);
+        l1_pgentry_t *l1t = l2e_to_l1e(l2t[l2_table_offset(stub_linear)]);
+
+        l1t[l1_table_offset(stub_linear)] = l1e_empty();
+    }
 }
 
 static void cpu_smpboot_free(unsigned int cpu)
@@ -848,6 +891,8 @@ static void cpu_smpboot_free(unsigned in
     if ( per_cpu(scratch_cpumask, cpu) != &scratch_cpu0mask )
         free_cpumask_var(per_cpu(scratch_cpumask, cpu));
 
+    cleanup_cpu_root_pgt(cpu);
+
     if ( per_cpu(stubs.addr, cpu) )
     {
         mfn_t mfn = _mfn(per_cpu(stubs.mfn, cpu));
@@ -865,8 +910,6 @@ static void cpu_smpboot_free(unsigned in
             free_domheap_page(mfn_to_page(mfn));
     }
 
-    cleanup_cpu_root_pgt(cpu);
-
     order = get_order_from_pages(NR_RESERVED_GDT_PAGES);
     free_xenheap_pages(per_cpu(gdt_table, cpu), order);
 
@@ -922,9 +965,6 @@ static int cpu_smpboot_alloc(unsigned in
     set_ist(&idt_tables[cpu][TRAP_nmi],           IST_NONE);
     set_ist(&idt_tables[cpu][TRAP_machine_check], IST_NONE);
 
-    if ( setup_cpu_root_pgt(cpu) )
-        goto oom;
-
     for ( stub_page = 0, i = cpu & ~(STUBS_PER_PAGE - 1);
           i < nr_cpu_ids && i <= (cpu | (STUBS_PER_PAGE - 1)); ++i )
         if ( cpu_online(i) && cpu_to_node(i) == node )
@@ -938,6 +978,9 @@ static int cpu_smpboot_alloc(unsigned in
         goto oom;
     per_cpu(stubs.addr, cpu) = stub_page + STUB_BUF_CPU_OFFS(cpu);
 
+    if ( setup_cpu_root_pgt(cpu) )
+        goto oom;
+
     if ( secondary_socket_cpumask == NULL &&
          (secondary_socket_cpumask = xzalloc(cpumask_t)) == NULL )
         goto oom;
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -102,7 +102,8 @@ DEFINE_PER_CPU_READ_MOSTLY(struct desc_s
 DEFINE_PER_CPU_READ_MOSTLY(struct desc_struct *, compat_gdt_table);
 
 /* Master table, used by CPU0. */
-idt_entry_t idt_table[IDT_ENTRIES];
+idt_entry_t __section(".bss.page_aligned") __aligned(PAGE_SIZE)
+    idt_table[IDT_ENTRIES];
 
 /* Pointer to the IDT of every CPU. */
 idt_entry_t *idt_tables[NR_CPUS] __read_mostly;
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -13,6 +13,8 @@
 #include <public/xen.h>
 #include <irq_vectors.h>
 
+        .section .text.entry, "ax", @progbits
+
 ENTRY(entry_int82)
         ASM_CLAC
         pushq $0
@@ -270,6 +272,9 @@ ENTRY(compat_int80_direct_trap)
         call  compat_create_bounce_frame
         jmp   compat_test_all_events
 
+        /* compat_create_bounce_frame & helpers don't need to be in .text.entry */
+        .text
+
 /* CREATE A BASIC EXCEPTION FRAME ON GUEST OS (RING-1) STACK:            */
 /*   {[ERRCODE,] EIP, CS, EFLAGS, [ESP, SS]}                             */
 /* %rdx: trap_bounce, %rbx: struct vcpu                                  */
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -14,6 +14,8 @@
 #include <public/xen.h>
 #include <irq_vectors.h>
 
+        .section .text.entry, "ax", @progbits
+
 /* %rbx: struct vcpu */
 ENTRY(switch_to_kernel)
         leaq  VCPU_trap_bounce(%rbx),%rdx
@@ -357,6 +359,9 @@ int80_slow_path:
         subq  $2,UREGS_rip(%rsp)
         jmp   handle_exception_saved
 
+        /* create_bounce_frame & helpers don't need to be in .text.entry */
+        .text
+
 /* CREATE A BASIC EXCEPTION FRAME ON GUEST OS STACK:                     */
 /*   { RCX, R11, [ERRCODE,] RIP, CS, RFLAGS, RSP, SS }                   */
 /* %rdx: trap_bounce, %rbx: struct vcpu                                  */
@@ -487,6 +492,8 @@ ENTRY(dom_crash_sync_extable)
         jmp   asm_domain_crash_synchronous /* Does not return */
         .popsection
 
+        .section .text.entry, "ax", @progbits
+
 ENTRY(common_interrupt)
         SAVE_ALL CLAC
 
@@ -846,8 +853,7 @@ GLOBAL(trap_nop)
 
 
 
-.section .rodata, "a", @progbits
-
+        .pushsection .rodata, "a", @progbits
 ENTRY(exception_table)
         .quad do_trap
         .quad do_debug
@@ -873,9 +879,10 @@ ENTRY(exception_table)
         .quad do_reserved_trap /* Architecturally reserved exceptions. */
         .endr
         .size exception_table, . - exception_table
+        .popsection
 
 /* Table of automatically generated entry points.  One per vector. */
-        .section .init.rodata, "a", @progbits
+        .pushsection .init.rodata, "a", @progbits
 GLOBAL(autogen_entrypoints)
         /* pop into the .init.rodata section and record an entry point. */
         .macro entrypoint ent
@@ -884,7 +891,7 @@ GLOBAL(autogen_entrypoints)
         .popsection
         .endm
 
-        .text
+        .popsection
 autogen_stubs: /* Automatically generated stubs. */
 
         vec = 0
--- a/xen/arch/x86/xen.lds.S
+++ b/xen/arch/x86/xen.lds.S
@@ -60,6 +60,13 @@ SECTIONS
         _stext = .;            /* Text and read-only data */
        *(.text)
        *(.text.__x86_indirect_thunk_*)
+
+       . = ALIGN(PAGE_SIZE);
+       _stextentry = .;
+       *(.text.entry)
+       . = ALIGN(PAGE_SIZE);
+       _etextentry = .;
+
        *(.text.cold)
        *(.text.unlikely)
        *(.fixup)
openSUSE Build Service is sponsored by