File 5a9eb85c-x86-slightly-reduce-XPTI-overhead.patch of Package xen.8005

# Commit 9d1d31ad9498e6ceb285d5774e34fed5f648c273
# Date 2018-03-06 16:48:44 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86: slightly reduce Meltdown band-aid overhead

I'm not sure why I didn't do this right away: By avoiding the use of
global PTEs in the cloned directmap, there's no need to fiddle with
CR4.PGE on any of the entry paths. Only the exit paths need to flush
global mappings.

The reduced flushing, however, requires that we now have interrupts off
on all entry paths until after the page table switch, so that flush IPIs
can't be serviced while on the restricted pagetables. Servicing one
there would leave a window in which a potentially stale guest global
mapping could be brought into the TLB. Along those lines the "sync" IPI
after L4 entry updates now needs to become a real (and global) flush
IPI, so that inside Xen we'll also pick up such changes.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Tested-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

# Commit c4dd58f0cf23cdf119bbccedfb8c24435fc6f3ab
# Date 2018-03-16 17:27:36 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86: correct EFLAGS.IF in SYSENTER frame

Commit 9d1d31ad94 ("x86: slightly reduce Meltdown band-aid overhead")
moved the STI past the PUSHF. While this isn't an active problem (as we
force EFLAGS.IF to 1 before exiting to guest context), let's not risk
internal confusion by finding a PV guest frame with interrupts
apparently off.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>

--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -3934,14 +3934,9 @@ long do_mmu_update(
     {
         /*
          * Force other vCPU-s of the affected guest to pick up L4 entry
-         * changes (if any). Issue a flush IPI with empty operation mask to
-         * facilitate this (including ourselves waiting for the IPI to
-         * actually have arrived). Utilize the fact that FLUSH_VA_VALID is
-         * meaningless without FLUSH_CACHE, but will allow to pass the no-op
-         * check in flush_area_mask().
+         * changes (if any).
          */
-        flush_area_mask(pt_owner->domain_dirty_cpumask,
-                        ZERO_BLOCK_PTR, FLUSH_VA_VALID);
+        flush_mask(pt_owner->domain_dirty_cpumask, FLUSH_TLB_GLOBAL);
     }
 
     perfc_add(num_page_updates, i);
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -726,6 +726,7 @@ static int clone_mapping(const void *ptr
     }
 
     pl1e += l1_table_offset(linear);
+    flags &= ~_PAGE_GLOBAL;
 
     if ( l1e_get_flags(*pl1e) & _PAGE_PRESENT )
     {
@@ -951,8 +952,16 @@ void __init smp_prepare_cpus(unsigned in
     if ( rc )
         panic("Error %d setting up PV root page table\n", rc);
     if ( per_cpu(root_pgt, 0) )
+    {
         get_cpu_info()->pv_cr3 = __pa(per_cpu(root_pgt, 0));
 
+        /*
+         * All entry points which may need to switch page tables have to start
+         * with interrupts off. Re-write what pv_trap_init() has put there.
+         */
+        _set_gate(idt_table + LEGACY_SYSCALL_VECTOR, 14, 3, &int80_direct_trap);
+    }
+
     if ( !zalloc_cpumask_var(&per_cpu(cpu_sibling_mask, 0)) ||
          !zalloc_cpumask_var(&per_cpu(cpu_core_mask, 0)) )
         panic("No memory for boot CPU sibling/core maps");
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -177,7 +177,7 @@ UNLIKELY_END(exit_cr3)
  * the space left by the trampoline.
  */
 ENTRY(syscall_enter)
-        sti
+        /* sti could live here when we don't switch page tables below. */
         movl  $FLAT_KERNEL_SS,24(%rsp)
         pushq %rcx
         pushq $0
@@ -194,9 +194,10 @@ ENTRY(syscall_enter)
         jz    .Llstar_cr3_okay
         mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
         neg   %rcx
-        write_cr3 rcx, r11, r12
+        mov   %rcx, %cr3
         movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
 .Llstar_cr3_okay:
+        sti
 
         __GET_CURRENT(%rbx)
         movq  VCPU_domain(%rbx),%rcx
@@ -340,7 +341,7 @@ bad_hypercall:
         jmp  test_all_events
 
 ENTRY(sysenter_entry)
-        sti
+        /* sti could live here when we don't switch page tables below. */
         pushq $FLAT_USER_SS
         pushq $0
         pushfq
@@ -355,14 +356,17 @@ GLOBAL(sysenter_eflags_saved)
         /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
 
         GET_STACK_BASE(%rbx)
+        /* PUSHF above has saved EFLAGS.IF clear (the caller had it set). */
+        orl   $X86_EFLAGS_IF, UREGS_eflags(%rsp)
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
         neg   %rcx
         jz    .Lsyse_cr3_okay
         mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
         neg   %rcx
-        write_cr3 rcx, rdi, rsi
+        mov   %rcx, %cr3
         movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
 .Lsyse_cr3_okay:
+        sti
 
         __GET_CURRENT(%rbx)
         cmpb  $0,VCPU_sysenter_disables_events(%rbx)
@@ -408,9 +412,10 @@ ENTRY(int80_direct_trap)
         jz    .Lint80_cr3_okay
         mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
         neg   %rcx
-        write_cr3 rcx, rdi, rsi
+        mov   %rcx, %cr3
         movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
 .Lint80_cr3_okay:
+        sti
 
         cmpb  $0,untrusted_msi(%rip)
 UNLIKELY_START(ne, msi_check)
@@ -580,7 +585,7 @@ ENTRY(common_interrupt)
         mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
         neg   %rcx
 .Lintr_cr3_load:
-        write_cr3 rcx, rdi, rsi
+        mov   %rcx, %cr3
         xor   %ecx, %ecx
         mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
         testb $3, UREGS_cs(%rsp)
@@ -621,7 +626,7 @@ GLOBAL(handle_exception)
         mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
         neg   %rcx
 .Lxcpt_cr3_load:
-        write_cr3 rcx, rdi, rsi
+        mov   %rcx, %cr3
         xor   %ecx, %ecx
         mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
         testb $3, UREGS_cs(%rsp)
@@ -772,7 +777,7 @@ ENTRY(double_fault)
         jns   .Ldblf_cr3_load
         neg   %rbx
 .Ldblf_cr3_load:
-        write_cr3 rbx, rdi, rsi
+        mov   %rbx, %cr3
 .Ldblf_cr3_okay:
 
         movq  %rsp,%rdi
@@ -806,7 +811,7 @@ handle_ist_exception:
         mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
         neg   %rcx
 .List_cr3_load:
-        write_cr3 rcx, rdi, rsi
+        mov   %rcx, %cr3
         movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
 .List_cr3_okay:
 
openSUSE Build Service is sponsored by