File 5a9eb85c-x86-slightly-reduce-XPTI-overhead.patch of Package xen.8005
# Commit 9d1d31ad9498e6ceb285d5774e34fed5f648c273
# Date 2018-03-06 16:48:44 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86: slightly reduce Meltdown band-aid overhead

I'm not sure why I didn't do this right away: By avoiding the use of
global PTEs in the cloned directmap, there's no need to fiddle with
CR4.PGE on any of the entry paths. Only the exit paths need to flush
global mappings.
The reduced flushing, however, requires that we now have interrupts off
on all entry paths until after the page table switch, so that flush IPIs
can't be serviced while on the restricted page tables, leaving a window
where a potentially stale guest global mapping can be brought into the
TLB. Along those lines the "sync" IPI after L4 entry updates now needs
to become a real (and global) flush IPI, so that inside Xen we'll also
pick up such changes.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Tested-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
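
To make the rationale concrete, here is a minimal C sketch of the two
invariants the patch relies on. It is illustrative only (the helper names
and the simplified flow are mine, not Xen's): non-global translations are
evicted by any write to %cr3, so the entry-path page-table switch needs no
CR4.PGE toggling, and interrupts must stay masked until that switch has
happened.

    #include <stdint.h>

    #define _PAGE_GLOBAL 0x100U            /* PTE bit 8 (G) on x86 */

    /*
     * Cloned (restricted) mappings are installed without _PAGE_GLOBAL,
     * so a plain write to %cr3 flushes them from the TLB; no CR4.PGE
     * fiddling is needed on the entry paths (illustrative helper only).
     */
    static inline uint64_t clone_l1e_flags(uint64_t flags)
    {
        return flags & ~(uint64_t)_PAGE_GLOBAL;
    }

    /*
     * Entry-path switch from the restricted tables to the full Xen
     * tables. Interrupts must already be off: a TLB flush IPI serviced
     * while still on the restricted tables would leave a window for a
     * stale guest global mapping to be brought (back) into the TLB.
     */
    static inline void enter_full_xen_tables(uint64_t full_cr3)
    {
        asm volatile ( "mov %0, %%cr3" :: "r" (full_cr3) : "memory" );
        asm volatile ( "sti" );    /* only now is re-enabling safe */
    }
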
# Commit c4dd58f0cf23cdf119bbccedfb8c24435fc6f3ab
# Date 2018-03-16 17:27:36 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86: correct EFLAGS.IF in SYSENTER frame

Commit 9d1d31ad94 ("x86: slightly reduce Meltdown band-aid overhead")
moved the STI past the PUSHF. While this isn't an active problem (as we
force EFLAGS.IF to 1 before exiting to guest context), let's not risk
internal confusion by finding a PV guest frame with interrupts
apparently off.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
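
The fix itself is the single "orl $X86_EFLAGS_IF, UREGS_eflags(%rsp)" seen
below; this is a hedged C rendering of its effect, using a stand-in frame
structure rather than Xen's real cpu_user_regs:

    #include <stdint.h>

    #define X86_EFLAGS_IF 0x00000200U      /* architectural IF flag */

    /* Stand-in for the saved-register frame; field name is illustrative. */
    struct saved_frame {
        uint64_t eflags;
    };

    /*
     * SYSENTER clears EFLAGS.IF before the entry path's PUSHF runs, yet
     * a PV guest necessarily executes with the real EFLAGS.IF set.
     * Setting IF in the saved frame keeps later code from finding a PV
     * guest frame with interrupts apparently off.
     */
    static inline void fixup_sysenter_eflags(struct saved_frame *frame)
    {
        frame->eflags |= X86_EFLAGS_IF;
    }
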
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -3934,14 +3934,9 @@ long do_mmu_update(
{
/*
* Force other vCPU-s of the affected guest to pick up L4 entry
- * changes (if any). Issue a flush IPI with empty operation mask to
- * facilitate this (including ourselves waiting for the IPI to
- * actually have arrived). Utilize the fact that FLUSH_VA_VALID is
- * meaningless without FLUSH_CACHE, but will allow to pass the no-op
- * check in flush_area_mask().
+ * changes (if any).
*/
- flush_area_mask(pt_owner->domain_dirty_cpumask,
- ZERO_BLOCK_PTR, FLUSH_VA_VALID);
+ flush_mask(pt_owner->domain_dirty_cpumask, FLUSH_TLB_GLOBAL);
}
perfc_add(num_page_updates, i);
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -726,6 +726,7 @@ static int clone_mapping(const void *ptr
}
pl1e += l1_table_offset(linear);
+ flags &= ~_PAGE_GLOBAL;
if ( l1e_get_flags(*pl1e) & _PAGE_PRESENT )
{
@@ -951,8 +952,16 @@ void __init smp_prepare_cpus(unsigned in
if ( rc )
panic("Error %d setting up PV root page table\n", rc);
if ( per_cpu(root_pgt, 0) )
+ {
get_cpu_info()->pv_cr3 = __pa(per_cpu(root_pgt, 0));
+ /*
+ * All entry points which may need to switch page tables have to start
+ * with interrupts off. Re-write what pv_trap_init() has put there.
+ */
+        _set_gate(idt_table + LEGACY_SYSCALL_VECTOR, 14, 3, &int80_direct_trap);
+ }
+
if ( !zalloc_cpumask_var(&per_cpu(cpu_sibling_mask, 0)) ||
!zalloc_cpumask_var(&per_cpu(cpu_core_mask, 0)) )
panic("No memory for boot CPU sibling/core maps");
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -177,7 +177,7 @@ UNLIKELY_END(exit_cr3)
* the space left by the trampoline.
*/
ENTRY(syscall_enter)
- sti
+ /* sti could live here when we don't switch page tables below. */
movl $FLAT_KERNEL_SS,24(%rsp)
pushq %rcx
pushq $0
@@ -194,9 +194,10 @@ ENTRY(syscall_enter)
jz .Llstar_cr3_okay
mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
neg %rcx
- write_cr3 rcx, r11, r12
+ mov %rcx, %cr3
movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
.Llstar_cr3_okay:
+ sti
__GET_CURRENT(%rbx)
movq VCPU_domain(%rbx),%rcx
@@ -340,7 +341,7 @@ bad_hypercall:
jmp test_all_events
ENTRY(sysenter_entry)
- sti
+ /* sti could live here when we don't switch page tables below. */
pushq $FLAT_USER_SS
pushq $0
pushfq
@@ -355,14 +356,17 @@ GLOBAL(sysenter_eflags_saved)
/* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
GET_STACK_BASE(%rbx)
+ /* PUSHF above has saved EFLAGS.IF clear (the caller had it set). */
+ orl $X86_EFLAGS_IF, UREGS_eflags(%rsp)
mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
neg %rcx
jz .Lsyse_cr3_okay
mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
neg %rcx
- write_cr3 rcx, rdi, rsi
+ mov %rcx, %cr3
movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
.Lsyse_cr3_okay:
+ sti
__GET_CURRENT(%rbx)
cmpb $0,VCPU_sysenter_disables_events(%rbx)
@@ -408,9 +412,10 @@ ENTRY(int80_direct_trap)
jz .Lint80_cr3_okay
mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
neg %rcx
- write_cr3 rcx, rdi, rsi
+ mov %rcx, %cr3
movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
.Lint80_cr3_okay:
+ sti
cmpb $0,untrusted_msi(%rip)
UNLIKELY_START(ne, msi_check)
@@ -580,7 +585,7 @@ ENTRY(common_interrupt)
mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
neg %rcx
.Lintr_cr3_load:
- write_cr3 rcx, rdi, rsi
+ mov %rcx, %cr3
xor %ecx, %ecx
mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
testb $3, UREGS_cs(%rsp)
@@ -621,7 +626,7 @@ GLOBAL(handle_exception)
mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
neg %rcx
.Lxcpt_cr3_load:
- write_cr3 rcx, rdi, rsi
+ mov %rcx, %cr3
xor %ecx, %ecx
mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
testb $3, UREGS_cs(%rsp)
@@ -772,7 +777,7 @@ ENTRY(double_fault)
jns .Ldblf_cr3_load
neg %rbx
.Ldblf_cr3_load:
- write_cr3 rbx, rdi, rsi
+ mov %rbx, %cr3
.Ldblf_cr3_okay:
movq %rsp,%rdi
@@ -806,7 +811,7 @@ handle_ist_exception:
mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
neg %rcx
.List_cr3_load:
- write_cr3 rcx, rdi, rsi
+ mov %rcx, %cr3
movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
.List_cr3_okay: