File 54db80d3-x86-nmi-shootdown-pcpus-in-VMX-non-root-mode.patch of Package xen.481
# Commit cbeeaa7da01bfa37c1fcdfe79e8f4f1400262ccb
# Date 2015-02-11 17:18:27 +0100
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/nmi: fix shootdown of pcpus running in VMX non-root mode
c/s 7dd3b06ff "vmx: fix handling of NMI VMEXIT" fixed one issue but
inadvertently introduced a regression when it came to the NMI shootdown. The
shootdown code worked by patching vector 2 in each IDT, but the introduced
direct call to do_nmi() bypassed this.
Instead of patching each IDT, take a different approach by updating the
existing dispatch table. This allows for the removal of the remote IDT
patching and the removal of the nmi_crash() entry point.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/crash.c
+++ b/xen/arch/x86/crash.c
@@ -38,7 +38,7 @@ static DEFINE_PER_CPU_READ_MOSTLY(bool_t
/* This becomes the NMI handler for non-crashing CPUs, when Xen is crashing. */
void __attribute__((noreturn)) do_nmi_crash(struct cpu_user_regs *regs)
{
- int cpu = smp_processor_id();
+ unsigned int cpu = smp_processor_id();
/* nmi_shootdown_cpus() should ensure that this assertion is correct. */
ASSERT(cpu != crashing_cpu);
@@ -116,7 +116,7 @@ void __attribute__((noreturn)) do_nmi_cr
static void nmi_shootdown_cpus(void)
{
unsigned long msecs;
- int i, cpu = smp_processor_id();
+ unsigned int cpu = smp_processor_id();
disable_lapic_nmi_watchdog();
local_irq_disable();
@@ -126,37 +126,25 @@ static void nmi_shootdown_cpus(void)
cpumask_andnot(&waiting_to_crash, &cpu_online_map, cpumask_of(cpu));
- /* Change NMI trap handlers. Non-crashing pcpus get nmi_crash which
- * invokes do_nmi_crash (above), which cause them to write state and
- * fall into a loop. The crashing pcpu gets the nop handler to
- * cause it to return to this function ASAP.
+ /*
+ * Disable IST for MCEs to avoid stack corruption race conditions, and
+ * change the NMI handler to a nop to avoid deviation from this codepath.
*/
- for ( i = 0; i < nr_cpu_ids; i++ )
- {
- if ( idt_tables[i] == NULL )
- continue;
+ _set_gate_lower(&idt_tables[cpu][TRAP_nmi], 14, 0, &trap_nop);
+ set_ist(&idt_tables[cpu][TRAP_machine_check], IST_NONE);
- if ( i == cpu )
- {
- /*
- * Disable the interrupt stack tables for this cpu's MCE and NMI
- * handlers, and alter the NMI handler to have no operation.
- * Disabling the stack tables prevents stack corruption race
- * conditions, while changing the handler helps prevent cascading
- * faults; we are certainly going to crash by this point.
- *
- * This update is safe from a security point of view, as this pcpu
- * is never going to try to sysret back to a PV vcpu.
- */
- _set_gate_lower(&idt_tables[i][TRAP_nmi], 14, 0, &trap_nop);
- set_ist(&idt_tables[i][TRAP_machine_check], IST_NONE);
- }
- else
- {
- /* Do not update stack table for other pcpus. */
- _update_gate_addr_lower(&idt_tables[i][TRAP_nmi], &nmi_crash);
- }
- }
+ /*
+ * Ideally would be:
+ * exception_table[TRAP_nmi] = &do_nmi_crash;
+ *
+ * but the exception_table is read only. Borrow an unused fixmap entry
+ * to construct a writable mapping.
+ */
+ set_fixmap(FIX_TBOOT_MAP_ADDRESS, __pa(&exception_table[TRAP_nmi]));
+ write_atomic((unsigned long *)
+ (fix_to_virt(FIX_TBOOT_MAP_ADDRESS) +
+ ((unsigned long)&exception_table[TRAP_nmi] & ~PAGE_MASK)),
+ (unsigned long)&do_nmi_crash);
/* Ensure the new callback function is set before sending out the NMI. */
wmb();
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -658,14 +658,6 @@ handle_ist_exception:
je restore_all_guest
jmp compat_restore_all_guest
-ENTRY(nmi_crash)
- pushq $0
- movl $TRAP_nmi,4(%rsp)
- SAVE_ALL
- movq %rsp,%rdi
- callq do_nmi_crash /* Does not return */
- ud2
-
ENTRY(machine_check)
pushq $0
movl $TRAP_machine_check,4(%rsp)