File xsa348.patch of Package xen.25143

x86: avoid calling {svm,vmx}_do_resume()

These functions follow the following path: hvm_do_resume() ->
handle_hvm_io_completion() -> hvm_wait_for_io() ->
wait_on_xen_event_channel() -> do_softirq() -> schedule() ->
sched_context_switch() -> continue_running() and hence may
recursively invoke themselves. If this ends up happening a couple of
times, a stack overflow would result.

Prevent this by also resetting the stack at the
->arch.ctxt_switch->tail() invocations (in both places for consistency)
and thus jumping to the functions instead of calling them.

This is XSA-348 / CVE-2020-29566.

Reported-by: Julien Grall <jgrall@amazon.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Juergen Gross <jgross@suse.com>

--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -168,7 +168,7 @@ static void play_dead(void)
     (*dead_idle)();
 }
 
-static void idle_loop(void)
+static void noreturn idle_loop(void)
 {
     for ( ; ; )
     {
@@ -196,12 +196,7 @@ void startup_cpu_idle_loop(void)
     reset_stack_and_jump(idle_loop);
 }
 
-static void noreturn continue_idle_domain(struct vcpu *v)
-{
-    reset_stack_and_jump(idle_loop);
-}
-
-static void noreturn continue_nonidle_domain(struct vcpu *v)
+static void noreturn continue_nonidle_domain(void)
 {
     check_wakeup_from_wait();
     reset_stack_and_jump(ret_from_intr);
@@ -628,7 +623,7 @@ int vcpu_initialise(struct vcpu *v)
 
     if ( is_idle_domain(d) )
     {
-        v->arch.schedule_tail = continue_idle_domain;
+        v->arch.schedule_tail = idle_loop;
         v->arch.cr3           = __pa(idle_pg_table);
     }
 
@@ -2335,12 +2330,12 @@ void context_switch(struct vcpu *prev, s
     /* Ensure that the vcpu has an up-to-date time base. */
     update_vcpu_system_time(next);
 
-    schedule_tail(next);
+    reset_stack_and_jump_ind(next->arch.schedule_tail);
 }
 
 void continue_running(struct vcpu *same)
 {
-    schedule_tail(same);
+    reset_stack_and_jump_ind(same->arch.schedule_tail);
 }
 
 int __sync_local_execstate(void)
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -1088,8 +1088,9 @@ static void svm_ctxt_switch_to(struct vc
         wrmsr_tsc_aux(hvm_msr_tsc_aux(v));
 }
 
-static void noreturn svm_do_resume(struct vcpu *v)
+static void noreturn svm_do_resume(void)
 {
+    struct vcpu *v = current;
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
     bool_t debug_state = v->domain->debugger_attached;
     bool_t vcpu_guestmode = 0;
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -1786,8 +1786,9 @@ void vmx_vmentry_failure(void)
     domain_crash_synchronous();
 }
 
-void vmx_do_resume(struct vcpu *v)
+void vmx_do_resume(void)
 {
+    struct vcpu *v = current;
     bool_t debug_state;
 
     if ( v->arch.hvm_vmx.active_cpu == smp_processor_id() )
--- a/xen/include/asm-x86/current.h
+++ b/xen/include/asm-x86/current.h
@@ -125,25 +125,22 @@ unsigned long get_stack_dump_bottom (uns
 # define CHECK_FOR_LIVEPATCH_WORK ""
 #endif
 
-#define reset_stack_and_jump(__fn)                                      \
+#define switch_stack_and_jump(fn, instr, constr)                        \
     ({                                                                  \
         __asm__ __volatile__ (                                          \
             "mov %0,%%"__OP"sp;"                                        \
-            CHECK_FOR_LIVEPATCH_WORK                                      \
-             "jmp %c1"                                                  \
-            : : "r" (guest_cpu_user_regs()), "i" (__fn) : "memory" );   \
+            CHECK_FOR_LIVEPATCH_WORK                                    \
+            instr "1"                                                   \
+            : : "r" (guest_cpu_user_regs()), constr (fn) : "memory" );  \
         unreachable();                                                  \
     })
 
-/*
- * Schedule tail *should* be a terminal function pointer, but leave a bugframe
- * around just incase it returns, to save going back into the context
- * switching code and leaving a far more subtle crash to diagnose.
- */
-#define schedule_tail(vcpu) do {                \
-        (((vcpu)->arch.schedule_tail)(vcpu));   \
-        BUG();                                  \
-    } while (0)
+#define reset_stack_and_jump(fn)                                        \
+    switch_stack_and_jump(fn, "jmp %c", "i")
+
+/* The constraint may only specify non-call-clobbered registers. */
+#define reset_stack_and_jump_ind(fn)                                    \
+    switch_stack_and_jump(fn, "INDIRECT_JMP %", "b")
 
 /*
  * Which VCPU's state is currently running on each CPU?
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -556,7 +556,7 @@ struct arch_vcpu
 
     unsigned long      flags; /* TF_ */
 
-    void (*schedule_tail) (struct vcpu *);
+    void noreturn (*schedule_tail) (void);
 
     void (*ctxt_switch_from) (struct vcpu *);
     void (*ctxt_switch_to) (struct vcpu *);
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -95,7 +95,7 @@ typedef enum {
 void vmx_asm_vmexit_handler(struct cpu_user_regs);
 void vmx_asm_do_vmentry(void);
 void vmx_intr_assist(void);
-void noreturn vmx_do_resume(struct vcpu *);
+void noreturn vmx_do_resume(void);
 void vmx_vlapic_msr_changed(struct vcpu *v);
 void vmx_realmode_emulate_one(struct hvm_emulate_ctxt *hvmemul_ctxt);
 void vmx_realmode(struct cpu_user_regs *regs);
openSUSE Build Service is sponsored by