File xsa469-05.patch of Package xen.39862
From: Andrew Cooper <andrew.cooper3@citrix.com>
Subject: x86/stubs: Introduce place_ret() to abstract away raw 0xc3's

The Indirect Target Selection speculative vulnerability means that indirect
branches (including RETs) are unsafe when in the first half of a cacheline.
This means it's not safe for logic using the stubs to write raw 0xc3's.

Introduce place_ret() which, for now, writes a raw 0xc3 but will contain
additional logic when return thunks are in use.
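
As a caller-side illustration (not part of this patch; build_stub() is a
hypothetical helper), the idiom the conversions below follow is "copy the
instruction bytes, then let place_ret() terminate the stub" rather than
writing a trailing 0xc3 by hand:

    #include <stdint.h>
    #include <string.h>

    /*
     * Stand-in for place_ret(), matching the test-harness variant added
     * below: emit a bare RET (0xc3) and return the next free byte.  The
     * hypervisor copy will grow return-thunk logic in a later change.
     */
    static inline void *place_ret(void *ptr)
    {
        *(uint8_t *)ptr = 0xc3;
        return (uint8_t *)ptr + 1;
    }

    /* Hypothetical caller: build "<insn bytes>; ret" into @buf. */
    static unsigned int build_stub(uint8_t *buf, const uint8_t *insn,
                                   size_t len)
    {
        uint8_t *p = buf;

        memcpy(p, insn, len);          /* instruction bytes */
        p = place_ret(p + len);        /* was: buf[len] = 0xc3; */

        return p - buf;                /* stub length, RET included */
    }

Keeping the RET emission behind place_ret() means callers don't need to
change again when a return thunk is substituted for the bare 0xc3.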

stub_selftest() doesn't strictly need to be converted, as the tests only run
at boot, but doing so gets us a partial test of place_ret() too.

No functional change.

This is part of XSA-469 / CVE-2024-28956

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
--- a/tools/tests/x86_emulator/x86-emulate.h
+++ b/tools/tests/x86_emulator/x86-emulate.h
@@ -64,6 +64,12 @@
#define is_canonical_address(x) (((int64_t)(x) >> 47) == ((int64_t)(x) >> 63))
+static inline void *place_ret(void *ptr)
+{
+ *(uint8_t *)ptr = 0xc3;
+ return ptr + 1;
+}
+
extern uint32_t mxcsr_mask;
extern struct cpu_policy cp;
--- a/xen/arch/x86/Makefile
+++ b/xen/arch/x86/Makefile
@@ -8,9 +8,7 @@ subdir-$(CONFIG_XENOPROF) += oprofile
subdir-$(CONFIG_PV) += pv
subdir-y += x86_64
-alternative-y := alternative.init.o
-alternative-$(CONFIG_LIVEPATCH) :=
-obj-bin-y += $(alternative-y)
+obj-y += alternative.o
obj-y += apic.o
obj-y += bhb-thunk.o
obj-y += bitops.o
@@ -38,7 +36,7 @@ obj-y += hypercall.o
obj-y += i387.o
obj-y += i8259.o
obj-y += io_apic.o
-obj-$(CONFIG_LIVEPATCH) += alternative.o livepatch.o
+obj-$(CONFIG_LIVEPATCH) += livepatch.o
obj-y += msi.o
obj-y += msr.o
obj-$(CONFIG_INDIRECT_THUNK) += indirect-thunk.o
--- a/xen/arch/x86/alternative.c
+++ b/xen/arch/x86/alternative.c
@@ -148,6 +148,20 @@ void init_or_livepatch add_nops(void *in
}
/*
+ * Place a return at @ptr. @ptr must be in the writable alias of a stub.
+ *
+ * Returns the next position to write into the stub.
+ */
+void *place_ret(void *ptr)
+{
+ uint8_t *p = ptr;
+
+ *p++ = 0xc3;
+
+ return p;
+}
+
+/*
* text_poke - Update instructions on a live kernel or non-executed code.
* @addr: address to modify
* @opcode: source of the copy
--- a/xen/arch/x86/extable.c
+++ b/xen/arch/x86/extable.c
@@ -129,19 +129,19 @@ search_exception_table(const struct cpu_
static int __init stub_selftest(void)
{
static const struct {
- uint8_t opc[4];
+ uint8_t opc[3];
uint64_t rax;
union stub_exception_token res;
} tests[] __initconst = {
- { .opc = { 0x0f, 0xb9, 0xc3, 0xc3 }, /* ud1 */
+ { .opc = { 0x0f, 0xb9, 0x90 }, /* ud1 */
.res.fields.trapnr = TRAP_invalid_op },
- { .opc = { 0x90, 0x02, 0x00, 0xc3 }, /* nop; add (%rax),%al */
+ { .opc = { 0x90, 0x02, 0x00 }, /* nop; add (%rax),%al */
.rax = 0x0123456789abcdef,
.res.fields.trapnr = TRAP_gp_fault },
- { .opc = { 0x02, 0x04, 0x04, 0xc3 }, /* add (%rsp,%rax),%al */
+ { .opc = { 0x02, 0x04, 0x04 }, /* add (%rsp,%rax),%al */
.rax = 0xfedcba9876543210,
.res.fields.trapnr = TRAP_stack_error },
- { .opc = { 0xcc, 0xc3, 0xc3, 0xc3 }, /* int3 */
+ { .opc = { 0xcc, 0x90, 0x90 }, /* int3 */
.res.fields.trapnr = TRAP_int3 },
};
unsigned long addr = this_cpu(stubs.addr) + STUB_BUF_SIZE / 2;
@@ -158,6 +158,7 @@ static int __init stub_selftest(void)
memset(ptr, 0xcc, STUB_BUF_SIZE / 2);
memcpy(ptr, tests[i].opc, ARRAY_SIZE(tests[i].opc));
+ place_ret(ptr + ARRAY_SIZE(tests[i].opc));
unmap_domain_page(ptr);
asm volatile ( "INDIRECT_CALL %[stb]\n"
--- a/xen/arch/x86/ioport_emulate.c
+++ b/xen/arch/x86/ioport_emulate.c
@@ -8,7 +8,7 @@
#include <xen/sched.h>
#include <xen/dmi.h>
-static bool ioemul_handle_proliant_quirk(
+static unsigned int ioemul_handle_proliant_quirk(
u8 opcode, char *io_emul_stub, struct cpu_user_regs *regs)
{
static const char stub[] = {
@@ -19,18 +19,17 @@ static bool ioemul_handle_proliant_quirk
0xa8, 0x80, /* test $0x80, %al */
0x75, 0xfb, /* jnz 1b */
0x9d, /* popf */
- 0xc3, /* ret */
};
uint16_t port = regs->dx;
uint8_t value = regs->al;
if ( (opcode != 0xee) || (port != 0xcd4) || !(value & 0x80) )
- return false;
+ return 0;
memcpy(io_emul_stub, stub, sizeof(stub));
BUILD_BUG_ON(IOEMUL_QUIRK_STUB_BYTES < sizeof(stub));
- return true;
+ return sizeof(stub);
}
/* This table is the set of system-specific I/O emulation hooks. */
--- a/xen/arch/x86/pv/emul-priv-op.c
+++ b/xen/arch/x86/pv/emul-priv-op.c
@@ -66,7 +66,7 @@ static io_emul_stub_t *io_emul_stub_setu
struct stubs *this_stubs = &this_cpu(stubs);
unsigned long stub_va = this_stubs->addr + STUB_BUF_SIZE / 2;
long disp;
- bool use_quirk_stub = false;
+ unsigned quirk_stub_used = 0;
if ( !ctxt->io_emul_stub )
ctxt->io_emul_stub =
@@ -79,10 +79,10 @@ static io_emul_stub_t *io_emul_stub_setu
*(int32_t *)&ctxt->io_emul_stub[1] = disp;
if ( unlikely(ioemul_handle_quirk) )
- use_quirk_stub = ioemul_handle_quirk(opcode, &ctxt->io_emul_stub[5],
- ctxt->ctxt.regs);
+ quirk_stub_used = ioemul_handle_quirk(opcode, &ctxt->io_emul_stub[5],
+ ctxt->ctxt.regs);
- if ( !use_quirk_stub )
+ if ( !quirk_stub_used )
{
/* data16 or nop */
ctxt->io_emul_stub[5] = (bytes != 2) ? 0x90 : 0x66;
@@ -90,12 +90,18 @@ static io_emul_stub_t *io_emul_stub_setu
ctxt->io_emul_stub[6] = opcode;
/* imm8 or nop */
ctxt->io_emul_stub[7] = !(opcode & 8) ? port : 0x90;
- /* ret (jumps to guest_to_host_gpr_switch) */
- ctxt->io_emul_stub[8] = 0xc3;
+
+ quirk_stub_used = 8;
}
+ else
+ quirk_stub_used += 5;
+
+ /* ret (jumps to guest_to_host_gpr_switch) */
+ place_ret(&ctxt->io_emul_stub[quirk_stub_used]);
- BUILD_BUG_ON(STUB_BUF_SIZE / 2 < MAX(9, /* Default emul stub */
- 5 + IOEMUL_QUIRK_STUB_BYTES));
+ BUILD_BUG_ON(STUB_BUF_SIZE / 2 < MAX(8, /* Default emul stub */
+ 5 + IOEMUL_QUIRK_STUB_BYTES) +
+ 1 /* ret */);
block_speculation(); /* SCSB */
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -114,7 +114,7 @@ idt_entry_t *idt_tables[NR_CPUS] __read_
*/
DEFINE_PER_CPU_PAGE_ALIGNED(struct tss_page, tss_page);
-bool (*ioemul_handle_quirk)(
+unsigned int (*ioemul_handle_quirk)(
u8 opcode, char *io_emul_stub, struct cpu_user_regs *regs);
static int debug_stack_lines = 20;
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1448,36 +1448,42 @@ static inline bool fpu_check_write(void)
#define emulate_fpu_insn_memdst(opc, ext, arg) \
do { \
+ void *_p = get_stub(stub); \
/* ModRM: mod=0, reg=ext, rm=0, i.e. a (%rax) operand */ \
insn_bytes = 2; \
- memcpy(get_stub(stub), \
- ((uint8_t[]){ opc, ((ext) & 7) << 3, 0xc3 }), 3); \
+ memcpy(_p, ((uint8_t[]){ opc, ((ext) & 7) << 3 }), 2); _p += 2; \
+ place_ret(_p); \
invoke_stub("", "", "+m" (arg) : "a" (&(arg))); \
put_stub(stub); \
} while (0)
#define emulate_fpu_insn_memsrc(opc, ext, arg) \
do { \
+ void *_p = get_stub(stub); \
/* ModRM: mod=0, reg=ext, rm=0, i.e. a (%rax) operand */ \
- memcpy(get_stub(stub), \
- ((uint8_t[]){ opc, ((ext) & 7) << 3, 0xc3 }), 3); \
+ memcpy(_p, ((uint8_t[]){ opc, ((ext) & 7) << 3 }), 2); _p += 2; \
+ place_ret(_p); \
invoke_stub("", "", "=m" (dummy) : "m" (arg), "a" (&(arg))); \
put_stub(stub); \
} while (0)
#define emulate_fpu_insn_stub(bytes...) \
do { \
+ void *_p = get_stub(stub); \
unsigned int nr_ = sizeof((uint8_t[]){ bytes }); \
- memcpy(get_stub(stub), ((uint8_t[]){ bytes, 0xc3 }), nr_ + 1); \
+ memcpy(_p, ((uint8_t[]){ bytes }), nr_); _p += nr_; \
+ place_ret(_p); \
invoke_stub("", "", "=m" (dummy) : "i" (0)); \
put_stub(stub); \
} while (0)
#define emulate_fpu_insn_stub_eflags(bytes...) \
do { \
+ void *_p = get_stub(stub); \
unsigned int nr_ = sizeof((uint8_t[]){ bytes }); \
unsigned long tmp_; \
- memcpy(get_stub(stub), ((uint8_t[]){ bytes, 0xc3 }), nr_ + 1); \
+ memcpy(_p, ((uint8_t[]){ bytes }), nr_); _p += nr_; \
+ place_ret(_p); \
invoke_stub(_PRE_EFLAGS("[eflags]", "[mask]", "[tmp]"), \
_POST_EFLAGS("[eflags]", "[mask]", "[tmp]"), \
[eflags] "+g" (_regs.eflags), [tmp] "=&r" (tmp_) \
@@ -3705,7 +3711,7 @@ x86_emulate(
stb[3] = 0x91;
stb[4] = evex.opmsk << 3;
insn_bytes = 5;
- stb[5] = 0xc3;
+ place_ret(&stb[5]);
invoke_stub("", "", "+m" (op_mask) : "a" (&op_mask));
@@ -6363,7 +6369,7 @@ x86_emulate(
evex.lr = 0;
opc[1] = (modrm & 0x38) | 0xc0;
insn_bytes = EVEX_PFX_BYTES + 2;
- opc[2] = 0xc3;
+ place_ret(&opc[2]);
copy_EVEX(opc, evex);
invoke_stub("", "", "=g" (dummy) : "a" (src.val));
@@ -6430,7 +6436,7 @@ x86_emulate(
insn_bytes = PFX_BYTES + 2;
copy_REX_VEX(opc, rex_prefix, vex);
}
- opc[2] = 0xc3;
+ place_ret(&opc[2]);
ea.reg = decode_gpr(&_regs, modrm_reg);
invoke_stub("", "", "=a" (*ea.reg) : "c" (mmvalp), "m" (*mmvalp));
@@ -6499,7 +6505,7 @@ x86_emulate(
insn_bytes = PFX_BYTES + 2;
copy_REX_VEX(opc, rex_prefix, vex);
}
- opc[2] = 0xc3;
+ place_ret(&opc[2]);
invoke_stub(_PRE_EFLAGS("[eflags]", "[mask]", "[tmp]"),
_POST_EFLAGS("[eflags]", "[mask]", "[tmp]"),
@@ -6720,7 +6726,7 @@ x86_emulate(
opc[1] = modrm & 0xc7;
insn_bytes = PFX_BYTES + 2;
simd_0f_to_gpr:
- opc[insn_bytes - PFX_BYTES] = 0xc3;
+ place_ret(&opc[insn_bytes - PFX_BYTES]);
generate_exception_if(ea.type != OP_REG, EXC_UD);
@@ -7106,7 +7112,7 @@ x86_emulate(
vex.w = 0;
opc[1] = modrm & 0x38;
insn_bytes = PFX_BYTES + 2;
- opc[2] = 0xc3;
+ place_ret(&opc[2]);
copy_REX_VEX(opc, rex_prefix, vex);
invoke_stub("", "", "+m" (src.val) : "a" (&src.val));
@@ -7132,7 +7138,7 @@ x86_emulate(
evex.w = 0;
opc[1] = modrm & 0x38;
insn_bytes = EVEX_PFX_BYTES + 2;
- opc[2] = 0xc3;
+ place_ret(&opc[2]);
copy_EVEX(opc, evex);
invoke_stub("", "", "+m" (src.val) : "a" (&src.val));
@@ -7313,7 +7319,7 @@ x86_emulate(
opc[2] = imm1;
insn_bytes = PFX_BYTES + 3;
simd_0f_reg_only:
- opc[insn_bytes - PFX_BYTES] = 0xc3;
+ place_ret(&opc[insn_bytes - PFX_BYTES]);
copy_REX_VEX(opc, rex_prefix, vex);
invoke_stub("", "", [dummy_out] "=g" (dummy) : [dummy_in] "i" (0) );
@@ -7618,7 +7624,7 @@ x86_emulate(
if ( !mode_64bit() )
vex.w = 0;
opc[1] = modrm & 0xf8;
- opc[2] = 0xc3;
+ place_ret(&opc[2]);
copy_VEX(opc, vex);
ea.reg = decode_gpr(&_regs, modrm_rm);
@@ -7661,7 +7667,7 @@ x86_emulate(
if ( !mode_64bit() )
vex.w = 0;
opc[1] = modrm & 0xc7;
- opc[2] = 0xc3;
+ place_ret(&opc[2]);
copy_VEX(opc, vex);
invoke_stub("", "", "=a" (dst.val) : [dummy] "i" (0));
@@ -7691,7 +7697,7 @@ x86_emulate(
opc = init_prefixes(stub);
opc[0] = b;
opc[1] = modrm;
- opc[2] = 0xc3;
+ place_ret(&opc[2]);
copy_VEX(opc, vex);
invoke_stub(_PRE_EFLAGS("[eflags]", "[mask]", "[tmp]"),
@@ -8505,7 +8511,7 @@ x86_emulate(
if ( !mode_64bit() )
vex.w = 0;
opc[1] = modrm & 0xc7;
- opc[2] = 0xc3;
+ place_ret(&opc[2]);
copy_REX_VEX(opc, rex_prefix, vex);
invoke_stub("", "", "=a" (ea.val) : [dummy] "i" (0));
@@ -8636,7 +8642,7 @@ x86_emulate(
opc[1] &= 0x38;
}
insn_bytes = PFX_BYTES + 2;
- opc[2] = 0xc3;
+ place_ret(&opc[2]);
if ( vex.opcx == vex_none )
{
/* Cover for extra prefix byte. */
@@ -8915,7 +8921,7 @@ x86_emulate(
pvex->b = !mode_64bit() || (vex.reg >> 3);
opc[1] = 0xc0 | (~vex.reg & 7);
pvex->reg = 0xf;
- opc[2] = 0xc3;
+ place_ret(&opc[2]);
invoke_stub("", "", "=a" (ea.val) : [dummy] "i" (0));
put_stub(stub);
@@ -9145,7 +9151,7 @@ x86_emulate(
evex.w = 0;
opc[1] = modrm & 0xf8;
insn_bytes = EVEX_PFX_BYTES + 2;
- opc[2] = 0xc3;
+ place_ret(&opc[2]);
copy_EVEX(opc, evex);
invoke_stub("", "", "=g" (dummy) : "a" (src.val));
@@ -9240,7 +9246,7 @@ x86_emulate(
pvex->b = 1;
opc[1] = (modrm_reg & 7) << 3;
pvex->reg = 0xf;
- opc[2] = 0xc3;
+ place_ret(&opc[2]);
invoke_stub("", "", "=m" (*mmvalp) : "a" (mmvalp));
@@ -9310,7 +9316,7 @@ x86_emulate(
pvex->b = 1;
opc[1] = (modrm_reg & 7) << 3;
pvex->reg = 0xf;
- opc[2] = 0xc3;
+ place_ret(&opc[2]);
invoke_stub("", "", "+m" (*mmvalp) : "a" (mmvalp));
@@ -9366,7 +9372,7 @@ x86_emulate(
pevex->b = 1;
opc[1] = (modrm_reg & 7) << 3;
pevex->RX = 1;
- opc[2] = 0xc3;
+ place_ret(&opc[2]);
invoke_stub("", "", "=m" (*mmvalp) : "a" (mmvalp));
@@ -9431,7 +9437,7 @@ x86_emulate(
pevex->b = 1;
opc[1] = (modrm_reg & 7) << 3;
pevex->RX = 1;
- opc[2] = 0xc3;
+ place_ret(&opc[2]);
invoke_stub("", "", "+m" (*mmvalp) : "a" (mmvalp));
@@ -9445,7 +9451,7 @@ x86_emulate(
opc[2] = 0x90;
/* Use (%rax) as source. */
opc[3] = evex.opmsk << 3;
- opc[4] = 0xc3;
+ place_ret(&opc[4]);
invoke_stub("", "", "+m" (op_mask) : "a" (&op_mask));
put_stub(stub);
@@ -9539,7 +9545,7 @@ x86_emulate(
pevex->b = 1;
opc[1] = (modrm_reg & 7) << 3;
pevex->RX = 1;
- opc[2] = 0xc3;
+ place_ret(&opc[2]);
invoke_stub("", "", "=m" (*mmvalp) : "a" (mmvalp));
@@ -9596,7 +9602,7 @@ x86_emulate(
opc[2] = 0x90;
/* Use (%rax) as source. */
opc[3] = evex.opmsk << 3;
- opc[4] = 0xc3;
+ place_ret(&opc[4]);
invoke_stub("", "", "+m" (op_mask) : "a" (&op_mask));
put_stub(stub);
@@ -9665,7 +9671,7 @@ x86_emulate(
pevex->r = !mode_64bit() || !(state->sib_index & 0x08);
pevex->R = !mode_64bit() || !(state->sib_index & 0x10);
pevex->RX = 1;
- opc[2] = 0xc3;
+ place_ret(&opc[2]);
invoke_stub("", "", "=m" (index) : "a" (&index));
put_stub(stub);
@@ -9838,7 +9844,7 @@ x86_emulate(
pvex->reg = 0xf; /* rAX */
buf[3] = b;
buf[4] = 0x09; /* reg=rCX r/m=(%rCX) */
- buf[5] = 0xc3;
+ place_ret(&buf[5]);
src.reg = decode_vex_gpr(vex.reg, &_regs, ctxt);
emulate_stub([dst] "=&c" (dst.val), "[dst]" (&src.val), "a" (*src.reg));
@@ -9872,7 +9878,7 @@ x86_emulate(
pvex->reg = 0xf; /* rAX */
buf[3] = b;
buf[4] = (modrm & 0x38) | 0x01; /* r/m=(%rCX) */
- buf[5] = 0xc3;
+ place_ret(&buf[5]);
dst.reg = decode_vex_gpr(vex.reg, &_regs, ctxt);
emulate_stub("=&a" (dst.val), "c" (&src.val));
@@ -10039,7 +10045,7 @@ x86_emulate(
evex.w = vex.w = 0;
opc[1] = modrm & 0x38;
opc[2] = imm1;
- opc[3] = 0xc3;
+ place_ret(&opc[3]);
if ( vex.opcx == vex_none )
{
/* Cover for extra prefix byte. */
@@ -10206,7 +10212,7 @@ x86_emulate(
insn_bytes = PFX_BYTES + 3;
copy_VEX(opc, vex);
}
- opc[3] = 0xc3;
+ place_ret(&opc[3]);
/* Latch MXCSR - we may need to restore it below. */
invoke_stub("stmxcsr %[mxcsr]", "",
@@ -10434,7 +10440,7 @@ x86_emulate(
}
opc[2] = imm1;
insn_bytes = PFX_BYTES + 3;
- opc[3] = 0xc3;
+ place_ret(&opc[3]);
if ( vex.opcx == vex_none )
{
/* Cover for extra prefix byte. */
@@ -10588,7 +10594,7 @@ x86_emulate(
pxop->reg = 0xf; /* rAX */
buf[3] = b;
buf[4] = (modrm & 0x38) | 0x01; /* r/m=(%rCX) */
- buf[5] = 0xc3;
+ place_ret(&buf[5]);
dst.reg = decode_vex_gpr(vex.reg, &_regs, ctxt);
emulate_stub([dst] "=&a" (dst.val), "c" (&src.val));
@@ -10693,7 +10699,7 @@ x86_emulate(
buf[3] = b;
buf[4] = 0x09; /* reg=rCX r/m=(%rCX) */
*(uint32_t *)(buf + 5) = imm1;
- buf[9] = 0xc3;
+ place_ret(&buf[9]);
emulate_stub([dst] "=&c" (dst.val), "[dst]" (&src.val));
@@ -10760,12 +10766,12 @@ x86_emulate(
BUG();
if ( evex_encoded() )
{
- opc[insn_bytes - EVEX_PFX_BYTES] = 0xc3;
+ place_ret(&opc[insn_bytes - EVEX_PFX_BYTES]);
copy_EVEX(opc, evex);
}
else
{
- opc[insn_bytes - PFX_BYTES] = 0xc3;
+ place_ret(&opc[insn_bytes - PFX_BYTES]);
copy_REX_VEX(opc, rex_prefix, vex);
}
--- a/xen/include/asm-x86/alternative.h
+++ b/xen/include/asm-x86/alternative.h
@@ -30,6 +30,8 @@ struct __packed alt_instr {
#define ALT_REPL_PTR(a) __ALT_PTR(a, repl_offset)
extern void add_nops(void *insns, unsigned int len);
+void *place_ret(void *ptr);
+
/* Similar to alternative_instructions except it can be run with IRQs enabled. */
extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
extern void alternative_instructions(void);
--- a/xen/include/asm-x86/io.h
+++ b/xen/include/asm-x86/io.h
@@ -50,7 +50,7 @@ __OUT(l,,int)
/* Function pointer used to handle platform specific I/O port emulation. */
#define IOEMUL_QUIRK_STUB_BYTES 10
-extern bool (*ioemul_handle_quirk)(
+extern unsigned int (*ioemul_handle_quirk)(
u8 opcode, char *io_emul_stub, struct cpu_user_regs *regs);
#endif