From: Andrew Cooper <andrew.cooper3@citrix.com>
Subject: x86/stubs: Introduce place_ret() to abstract away raw 0xc3's

The Indirect Target Selection speculative vulnerability means that indirect
branches (including RETs) are unsafe when they sit in the first half of a
cacheline.  As a result, it is not safe for logic using the stubs to write
raw 0xc3's.

Introduce place_ret(), which for now writes a raw 0xc3, but will contain
additional logic when return thunks are in use.
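
For illustration, code which previously terminated a stub with a literal
byte:

    *p++ = 0xc3;        /* ret */

now delegates the choice of terminator (a sketch of the calling pattern
used throughout this patch; p stands for the write cursor into the stub's
writable alias):

    p = place_ret(p);   /* writes a raw 0xc3 for now; thunk logic later */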
stub_selftest() doesn't strictly need to be converted, as it only runs at
boot, but doing so gets us a partial test of place_ret() too.
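
The converted selftest builds each stub as "opcodes, then terminator"
(sketch matching the extable.c hunk below):

    memcpy(ptr, tests[i].opc, ARRAY_SIZE(tests[i].opc));
    place_ret(ptr + ARRAY_SIZE(tests[i].opc));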

No functional change.

This is part of XSA-469 / CVE-2024-28956

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
--- a/tools/tests/x86_emulator/x86-emulate.h
+++ b/tools/tests/x86_emulator/x86-emulate.h
@@ -67,6 +67,12 @@
#define is_canonical_address(x) (((int64_t)(x) >> 47) == ((int64_t)(x) >> 63))
+static inline void *place_ret(void *ptr)
+{
+ *(uint8_t *)ptr = 0xc3;
+ return ptr + 1;
+}
+
extern uint32_t mxcsr_mask;
extern struct cpu_policy cp;
--- a/xen/arch/x86/Makefile
+++ b/xen/arch/x86/Makefile
@@ -8,9 +8,7 @@ obj-$(CONFIG_XENOPROF) += oprofile/
obj-$(CONFIG_PV) += pv/
obj-y += x86_64/
-alternative-y := alternative.init.o
-alternative-$(CONFIG_LIVEPATCH) :=
-obj-bin-y += $(alternative-y)
+obj-y += alternative.o
obj-y += apic.o
obj-y += bhb-thunk.o
obj-y += bitops.o
@@ -38,7 +36,7 @@ obj-y += hypercall.o
obj-y += i387.o
obj-y += i8259.o
obj-y += io_apic.o
-obj-$(CONFIG_LIVEPATCH) += alternative.o livepatch.o
+obj-$(CONFIG_LIVEPATCH) += livepatch.o
obj-y += msi.o
obj-y += msr.o
obj-$(CONFIG_INDIRECT_THUNK) += indirect-thunk.o
--- a/xen/arch/x86/alternative.c
+++ b/xen/arch/x86/alternative.c
@@ -149,6 +149,20 @@ void init_or_livepatch add_nops(void *in
}
/*
+ * Place a return at @ptr. @ptr must be in the writable alias of a stub.
+ *
+ * Returns the next position to write into the stub.
+ */
+void *place_ret(void *ptr)
+{
+ uint8_t *p = ptr;
+
+ *p++ = 0xc3;
+
+ return p;
+}
+
+/*
* text_poke - Update instructions on a live kernel or non-executed code.
* @addr: address to modify
* @opcode: source of the copy
--- a/xen/arch/x86/extable.c
+++ b/xen/arch/x86/extable.c
@@ -132,20 +132,20 @@ search_exception_table(const struct cpu_
static int __init stub_selftest(void)
{
static const struct {
- uint8_t opc[8];
+ uint8_t opc[7];
uint64_t rax;
union stub_exception_token res;
} tests[] __initconst = {
#define endbr64 0xf3, 0x0f, 0x1e, 0xfa
- { .opc = { endbr64, 0x0f, 0xb9, 0xc3, 0xc3 }, /* ud1 */
+ { .opc = { endbr64, 0x0f, 0xb9, 0x90 }, /* ud1 */
.res.fields.trapnr = TRAP_invalid_op },
- { .opc = { endbr64, 0x90, 0x02, 0x00, 0xc3 }, /* nop; add (%rax),%al */
+ { .opc = { endbr64, 0x90, 0x02, 0x00 }, /* nop; add (%rax),%al */
.rax = 0x0123456789abcdef,
.res.fields.trapnr = TRAP_gp_fault },
- { .opc = { endbr64, 0x02, 0x04, 0x04, 0xc3 }, /* add (%rsp,%rax),%al */
+ { .opc = { endbr64, 0x02, 0x04, 0x04 }, /* add (%rsp,%rax),%al */
.rax = 0xfedcba9876543210,
.res.fields.trapnr = TRAP_stack_error },
- { .opc = { endbr64, 0xcc, 0xc3, 0xc3, 0xc3 }, /* int3 */
+ { .opc = { endbr64, 0xcc, 0x90, 0x90 }, /* int3 */
.res.fields.trapnr = TRAP_int3 },
#undef endbr64
};
@@ -163,6 +163,7 @@ static int __init stub_selftest(void)
memset(ptr, 0xcc, STUB_BUF_SIZE / 2);
memcpy(ptr, tests[i].opc, ARRAY_SIZE(tests[i].opc));
+ place_ret(ptr + ARRAY_SIZE(tests[i].opc));
unmap_domain_page(ptr);
asm volatile ( "INDIRECT_CALL %[stb]\n"
--- a/xen/include/asm-x86/alternative.h
+++ b/xen/include/asm-x86/alternative.h
@@ -30,6 +30,8 @@ struct __packed alt_instr {
#define ALT_REPL_PTR(a) __ALT_PTR(a, repl_offset)
extern void add_nops(void *insns, unsigned int len);
+void *place_ret(void *ptr);
+
/* Similar to alternative_instructions except it can be run with IRQs enabled. */
extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
extern void alternative_instructions(void);
--- a/xen/arch/x86/pv/emul-priv-op.c
+++ b/xen/arch/x86/pv/emul-priv-op.c
@@ -88,7 +88,6 @@ static io_emul_stub_t *io_emul_stub_setu
0x41, 0x5c, /* pop %r12 */
0x5d, /* pop %rbp */
0x5b, /* pop %rbx */
- 0xc3, /* ret */
};
const struct stubs *this_stubs = &this_cpu(stubs);
@@ -138,11 +137,13 @@ static io_emul_stub_t *io_emul_stub_setu
APPEND_CALL(save_guest_gprs);
APPEND_BUFF(epilogue);
+ p = place_ret(p);
/* Build-time best effort attempt to catch problems. */
BUILD_BUG_ON(STUB_BUF_SIZE / 2 <
(sizeof(prologue) + sizeof(epilogue) + 10 /* 2x call */ +
- MAX(3 /* default stub */, IOEMUL_QUIRK_STUB_BYTES)));
+ MAX(3 /* default stub */, IOEMUL_QUIRK_STUB_BYTES) +
+ 1 /* ret */));
/* Runtime confirmation that we haven't clobbered an adjacent stub. */
BUG_ON(STUB_BUF_SIZE / 2 < (p - ctxt->io_emul_stub));
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1533,36 +1533,42 @@ static inline bool fpu_check_write(void)
#define emulate_fpu_insn_memdst(opc, ext, arg) \
do { \
+ void *_p = get_stub(stub); \
/* ModRM: mod=0, reg=ext, rm=0, i.e. a (%rax) operand */ \
insn_bytes = 2; \
- memcpy(get_stub(stub), \
- ((uint8_t[]){ opc, ((ext) & 7) << 3, 0xc3 }), 3); \
+ memcpy(_p, ((uint8_t[]){ opc, ((ext) & 7) << 3 }), 2); _p += 2; \
+ place_ret(_p); \
invoke_stub("", "", "+m" (arg) : "a" (&(arg))); \
put_stub(stub); \
} while (0)
#define emulate_fpu_insn_memsrc(opc, ext, arg) \
do { \
+ void *_p = get_stub(stub); \
/* ModRM: mod=0, reg=ext, rm=0, i.e. a (%rax) operand */ \
- memcpy(get_stub(stub), \
- ((uint8_t[]){ opc, ((ext) & 7) << 3, 0xc3 }), 3); \
+ memcpy(_p, ((uint8_t[]){ opc, ((ext) & 7) << 3 }), 2); _p += 2; \
+ place_ret(_p); \
invoke_stub("", "", "=m" (dummy) : "m" (arg), "a" (&(arg))); \
put_stub(stub); \
} while (0)
#define emulate_fpu_insn_stub(bytes...) \
do { \
+ void *_p = get_stub(stub); \
unsigned int nr_ = sizeof((uint8_t[]){ bytes }); \
- memcpy(get_stub(stub), ((uint8_t[]){ bytes, 0xc3 }), nr_ + 1); \
+ memcpy(_p, ((uint8_t[]){ bytes }), nr_); _p += nr_; \
+ place_ret(_p); \
invoke_stub("", "", "=m" (dummy) : "i" (0)); \
put_stub(stub); \
} while (0)
#define emulate_fpu_insn_stub_eflags(bytes...) \
do { \
+ void *_p = get_stub(stub); \
unsigned int nr_ = sizeof((uint8_t[]){ bytes }); \
unsigned long tmp_; \
- memcpy(get_stub(stub), ((uint8_t[]){ bytes, 0xc3 }), nr_ + 1); \
+ memcpy(_p, ((uint8_t[]){ bytes }), nr_); _p += nr_; \
+ place_ret(_p); \
invoke_stub(_PRE_EFLAGS("[eflags]", "[mask]", "[tmp]"), \
_POST_EFLAGS("[eflags]", "[mask]", "[tmp]"), \
[eflags] "+g" (_regs.eflags), [tmp] "=&r" (tmp_) \
@@ -3852,7 +3858,7 @@ x86_emulate(
stb[3] = 0x91;
stb[4] = evex.opmsk << 3;
insn_bytes = 5;
- stb[5] = 0xc3;
+ place_ret(&stb[5]);
invoke_stub("", "", "+m" (op_mask) : "a" (&op_mask));
@@ -6751,7 +6757,7 @@ x86_emulate(
evex.lr = 0;
opc[1] = (modrm & 0x38) | 0xc0;
insn_bytes = EVEX_PFX_BYTES + 2;
- opc[2] = 0xc3;
+ place_ret(&opc[2]);
copy_EVEX(opc, evex);
invoke_stub("", "", "=g" (dummy) : "a" (src.val));
@@ -6816,7 +6822,7 @@ x86_emulate(
insn_bytes = PFX_BYTES + 2;
copy_REX_VEX(opc, rex_prefix, vex);
}
- opc[2] = 0xc3;
+ place_ret(&opc[2]);
ea.reg = decode_gpr(&_regs, modrm_reg);
invoke_stub("", "", "=a" (*ea.reg) : "c" (mmvalp), "m" (*mmvalp));
@@ -6883,7 +6889,7 @@ x86_emulate(
insn_bytes = PFX_BYTES + 2;
copy_REX_VEX(opc, rex_prefix, vex);
}
- opc[2] = 0xc3;
+ place_ret(&opc[2]);
_regs.eflags &= ~EFLAGS_MASK;
invoke_stub("",
@@ -7112,7 +7118,7 @@ x86_emulate(
opc[1] = modrm & 0xc7;
insn_bytes = PFX_BYTES + 2;
simd_0f_to_gpr:
- opc[insn_bytes - PFX_BYTES] = 0xc3;
+ place_ret(&opc[insn_bytes - PFX_BYTES]);
generate_exception_if(ea.type != OP_REG, EXC_UD);
@@ -7509,7 +7515,7 @@ x86_emulate(
vex.w = 0;
opc[1] = modrm & 0x38;
insn_bytes = PFX_BYTES + 2;
- opc[2] = 0xc3;
+ place_ret(&opc[2]);
copy_REX_VEX(opc, rex_prefix, vex);
invoke_stub("", "", "+m" (src.val) : "a" (&src.val));
@@ -7537,7 +7543,7 @@ x86_emulate(
evex.w = 0;
opc[1] = modrm & 0x38;
insn_bytes = EVEX_PFX_BYTES + 2;
- opc[2] = 0xc3;
+ place_ret(&opc[2]);
copy_EVEX(opc, evex);
invoke_stub("", "", "+m" (src.val) : "a" (&src.val));
@@ -7732,7 +7738,7 @@ x86_emulate(
#endif /* X86EMUL_NO_SIMD */
simd_0f_reg_only:
- opc[insn_bytes - PFX_BYTES] = 0xc3;
+ place_ret(&opc[insn_bytes - PFX_BYTES]);
copy_REX_VEX(opc, rex_prefix, vex);
invoke_stub("", "", [dummy_out] "=g" (dummy) : [dummy_in] "i" (0) );
@@ -8057,7 +8063,7 @@ x86_emulate(
if ( !mode_64bit() )
vex.w = 0;
opc[1] = modrm & 0xf8;
- opc[2] = 0xc3;
+ place_ret(&opc[2]);
copy_VEX(opc, vex);
ea.reg = decode_gpr(&_regs, modrm_rm);
@@ -8100,7 +8106,7 @@ x86_emulate(
if ( !mode_64bit() )
vex.w = 0;
opc[1] = modrm & 0xc7;
- opc[2] = 0xc3;
+ place_ret(&opc[2]);
copy_VEX(opc, vex);
invoke_stub("", "", "=a" (dst.val) : [dummy] "i" (0));
@@ -8130,7 +8136,7 @@ x86_emulate(
opc = init_prefixes(stub);
opc[0] = b;
opc[1] = modrm;
- opc[2] = 0xc3;
+ place_ret(&opc[2]);
copy_VEX(opc, vex);
_regs.eflags &= ~EFLAGS_MASK;
@@ -9026,7 +9032,7 @@ x86_emulate(
if ( !mode_64bit() )
vex.w = 0;
opc[1] = modrm & 0xc7;
- opc[2] = 0xc3;
+ place_ret(&opc[2]);
copy_REX_VEX(opc, rex_prefix, vex);
invoke_stub("", "", "=a" (ea.val) : [dummy] "i" (0));
@@ -9144,7 +9150,7 @@ x86_emulate(
opc[1] &= 0x38;
}
insn_bytes = PFX_BYTES + 2;
- opc[2] = 0xc3;
+ place_ret(&opc[2]);
if ( vex.opcx == vex_none )
{
/* Cover for extra prefix byte. */
@@ -9423,7 +9429,7 @@ x86_emulate(
pvex->b = !mode_64bit() || (vex.reg >> 3);
opc[1] = 0xc0 | (~vex.reg & 7);
pvex->reg = 0xf;
- opc[2] = 0xc3;
+ place_ret(&opc[2]);
invoke_stub("", "", "=a" (ea.val) : [dummy] "i" (0));
put_stub(stub);
@@ -9683,7 +9689,7 @@ x86_emulate(
evex.w = 0;
opc[1] = modrm & 0xf8;
insn_bytes = EVEX_PFX_BYTES + 2;
- opc[2] = 0xc3;
+ place_ret(&opc[2]);
copy_EVEX(opc, evex);
invoke_stub("", "", "=g" (dummy) : "a" (src.val));
@@ -9782,7 +9788,7 @@ x86_emulate(
pvex->b = 1;
opc[1] = (modrm_reg & 7) << 3;
pvex->reg = 0xf;
- opc[2] = 0xc3;
+ place_ret(&opc[2]);
invoke_stub("", "", "=m" (*mmvalp) : "a" (mmvalp));
@@ -9852,7 +9858,7 @@ x86_emulate(
pvex->b = 1;
opc[1] = (modrm_reg & 7) << 3;
pvex->reg = 0xf;
- opc[2] = 0xc3;
+ place_ret(&opc[2]);
invoke_stub("", "", "+m" (*mmvalp) : "a" (mmvalp));
@@ -9908,7 +9914,7 @@ x86_emulate(
pevex->b = 1;
opc[1] = (modrm_reg & 7) << 3;
pevex->RX = 1;
- opc[2] = 0xc3;
+ place_ret(&opc[2]);
invoke_stub("", "", "=m" (*mmvalp) : "a" (mmvalp));
@@ -9973,7 +9979,7 @@ x86_emulate(
pevex->b = 1;
opc[1] = (modrm_reg & 7) << 3;
pevex->RX = 1;
- opc[2] = 0xc3;
+ place_ret(&opc[2]);
invoke_stub("", "", "+m" (*mmvalp) : "a" (mmvalp));
@@ -9987,7 +9993,7 @@ x86_emulate(
opc[2] = 0x90;
/* Use (%rax) as source. */
opc[3] = evex.opmsk << 3;
- opc[4] = 0xc3;
+ place_ret(&opc[4]);
invoke_stub("", "", "+m" (op_mask) : "a" (&op_mask));
put_stub(stub);
@@ -10081,7 +10087,7 @@ x86_emulate(
pevex->b = 1;
opc[1] = (modrm_reg & 7) << 3;
pevex->RX = 1;
- opc[2] = 0xc3;
+ place_ret(&opc[2]);
invoke_stub("", "", "=m" (*mmvalp) : "a" (mmvalp));
@@ -10159,7 +10165,7 @@ x86_emulate(
opc[2] = 0x90;
/* Use (%rax) as source. */
opc[3] = evex.opmsk << 3;
- opc[4] = 0xc3;
+ place_ret(&opc[4]);
invoke_stub("", "", "+m" (op_mask) : "a" (&op_mask));
put_stub(stub);
@@ -10228,7 +10234,7 @@ x86_emulate(
pevex->r = !mode_64bit() || !(state->sib_index & 0x08);
pevex->R = !mode_64bit() || !(state->sib_index & 0x10);
pevex->RX = 1;
- opc[2] = 0xc3;
+ place_ret(&opc[2]);
invoke_stub("", "", "=m" (index) : "a" (&index));
put_stub(stub);
@@ -10403,7 +10409,7 @@ x86_emulate(
pvex->reg = 0xf; /* rAX */
buf[3] = b;
buf[4] = 0x09; /* reg=rCX r/m=(%rCX) */
- buf[5] = 0xc3;
+ place_ret(&buf[5]);
src.reg = decode_vex_gpr(vex.reg, &_regs, ctxt);
emulate_stub([dst] "=&c" (dst.val), "[dst]" (&src.val), "a" (*src.reg));
@@ -10437,7 +10443,7 @@ x86_emulate(
pvex->reg = 0xf; /* rAX */
buf[3] = b;
buf[4] = (modrm & 0x38) | 0x01; /* r/m=(%rCX) */
- buf[5] = 0xc3;
+ place_ret(&buf[5]);
dst.reg = decode_vex_gpr(vex.reg, &_regs, ctxt);
emulate_stub("=&a" (dst.val), "c" (&src.val));
@@ -10669,7 +10675,7 @@ x86_emulate(
evex.w = vex.w = 0;
opc[1] = modrm & 0x38;
opc[2] = imm1;
- opc[3] = 0xc3;
+ place_ret(&opc[3]);
if ( vex.opcx == vex_none )
{
/* Cover for extra prefix byte. */
@@ -10836,7 +10842,7 @@ x86_emulate(
insn_bytes = PFX_BYTES + 3;
copy_VEX(opc, vex);
}
- opc[3] = 0xc3;
+ place_ret(&opc[3]);
/* Latch MXCSR - we may need to restore it below. */
invoke_stub("stmxcsr %[mxcsr]", "",
@@ -11064,7 +11070,7 @@ x86_emulate(
}
opc[2] = imm1;
insn_bytes = PFX_BYTES + 3;
- opc[3] = 0xc3;
+ place_ret(&opc[3]);
if ( vex.opcx == vex_none )
{
/* Cover for extra prefix byte. */
@@ -11224,7 +11230,7 @@ x86_emulate(
pxop->reg = 0xf; /* rAX */
buf[3] = b;
buf[4] = (modrm & 0x38) | 0x01; /* r/m=(%rCX) */
- buf[5] = 0xc3;
+ place_ret(&buf[5]);
dst.reg = decode_vex_gpr(vex.reg, &_regs, ctxt);
emulate_stub([dst] "=&a" (dst.val), "c" (&src.val));
@@ -11333,7 +11339,7 @@ x86_emulate(
buf[3] = b;
buf[4] = 0x09; /* reg=rCX r/m=(%rCX) */
*(uint32_t *)(buf + 5) = imm1;
- buf[9] = 0xc3;
+ place_ret(&buf[9]);
emulate_stub([dst] "=&c" (dst.val), "[dst]" (&src.val));
@@ -11400,12 +11406,12 @@ x86_emulate(
BUG();
if ( evex_encoded() )
{
- opc[insn_bytes - EVEX_PFX_BYTES] = 0xc3;
+ place_ret(&opc[insn_bytes - EVEX_PFX_BYTES]);
copy_EVEX(opc, evex);
}
else
{
- opc[insn_bytes - PFX_BYTES] = 0xc3;
+ place_ret(&opc[insn_bytes - PFX_BYTES]);
copy_REX_VEX(opc, rex_prefix, vex);
}