Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
SUSE:SLE-12-SP3:Update
xen.14030
xsa293-1.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File xsa293-1.patch of Package xen.14030
From: Andrew Cooper <andrew.cooper3@citrix.com> Subject: x86/pv: Rewrite guest %cr4 handling from scratch The PV cr4 logic is almost impossible to follow, and leaks bits into guest context which definitely shouldn't be visible (in particular, VMXE). The biggest problem however, and source of the complexity, is that it derives new real and guest cr4 values from the current value in hardware - this is context dependent and an inappropriate source of information. Rewrite the cr4 logic to be invariant of the current value in hardware. First of all, modify write_ptbase() to always use mmu_cr4_features for IDLE and HVM contexts. mmu_cr4_features *is* the correct value to use, and makes the ASSERT() obviously redundant. For PV guests, curr->arch.pv.ctrlreg[4] remains the guest's view of cr4, but all logic gets reworked in terms of this and mmu_cr4_features only. Two masks are introduced: bits which the guest has control over, and bits which are forwarded from Xen's settings. One guest-visible change here is that Xen's VMXE setting is no longer visible at all. pv_make_cr4() follows fairly closely from pv_guest_cr4_to_real_cr4(), but deliberately starts with mmu_cr4_features, and only alters the minimal subset of bits. The boot-time {compat_,}pv_cr4_mask variables are removed, as they are a remnant of the pre-CPUID policy days. pv_fixup_guest_cr4() gains a related derivation from the policy. Another guest-visible change here is that a 32bit PV guest can now flip FSGSBASE in its view of CR4. While the {RD,WR}{FS,GS}BASE instructions are unusable outside of a 64bit code segment, the ability to modify FSGSBASE matches real hardware behaviour, and avoids the need for any 32bit/64bit differences in the logic. Overall, this patch shouldn't have a practical change in guest behaviour. VMXE will disappear from view, and an inquisitive 32bit kernel can now see FSGSBASE changing, but this new logic is otherwise bug-compatible with before. 
This is part of XSA-293 Reported-by: Andy Lutomirski <luto@kernel.org> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> Reviewed-by: Jan Beulich <jbeulich@suse.com> --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -437,6 +437,53 @@ int switch_compat(struct domain *d) return -ENOMEM; } +unsigned long pv_fixup_guest_cr4(const struct vcpu *v, unsigned long cr4) +{ + unsigned int leaf1_ecx = 0, leaf1_edx = 0; + unsigned int leaf7_0_ebx = 0, level = 0; + + pv_cpuid(0, 0, &level, NULL, NULL, NULL); + if ( level >= 1 ) + pv_cpuid(1, 0, NULL, NULL, &leaf1_ecx, &leaf1_edx); + if ( level >= 7 ) + pv_cpuid(7, 0, NULL, &leaf7_0_ebx, NULL, NULL); + + /* Discard attempts to set guest controllable bits outside of the policy. */ + cr4 &= ~(((leaf1_edx & cpufeat_mask(X86_FEATURE_TSC)) + ? 0 : X86_CR4_TSD) | + ((leaf1_edx & cpufeat_mask(X86_FEATURE_DE)) + ? 0 : X86_CR4_DE) | + ((leaf7_0_ebx & cpufeat_mask(X86_FEATURE_FSGSBASE)) + ? 0 : X86_CR4_FSGSBASE) | + ((leaf1_ecx & cpufeat_mask(X86_FEATURE_XSAVE)) + ? 0 : X86_CR4_OSXSAVE)); + + /* Masks expected to be disjoint sets. */ + BUILD_BUG_ON(PV_CR4_GUEST_MASK & PV_CR4_GUEST_VISIBLE_MASK); + + /* + * A guest sees the policy subset of its own choice of guest controllable + * bits, and a subset of Xen's choice of certain hardware settings. + */ + return ((cr4 & PV_CR4_GUEST_MASK) | + (mmu_cr4_features & PV_CR4_GUEST_VISIBLE_MASK)); +} + +unsigned long pv_make_cr4(const struct vcpu *v) +{ + const struct domain *d = v->domain; + unsigned long cr4 = mmu_cr4_features & ~X86_CR4_TSD; + + /* + * TSD is needed if either the guest has elected to use it, or Xen is + * virtualising the TSC value the guest sees. + */ + if ( d->arch.vtsc || (v->arch.pv_vcpu.ctrlreg[4] & X86_CR4_TSD) ) + cr4 |= X86_CR4_TSD; + + return cr4; +} + int vcpu_initialise(struct vcpu *v) { struct domain *d = v->domain; @@ -487,6 +534,8 @@ int vcpu_initialise(struct vcpu *v) /* PV guests by default have a 100Hz ticker. 
*/ v->periodic_period = MILLISECS(10); + + v->arch.pv_vcpu.ctrlreg[4] = pv_fixup_guest_cr4(v, 0); } v->arch.schedule_tail = continue_nonidle_domain; @@ -499,8 +548,6 @@ int vcpu_initialise(struct vcpu *v) v->arch.cr3 = __pa(idle_pg_table); } - v->arch.pv_vcpu.ctrlreg[4] = real_cr4_to_pv_guest_cr4(mmu_cr4_features); - rc = is_pv_32on64_vcpu(v) ? setup_compat_l4(v) : 0; done: if ( rc ) @@ -710,26 +757,6 @@ void arch_domain_unpause(struct domain * viridian_time_ref_count_thaw(d); } -unsigned long pv_guest_cr4_fixup(const struct vcpu *v, unsigned long guest_cr4) -{ - unsigned long hv_cr4_mask, hv_cr4 = real_cr4_to_pv_guest_cr4(read_cr4()); - - hv_cr4_mask = ~X86_CR4_TSD; - if ( cpu_has_de ) - hv_cr4_mask &= ~X86_CR4_DE; - if ( cpu_has_fsgsbase && !is_pv_32bit_domain(v->domain) ) - hv_cr4_mask &= ~X86_CR4_FSGSBASE; - if ( cpu_has_xsave ) - hv_cr4_mask &= ~X86_CR4_OSXSAVE; - - if ( (guest_cr4 & hv_cr4_mask) != (hv_cr4 & hv_cr4_mask) ) - printk(XENLOG_G_WARNING - "d%d attempted to change %pv's CR4 flags %08lx -> %08lx\n", - current->domain->domain_id, v, hv_cr4, guest_cr4); - - return (hv_cr4 & hv_cr4_mask) | (guest_cr4 & ~hv_cr4_mask); -} - #define xen_vcpu_guest_context vcpu_guest_context #define fpu_ctxt fpu_ctxt.x CHECK_FIELD_(struct, vcpu_guest_context, fpu_ctxt); @@ -743,7 +770,7 @@ int arch_set_info_guest( struct domain *d = v->domain; unsigned long cr3_gfn; struct page_info *cr3_page; - unsigned long flags, cr4; + unsigned long flags; unsigned int i; int rc = 0, compat; @@ -956,9 +983,8 @@ int arch_set_info_guest( v->arch.pv_vcpu.ctrlreg[0] &= X86_CR0_TS; v->arch.pv_vcpu.ctrlreg[0] |= read_cr0() & ~X86_CR0_TS; - cr4 = v->arch.pv_vcpu.ctrlreg[4]; - v->arch.pv_vcpu.ctrlreg[4] = cr4 ? 
pv_guest_cr4_fixup(v, cr4) : - real_cr4_to_pv_guest_cr4(mmu_cr4_features); + v->arch.pv_vcpu.ctrlreg[4] = + pv_fixup_guest_cr4(v, v->arch.pv_vcpu.ctrlreg[4]); memset(v->arch.debugreg, 0, sizeof(v->arch.debugreg)); for ( i = 0; i < 8; i++ ) @@ -1480,7 +1506,7 @@ static void paravirt_ctxt_switch_to(stru l4e_from_page(v->domain->arch.perdomain_l3_pg, __PAGE_HYPERVISOR); - cr4 = pv_guest_cr4_to_real_cr4(v); + cr4 = pv_make_cr4(v); if ( unlikely(cr4 != read_cr4()) ) write_cr4(cr4); --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -763,7 +763,8 @@ static void _domain_cpuid(struct domain unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { - if ( !is_control_domain(currd) && !is_hardware_domain(currd) ) + if ( !is_control_domain(currd) && !is_hardware_domain(currd) && + !is_idle_domain(currd) ) domain_cpuid(currd, leaf, subleaf, eax, ebx, ecx, edx); else cpuid_count(leaf, subleaf, eax, ebx, ecx, edx); @@ -775,7 +776,8 @@ void pv_cpuid(uint32_t leaf, uint32_t su uint32_t a, b, c, d; struct vcpu *curr = current; - if ( !is_control_domain(curr->domain) && !is_hardware_domain(curr->domain) ) + if ( !is_control_domain(curr->domain) && + !is_hardware_domain(curr->domain) && !is_idle_domain(curr->domain) ) { if ( !cpuid_hypervisor_leaves(leaf, sub_leaf, &a, &b, &c, &d) ) domain_cpuid(curr->domain, leaf, sub_leaf, &a, &b, &c, &d); @@ -2495,8 +2497,8 @@ static int emulate_privileged_op(struct } case 4: /* Write CR4 */ - v->arch.pv_vcpu.ctrlreg[4] = pv_guest_cr4_fixup(v, *reg); - write_cr4(pv_guest_cr4_to_real_cr4(v)); + v->arch.pv_vcpu.ctrlreg[4] = pv_fixup_guest_cr4(v, *reg); + write_cr4(pv_make_cr4(v)); break; default: --- a/xen/include/asm-x86/domain.h +++ b/xen/include/asm-x86/domain.h @@ -535,22 +535,22 @@ bool_t update_secondary_system_time(stru void vcpu_show_execution_state(struct vcpu *); void vcpu_show_registers(const struct vcpu *); -/* Clean up CR4 bits that are not under guest control. 
*/ -unsigned long pv_guest_cr4_fixup(const struct vcpu *, unsigned long guest_cr4); +/* + * Bits which a PV guest can toggle in its view of cr4. Some are loaded into + * hardware, while some are fully emulated. + */ +#define PV_CR4_GUEST_MASK \ + (X86_CR4_TSD | X86_CR4_DE | X86_CR4_FSGSBASE | X86_CR4_OSXSAVE) -/* Convert between guest-visible and real CR4 values. */ -#define pv_guest_cr4_to_real_cr4(v) \ - (((v)->arch.pv_vcpu.ctrlreg[4] \ - | (mmu_cr4_features \ - & (X86_CR4_PGE | X86_CR4_PSE | X86_CR4_SMEP | \ - X86_CR4_SMAP | X86_CR4_OSXSAVE | \ - X86_CR4_FSGSBASE)) \ - | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0)) \ - & ~X86_CR4_DE) -#define real_cr4_to_pv_guest_cr4(c) \ - ((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD | \ - X86_CR4_OSXSAVE | X86_CR4_SMEP | \ - X86_CR4_FSGSBASE | X86_CR4_SMAP)) +/* Bits which a PV guest may observe from the real hardware settings. */ +#define PV_CR4_GUEST_VISIBLE_MASK \ + (X86_CR4_PAE | X86_CR4_MCE | X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT) + +/* Given a new cr4 value, construct the resulting guest-visible cr4 value. */ +unsigned long pv_fixup_guest_cr4(const struct vcpu *v, unsigned long cr4); + +/* Create a cr4 value to load into hardware, based on vcpu settings. */ +unsigned long pv_make_cr4(const struct vcpu *v); void domain_cpuid(struct domain *d, unsigned int input,
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor