File xsa299-4.patch of Package xen.14030
From 39063864e70a30a2ee958953ed71760e7596f202 Mon Sep 17 00:00:00 2001
From: George Dunlap <george.dunlap@citrix.com>
Date: Thu, 10 Oct 2019 17:57:49 +0100
Subject: [PATCH 04/12] x86/mm: Separate out partial_pte tristate into
individual flags
At the moment, partial_pte is a tri-state that contains two distinct bits
of information:
1. If zero, the pte at index [nr_validated_ptes] is un-validated. If
non-zero, the pte was last seen with PGT_partial set.
2. If positive, the pte at index [nr_validated_ptes] does not hold a
general reference count. If negative, it does.
To make future patches more clear, separate out this functionality
into two distinct, named bits: PTF_partial_set (for #1) and
PTF_partial_general_ref (for #2).
Additionally, a number of functions which need this information also
take other flags to control behavior (such as `preemptible` and
`defer`). These are hard to read in the caller (since you only see
'true' or 'false'), and ugly when many are added together. In
preparation for adding yet another flag in a future patch, collapse
all of these into a single `flag` variable.
NB that this does mean checking for what was previously the '-1'
condition a bit more ugly in the put_page_from_lNe functions (since
you have to check for both partial_set and general ref); but this
clause will go away in a future patch.
Also note that the original comment had an off-by-one error:
partial_flags (like partial_pte before it) concerns
plNe[nr_validated_ptes], not plNe[nr_validated_ptes+1].
No functional change intended.
This is part of XSA-299.
Reported-by: George Dunlap <george.dunlap@citrix.com>
Signed-off-by: George Dunlap <george.dunlap@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
---
xen/arch/x86/mm.c | 166 +++++++++++++++++++++++----------------
xen/include/asm-x86/mm.h | 41 +++++++---
2 files changed, 128 insertions(+), 79 deletions(-)
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -664,22 +664,35 @@ static int get_page_from_pagenr(unsigned
static int __get_page_type(struct page_info *page, unsigned long type,
int preemptible);
+/*
+ * The following flags are used to specify behavior of various get and
+ * put commands. The first two are also stored in page->partial_flags
+ * to indicate the state of the page pointed to by
+ * page->pte[page->nr_validated_entries]. See the comment in mm.h for
+ * more information.
+ */
+#define PTF_partial_set (1 << 0)
+#define PTF_partial_general_ref (1 << 1)
+#define PTF_preemptible (1 << 2)
+#define PTF_defer (1 << 3)
+
static int get_page_and_type_from_pagenr(unsigned long page_nr,
unsigned long type,
struct domain *d,
- int partial,
- int preemptible)
+ unsigned int flags)
{
struct page_info *page = mfn_to_page(page_nr);
int rc;
+ bool_t preemptible = !!(flags & PTF_preemptible),
+ partial_ref = !!(flags & PTF_partial_general_ref);
- if ( likely(partial >= 0) &&
+ if ( likely(!partial_ref) &&
unlikely(!get_page_from_pagenr(page_nr, d)) )
return -EINVAL;
rc = __get_page_type(page, type, preemptible);
- if ( unlikely(rc) && partial >= 0 &&
+ if ( unlikely(rc) && !partial_ref &&
(!preemptible || page != current->arch.old_guest_table) )
put_page(page);
@@ -1061,7 +1074,7 @@ get_page_from_l1e(
define_get_linear_pagetable(l2);
static int
get_page_from_l2e(
- l2_pgentry_t l2e, unsigned long pfn, struct domain *d, int partial)
+ l2_pgentry_t l2e, unsigned long pfn, struct domain *d, unsigned int flags)
{
unsigned long mfn = l2e_get_pfn(l2e);
int rc;
@@ -1077,8 +1090,9 @@ get_page_from_l2e(
if ( !(l2e_get_flags(l2e) & _PAGE_PSE) )
{
- rc = get_page_and_type_from_pagenr(mfn, PGT_l1_page_table, d,
- partial, 0);
+ ASSERT(!(flags & PTF_preemptible));
+
+ rc = get_page_and_type_from_pagenr(mfn, PGT_l1_page_table, d, flags);
if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) )
rc = 0;
return rc;
@@ -1104,7 +1118,7 @@ get_page_from_l2e(
define_get_linear_pagetable(l3);
static int
get_page_from_l3e(
- l3_pgentry_t l3e, unsigned long pfn, struct domain *d, int partial)
+ l3_pgentry_t l3e, unsigned long pfn, struct domain *d, unsigned int flags)
{
int rc;
@@ -1118,7 +1132,7 @@ get_page_from_l3e(
}
rc = get_page_and_type_from_pagenr(
- l3e_get_pfn(l3e), PGT_l2_page_table, d, partial, 1);
+ l3e_get_pfn(l3e), PGT_l2_page_table, d, flags | PTF_preemptible);
if ( unlikely(rc == -EINVAL) &&
!is_pv_32bit_domain(d) &&
get_l3_linear_pagetable(l3e, pfn, d) )
@@ -1130,7 +1144,7 @@ get_page_from_l3e(
define_get_linear_pagetable(l4);
static int
get_page_from_l4e(
- l4_pgentry_t l4e, unsigned long pfn, struct domain *d, int partial)
+ l4_pgentry_t l4e, unsigned long pfn, struct domain *d, unsigned int flags)
{
int rc;
@@ -1144,7 +1158,7 @@ get_page_from_l4e(
}
rc = get_page_and_type_from_pagenr(
- l4e_get_pfn(l4e), PGT_l3_page_table, d, partial, 1);
+ l4e_get_pfn(l4e), PGT_l3_page_table, d, flags | PTF_preemptible);
if ( unlikely(rc == -EINVAL) && get_l4_linear_pagetable(l4e, pfn, d) )
rc = 0;
@@ -1274,7 +1288,7 @@ void put_page_from_l1e(l1_pgentry_t l1e,
* Note also that this automatically deals correctly with linear p.t.'s.
*/
static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn,
- int partial, bool_t defer)
+ unsigned int flags)
{
int rc = 0;
@@ -1288,12 +1302,13 @@ static int put_page_from_l2e(l2_pgentry_
struct page_info *pg = l2e_get_page(l2e);
struct page_info *ptpg = mfn_to_page(pfn);
- if ( unlikely(partial > 0) )
+ if ( (flags & (PTF_partial_set | PTF_partial_general_ref)) ==
+ PTF_partial_set )
{
- ASSERT(!defer);
+ ASSERT(!(flags & PTF_defer));
rc = _put_page_type(pg, 1, ptpg);
}
- else if ( defer )
+ else if ( flags & PTF_defer )
{
current->arch.old_guest_ptpg = ptpg;
current->arch.old_guest_table = pg;
@@ -1310,7 +1325,7 @@ static int put_page_from_l2e(l2_pgentry_
}
static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
- int partial, bool_t defer)
+ unsigned int flags)
{
struct page_info *pg;
int rc;
@@ -1333,13 +1348,14 @@ static int put_page_from_l3e(l3_pgentry_
pg = l3e_get_page(l3e);
- if ( unlikely(partial > 0) )
+ if ( (flags & (PTF_partial_set | PTF_partial_general_ref)) ==
+ PTF_partial_set )
{
- ASSERT(!defer);
+ ASSERT(!(flags & PTF_defer));
return _put_page_type(pg, 1, mfn_to_page(pfn));
}
- if ( defer )
+ if ( flags & PTF_defer )
{
current->arch.old_guest_ptpg = mfn_to_page(pfn);
current->arch.old_guest_table = pg;
@@ -1354,7 +1370,7 @@ static int put_page_from_l3e(l3_pgentry_
}
static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn,
- int partial, bool_t defer)
+ unsigned int flags)
{
int rc = 1;
@@ -1363,13 +1379,14 @@ static int put_page_from_l4e(l4_pgentry_
{
struct page_info *pg = l4e_get_page(l4e);
- if ( unlikely(partial > 0) )
+ if ( (flags & (PTF_partial_set | PTF_partial_general_ref)) ==
+ PTF_partial_set )
{
- ASSERT(!defer);
+ ASSERT(!(flags & PTF_defer));
return _put_page_type(pg, 1, mfn_to_page(pfn));
}
- if ( defer )
+ if ( flags & PTF_defer )
{
current->arch.old_guest_ptpg = mfn_to_page(pfn);
current->arch.old_guest_table = pg;
@@ -1481,12 +1498,13 @@ static int alloc_l2_table(struct page_in
unsigned long pfn = page_to_mfn(page);
l2_pgentry_t *pl2e;
unsigned int i;
- int rc = 0, partial = page->partial_pte;
+ int rc = 0;
+ unsigned int partial_flags = page->partial_flags;
pl2e = map_domain_page(pfn);
for ( i = page->nr_validated_ptes; i < L2_PAGETABLE_ENTRIES;
- i++, partial = 0 )
+ i++, partial_flags = 0 )
{
if ( i > page->nr_validated_ptes && hypercall_preempt_check() )
{
@@ -1496,18 +1514,19 @@ static int alloc_l2_table(struct page_in
}
if ( !is_guest_l2_slot(d, type, i) ||
- (rc = get_page_from_l2e(pl2e[i], pfn, d, partial)) > 0 )
+ (rc = get_page_from_l2e(pl2e[i], pfn, d, partial_flags)) > 0 )
continue;
if ( rc == -ERESTART )
{
page->nr_validated_ptes = i;
- page->partial_pte = partial ?: 1;
+ /* Set 'set', retain 'general ref' */
+ page->partial_flags = partial_flags | PTF_partial_set;
}
else if ( rc == -EINTR && i )
{
page->nr_validated_ptes = i;
- page->partial_pte = 0;
+ page->partial_flags = 0;
rc = -ERESTART;
}
else if ( rc < 0 && rc != -EINTR )
@@ -1516,7 +1535,7 @@ static int alloc_l2_table(struct page_in
if ( i )
{
page->nr_validated_ptes = i;
- page->partial_pte = 0;
+ page->partial_flags = 0;
current->arch.old_guest_ptpg = NULL;
current->arch.old_guest_table = page;
}
@@ -1546,7 +1565,8 @@ static int alloc_l3_table(struct page_in
unsigned long pfn = page_to_mfn(page);
l3_pgentry_t *pl3e;
unsigned int i;
- int rc = 0, partial = page->partial_pte;
+ int rc = 0;
+ unsigned int partial_flags = page->partial_flags;
pl3e = map_domain_page(pfn);
@@ -1561,7 +1581,7 @@ static int alloc_l3_table(struct page_in
memset(pl3e + 4, 0, (L3_PAGETABLE_ENTRIES - 4) * sizeof(*pl3e));
for ( i = page->nr_validated_ptes; i < L3_PAGETABLE_ENTRIES;
- i++, partial = 0 )
+ i++, partial_flags = 0 )
{
if ( i > page->nr_validated_ptes && hypercall_preempt_check() )
{
@@ -1579,21 +1599,23 @@ static int alloc_l3_table(struct page_in
rc = get_page_and_type_from_pagenr(l3e_get_pfn(pl3e[i]),
PGT_l2_page_table |
PGT_pae_xen_l2,
- d, partial, 1);
+ d,
+ partial_flags | PTF_preemptible);
}
else if ( !is_guest_l3_slot(i) ||
- (rc = get_page_from_l3e(pl3e[i], pfn, d, partial)) > 0 )
+ (rc = get_page_from_l3e(pl3e[i], pfn, d, partial_flags)) > 0 )
continue;
if ( rc == -ERESTART )
{
page->nr_validated_ptes = i;
- page->partial_pte = partial ?: 1;
+ /* Set 'set', leave 'general ref' set if this entry was set */
+ page->partial_flags = partial_flags | PTF_partial_set;
}
else if ( rc == -EINTR && i )
{
page->nr_validated_ptes = i;
- page->partial_pte = 0;
+ page->partial_flags = 0;
rc = -ERESTART;
}
if ( rc < 0 )
@@ -1610,7 +1632,7 @@ static int alloc_l3_table(struct page_in
if ( i )
{
page->nr_validated_ptes = i;
- page->partial_pte = 0;
+ page->partial_flags = 0;
current->arch.old_guest_ptpg = NULL;
current->arch.old_guest_table = page;
}
@@ -1676,19 +1698,21 @@ static int alloc_l4_table(struct page_in
unsigned long pfn = page_to_mfn(page);
l4_pgentry_t *pl4e = map_domain_page(pfn);
unsigned int i;
- int rc = 0, partial = page->partial_pte;
+ int rc = 0;
+ unsigned int partial_flags = page->partial_flags;
for ( i = page->nr_validated_ptes; i < L4_PAGETABLE_ENTRIES;
- i++, partial = 0 )
+ i++, partial_flags = 0 )
{
if ( !is_guest_l4_slot(d, i) ||
- (rc = get_page_from_l4e(pl4e[i], pfn, d, partial)) > 0 )
+ (rc = get_page_from_l4e(pl4e[i], pfn, d, partial_flags)) > 0 )
continue;
if ( rc == -ERESTART )
{
page->nr_validated_ptes = i;
- page->partial_pte = partial ?: 1;
+ /* Set 'set', leave 'general ref' set if this entry was set */
+ page->partial_flags = partial_flags | PTF_partial_set;
}
else if ( rc < 0 )
{
@@ -1697,7 +1721,7 @@ static int alloc_l4_table(struct page_in
if ( i )
{
page->nr_validated_ptes = i;
- page->partial_pte = 0;
+ page->partial_flags = 0;
if ( rc == -EINTR )
rc = -ERESTART;
else
@@ -1751,19 +1775,20 @@ static int free_l2_table(struct page_inf
struct domain *d = page_get_owner(page);
unsigned long pfn = page_to_mfn(page);
l2_pgentry_t *pl2e;
- int rc = 0, partial = page->partial_pte;
- unsigned int i = page->nr_validated_ptes - !partial;
+ int rc = 0;
+ unsigned int partial_flags = page->partial_flags,
+ i = page->nr_validated_ptes - !(partial_flags & PTF_partial_set);
pl2e = map_domain_page(pfn);
for ( ; ; )
{
if ( is_guest_l2_slot(d, page->u.inuse.type_info, i) )
- rc = put_page_from_l2e(pl2e[i], pfn, partial, 0);
+ rc = put_page_from_l2e(pl2e[i], pfn, partial_flags);
if ( rc < 0 )
break;
- partial = 0;
+ partial_flags = 0;
if ( !i-- )
break;
@@ -1785,12 +1810,14 @@ static int free_l2_table(struct page_inf
else if ( rc == -ERESTART )
{
page->nr_validated_ptes = i;
- page->partial_pte = partial ?: -1;
+ page->partial_flags = (partial_flags & PTF_partial_set) ?
+ partial_flags :
+ (PTF_partial_set | PTF_partial_general_ref);
}
else if ( rc == -EINTR && i < L2_PAGETABLE_ENTRIES - 1 )
{
page->nr_validated_ptes = i + 1;
- page->partial_pte = 0;
+ page->partial_flags = 0;
rc = -ERESTART;
}
@@ -1802,8 +1829,9 @@ static int free_l3_table(struct page_inf
struct domain *d = page_get_owner(page);
unsigned long pfn = page_to_mfn(page);
l3_pgentry_t *pl3e;
- int rc = 0, partial = page->partial_pte;
- unsigned int i = page->nr_validated_ptes - !partial;
+ int rc = 0;
+ unsigned int partial_flags = page->partial_flags,
+ i = page->nr_validated_ptes - !(partial_flags & PTF_partial_set);
pl3e = map_domain_page(pfn);
@@ -1811,11 +1839,11 @@ static int free_l3_table(struct page_inf
{
if ( is_guest_l3_slot(i) )
{
- rc = put_page_from_l3e(pl3e[i], pfn, partial, 0);
+ rc = put_page_from_l3e(pl3e[i], pfn, partial_flags);
if ( rc < 0 )
break;
- partial = 0;
+ partial_flags = 0;
if ( rc == 0 )
unadjust_guest_l3e(pl3e[i], d);
}
@@ -1835,12 +1863,14 @@ static int free_l3_table(struct page_inf
if ( rc == -ERESTART )
{
page->nr_validated_ptes = i;
- page->partial_pte = partial ?: -1;
+ page->partial_flags = (partial_flags & PTF_partial_set) ?
+ partial_flags :
+ (PTF_partial_set | PTF_partial_general_ref);
}
else if ( rc == -EINTR && i < L3_PAGETABLE_ENTRIES - 1 )
{
page->nr_validated_ptes = i + 1;
- page->partial_pte = 0;
+ page->partial_flags = 0;
rc = -ERESTART;
}
return rc > 0 ? 0 : rc;
@@ -1851,26 +1881,29 @@ static int free_l4_table(struct page_inf
struct domain *d = page_get_owner(page);
unsigned long pfn = page_to_mfn(page);
l4_pgentry_t *pl4e = map_domain_page(pfn);
- int rc = 0, partial = page->partial_pte;
- unsigned int i = page->nr_validated_ptes - !partial;
+ int rc = 0;
+ unsigned partial_flags = page->partial_flags,
+ i = page->nr_validated_ptes - !(partial_flags & PTF_partial_set);
do {
if ( is_guest_l4_slot(d, i) )
- rc = put_page_from_l4e(pl4e[i], pfn, partial, 0);
+ rc = put_page_from_l4e(pl4e[i], pfn, partial_flags);
if ( rc < 0 )
break;
- partial = 0;
+ partial_flags = 0;
} while ( i-- );
if ( rc == -ERESTART )
{
page->nr_validated_ptes = i;
- page->partial_pte = partial ?: -1;
+ page->partial_flags = (partial_flags & PTF_partial_set) ?
+ partial_flags :
+ (PTF_partial_set | PTF_partial_general_ref);
}
else if ( rc == -EINTR && i < L4_PAGETABLE_ENTRIES - 1 )
{
page->nr_validated_ptes = i + 1;
- page->partial_pte = 0;
+ page->partial_flags = 0;
rc = -ERESTART;
}
@@ -2129,7 +2162,7 @@ static int mod_l2_entry(l2_pgentry_t *pl
return -EBUSY;
}
- put_page_from_l2e(ol2e, pfn, 0, 1);
+ put_page_from_l2e(ol2e, pfn, PTF_defer);
return rc;
}
@@ -2203,7 +2236,7 @@ static int mod_l3_entry(l3_pgentry_t *pl
if ( !create_pae_xen_mappings(d, pl3e) )
BUG();
- put_page_from_l3e(ol3e, pfn, 0, 1);
+ put_page_from_l3e(ol3e, pfn, PTF_defer);
return rc;
}
@@ -2265,7 +2298,7 @@ static int mod_l4_entry(l4_pgentry_t *pl
return -EFAULT;
}
- put_page_from_l4e(ol4e, pfn, 0, 1);
+ put_page_from_l4e(ol4e, pfn, PTF_defer);
return rc;
}
@@ -2524,7 +2557,7 @@ int free_page_type(struct page_info *pag
if ( !(type & PGT_partial) )
{
page->nr_validated_ptes = 1U << PAGETABLE_ORDER;
- page->partial_pte = 0;
+ page->partial_flags = 0;
}
switch ( type & PGT_type_mask )
@@ -2804,7 +2837,7 @@ static int __get_page_type(struct page_i
if ( !(x & PGT_partial) )
{
page->nr_validated_ptes = 0;
- page->partial_pte = 0;
+ page->partial_flags = 0;
}
page->linear_pt_count = 0;
rc = alloc_page_type(page, type, preemptible);
@@ -3169,7 +3202,8 @@ int new_guest_cr3(unsigned long mfn)
rc = paging_mode_refcounts(d)
? (get_page_from_pagenr(mfn, d) ? 0 : -EINVAL)
- : get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0, 1);
+ : get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d,
+ PTF_preemptible);
switch ( rc )
{
case 0:
@@ -3535,7 +3569,7 @@ long do_mmuext_op(
else
{
rc = get_page_and_type_from_pagenr(
- op.arg1.mfn, PGT_root_page_table, d, 0, 1);
+ op.arg1.mfn, PGT_root_page_table, d, PTF_preemptible);
okay = !rc;
}
if ( unlikely(!okay) )
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -135,19 +135,34 @@ struct page_info
* setting the flag must not drop that reference, whereas the instance
* clearing it will have to.
*
- * If @partial_pte is positive then PTE at @nr_validated_ptes+1 has
- * been partially validated. This implies that the general reference
- * to the page (acquired from get_page_from_lNe()) would be dropped
- * (again due to the apparent failure) and hence must be re-acquired
- * when resuming the validation, but must not be dropped when picking
- * up the page for invalidation.
- *
- * If @partial_pte is negative then PTE at @nr_validated_ptes+1 has
- * been partially invalidated. This is basically the opposite case of
- * above, i.e. the general reference to the page was not dropped in
- * put_page_from_lNe() (due to the apparent failure), and hence it
- * must be dropped when the put operation is resumed (and completes),
- * but it must not be acquired if picking up the page for validation.
+ * If partial_flags & PTF_partial_set is set, then the page at
+ * at @nr_validated_ptes had PGT_partial set as a result of an
+ * operation on the current page. (That page may or may not
+ * still have PGT_partial set.)
+ *
+ * If PTF_partial_general_ref is set, then the PTE at
+ * @nr_validated_ptef holds a general reference count for the
+ * page.
+ *
+ * This happens:
+ * - During de-validation, if de-validation of the page was
+ * interrupted
+ * - During validation, if an invalid entry is encountered and
+ * validation is preemptible
+ * - During validation, if PTF_partial_general_ref was set on
+ * this entry to begin with (perhaps because we're picking
+ * up from a partial de-validation).
+ *
+ * When resuming validation, if PTF_partial_general_ref is clear,
+ * then a general reference must be re-acquired; if it is set, no
+ * reference should be acquired.
+ *
+ * When resuming de-validation, if PTF_partial_general_ref is
+ * clear, no reference should be dropped; if it is set, a
+ * reference should be dropped.
+ *
+ * NB that PTF_partial_set and PTF_partial_general_ref are
+ * defined in mm.c, the only place where they are used.
*
* The 3rd field, @linear_pt_count, indicates
* - by a positive value, how many same-level page table entries a page
@@ -158,7 +173,7 @@ struct page_info
struct {
u16 nr_validated_ptes:PAGETABLE_ORDER + 1;
u16 :16 - PAGETABLE_ORDER - 1 - 2;
- s16 partial_pte:2;
+ u16 partial_flags:2;
s16 linear_pt_count;
};