File xsa299-0003-x86-mm-Separate-out-partial_pte-tristate-into-indivi.patch of Package xen.13144
From 7c0a37005f52d10903ce22851b52ae9b6f4f0ee2 Mon Sep 17 00:00:00 2001
From: George Dunlap <george.dunlap@citrix.com>
Date: Thu, 10 Oct 2019 17:57:49 +0100
Subject: [PATCH 03/11] x86/mm: Separate out partial_pte tristate into
individual flags
At the moment, partial_pte is a tri-state that contains two distinct bits
of information:
1. If zero, the pte at index [nr_validated_ptes] is un-validated. If
non-zero, the pte was last seen with PGT_partial set.
2. If positive, the pte at index [nr_validated_ptes] does not hold a
general reference count. If negative, it does.
To make future patches more clear, separate out this functionality
into two distinct, named bits: PTF_partial_set (for #1) and
PTF_partial_general_ref (for #2).
Additionally, a number of functions which need this information also
take other flags to control behavior (such as `preemptible` and
`defer`). These are hard to read in the caller (since you only see
'true' or 'false'), and ugly when many are added together. In
preparation for adding yet another flag in a future patch, collapse
all of these into a single `flag` variable.
NB that this does mean checking for what was previously the '-1'
condition a bit more ugly in the put_page_from_lNe functions (since
you have to check for both partial_set and general ref); but this
clause will go away in a future patch.
Also note that the original comment had an off-by-one error:
partial_flags (like partial_pte before it) concerns
plNe[nr_validated_ptes], not plNe[nr_validated_ptes+1].
No functional change intended.
This is part of XSA-299.
Signed-off-by: George Dunlap <george.dunlap@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
---
xen/arch/x86/mm.c | 164 +++++++++++++++++++++++----------------
xen/include/asm-x86/mm.h | 41 ++++++----
2 files changed, 127 insertions(+), 78 deletions(-)
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 8ced185b49..1c4f54e328 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -610,20 +610,34 @@ static int alloc_segdesc_page(struct page_info *page)
static int _get_page_type(struct page_info *page, unsigned long type,
bool preemptible);
+/*
+ * The following flags are used to specify behavior of various get and
+ * put commands. The first two are also stored in page->partial_flags
+ * to indicate the state of the page pointed to by
+ * page->pte[page->nr_validated_entries]. See the comment in mm.h for
+ * more information.
+ */
+#define PTF_partial_set (1 << 0)
+#define PTF_partial_general_ref (1 << 1)
+#define PTF_preemptible (1 << 2)
+#define PTF_defer (1 << 3)
+
static int get_page_and_type_from_mfn(
mfn_t mfn, unsigned long type, struct domain *d,
- int partial, int preemptible)
+ unsigned int flags)
{
struct page_info *page = mfn_to_page(mfn);
int rc;
+ bool preemptible = flags & PTF_preemptible,
+ partial_ref = flags & PTF_partial_general_ref;
- if ( likely(partial >= 0) &&
+ if ( likely(!partial_ref) &&
unlikely(!get_page_from_mfn(mfn, d)) )
return -EINVAL;
rc = _get_page_type(page, type, preemptible);
- if ( unlikely(rc) && partial >= 0 &&
+ if ( unlikely(rc) && !partial_ref &&
(!preemptible || page != current->arch.old_guest_table) )
put_page(page);
@@ -1104,7 +1118,7 @@ get_page_from_l1e(
define_get_linear_pagetable(l2);
static int
get_page_from_l2e(
- l2_pgentry_t l2e, unsigned long pfn, struct domain *d, int partial)
+ l2_pgentry_t l2e, unsigned long pfn, struct domain *d, unsigned int flags)
{
unsigned long mfn = l2e_get_pfn(l2e);
int rc;
@@ -1119,8 +1133,9 @@ get_page_from_l2e(
return -EINVAL;
}
- rc = get_page_and_type_from_mfn(_mfn(mfn), PGT_l1_page_table, d,
- partial, false);
+ ASSERT(!(flags & PTF_preemptible));
+
+ rc = get_page_and_type_from_mfn(_mfn(mfn), PGT_l1_page_table, d, flags);
if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) )
rc = 0;
@@ -1137,7 +1152,7 @@ get_page_from_l2e(
define_get_linear_pagetable(l3);
static int
get_page_from_l3e(
- l3_pgentry_t l3e, unsigned long pfn, struct domain *d, int partial)
+ l3_pgentry_t l3e, unsigned long pfn, struct domain *d, unsigned int flags)
{
int rc;
@@ -1152,7 +1167,7 @@ get_page_from_l3e(
}
rc = get_page_and_type_from_mfn(
- l3e_get_mfn(l3e), PGT_l2_page_table, d, partial, 1);
+ l3e_get_mfn(l3e), PGT_l2_page_table, d, flags | PTF_preemptible);
if ( unlikely(rc == -EINVAL) &&
!is_pv_32bit_domain(d) &&
get_l3_linear_pagetable(l3e, pfn, d) )
@@ -1170,7 +1185,7 @@ get_page_from_l3e(
define_get_linear_pagetable(l4);
static int
get_page_from_l4e(
- l4_pgentry_t l4e, unsigned long pfn, struct domain *d, int partial)
+ l4_pgentry_t l4e, unsigned long pfn, struct domain *d, unsigned int flags)
{
int rc;
@@ -1185,7 +1200,7 @@ get_page_from_l4e(
}
rc = get_page_and_type_from_mfn(
- l4e_get_mfn(l4e), PGT_l3_page_table, d, partial, 1);
+ l4e_get_mfn(l4e), PGT_l3_page_table, d, flags | PTF_preemptible);
if ( unlikely(rc == -EINVAL) && get_l4_linear_pagetable(l4e, pfn, d) )
rc = 0;
@@ -1275,7 +1290,7 @@ void put_page_from_l1e(l1_pgentry_t l1e, struct domain *l1e_owner)
* Note also that this automatically deals correctly with linear p.t.'s.
*/
static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn,
- int partial, bool defer)
+ unsigned int flags)
{
int rc = 0;
@@ -1295,12 +1310,13 @@ static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn,
struct page_info *pg = l2e_get_page(l2e);
struct page_info *ptpg = mfn_to_page(_mfn(pfn));
- if ( unlikely(partial > 0) )
+ if ( (flags & (PTF_partial_set | PTF_partial_general_ref)) ==
+ PTF_partial_set )
{
- ASSERT(!defer);
+ ASSERT(!(flags & PTF_defer));
rc = _put_page_type(pg, true, ptpg);
}
- else if ( defer )
+ else if ( flags & PTF_defer )
{
current->arch.old_guest_ptpg = ptpg;
current->arch.old_guest_table = pg;
@@ -1317,7 +1333,7 @@ static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn,
}
static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
- int partial, bool defer)
+ unsigned int flags)
{
struct page_info *pg;
int rc;
@@ -1340,13 +1356,14 @@ static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
pg = l3e_get_page(l3e);
- if ( unlikely(partial > 0) )
+ if ( (flags & (PTF_partial_set | PTF_partial_general_ref)) ==
+ PTF_partial_set )
{
- ASSERT(!defer);
+ ASSERT(!(flags & PTF_defer));
return _put_page_type(pg, true, mfn_to_page(_mfn(pfn)));
}
- if ( defer )
+ if ( flags & PTF_defer )
{
current->arch.old_guest_ptpg = mfn_to_page(_mfn(pfn));
current->arch.old_guest_table = pg;
@@ -1361,7 +1378,7 @@ static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
}
static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn,
- int partial, bool defer)
+ unsigned int flags)
{
int rc = 1;
@@ -1370,13 +1387,14 @@ static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn,
{
struct page_info *pg = l4e_get_page(l4e);
- if ( unlikely(partial > 0) )
+ if ( (flags & (PTF_partial_set | PTF_partial_general_ref)) ==
+ PTF_partial_set )
{
- ASSERT(!defer);
+ ASSERT(!(flags & PTF_defer));
return _put_page_type(pg, true, mfn_to_page(_mfn(pfn)));
}
- if ( defer )
+ if ( flags & PTF_defer )
{
current->arch.old_guest_ptpg = mfn_to_page(_mfn(pfn));
current->arch.old_guest_table = pg;
@@ -1483,12 +1501,13 @@ static int alloc_l2_table(struct page_info *page, unsigned long type)
unsigned long pfn = mfn_x(page_to_mfn(page));
l2_pgentry_t *pl2e;
unsigned int i;
- int rc = 0, partial = page->partial_pte;
+ int rc = 0;
+ unsigned int partial_flags = page->partial_flags;
pl2e = map_domain_page(_mfn(pfn));
for ( i = page->nr_validated_ptes; i < L2_PAGETABLE_ENTRIES;
- i++, partial = 0 )
+ i++, partial_flags = 0 )
{
if ( i > page->nr_validated_ptes && hypercall_preempt_check() )
{
@@ -1498,18 +1517,19 @@ static int alloc_l2_table(struct page_info *page, unsigned long type)
}
if ( !is_guest_l2_slot(d, type, i) ||
- (rc = get_page_from_l2e(pl2e[i], pfn, d, partial)) > 0 )
+ (rc = get_page_from_l2e(pl2e[i], pfn, d, partial_flags)) > 0 )
continue;
if ( rc == -ERESTART )
{
page->nr_validated_ptes = i;
- page->partial_pte = partial ?: 1;
+ /* Set 'set', retain 'general ref' */
+ page->partial_flags = partial_flags | PTF_partial_set;
}
else if ( rc == -EINTR && i )
{
page->nr_validated_ptes = i;
- page->partial_pte = 0;
+ page->partial_flags = 0;
rc = -ERESTART;
}
else if ( rc < 0 && rc != -EINTR )
@@ -1518,7 +1538,7 @@ static int alloc_l2_table(struct page_info *page, unsigned long type)
if ( i )
{
page->nr_validated_ptes = i;
- page->partial_pte = 0;
+ page->partial_flags = 0;
current->arch.old_guest_ptpg = NULL;
current->arch.old_guest_table = page;
}
@@ -1542,7 +1562,8 @@ static int alloc_l3_table(struct page_info *page)
unsigned long pfn = mfn_x(page_to_mfn(page));
l3_pgentry_t *pl3e;
unsigned int i;
- int rc = 0, partial = page->partial_pte;
+ int rc = 0;
+ unsigned int partial_flags = page->partial_flags;
pl3e = map_domain_page(_mfn(pfn));
@@ -1557,7 +1578,7 @@ static int alloc_l3_table(struct page_info *page)
memset(pl3e + 4, 0, (L3_PAGETABLE_ENTRIES - 4) * sizeof(*pl3e));
for ( i = page->nr_validated_ptes; i < L3_PAGETABLE_ENTRIES;
- i++, partial = 0 )
+ i++, partial_flags = 0 )
{
if ( i > page->nr_validated_ptes && hypercall_preempt_check() )
{
@@ -1574,20 +1595,22 @@ static int alloc_l3_table(struct page_info *page)
else
rc = get_page_and_type_from_mfn(
l3e_get_mfn(pl3e[i]),
- PGT_l2_page_table | PGT_pae_xen_l2, d, partial, 1);
+ PGT_l2_page_table | PGT_pae_xen_l2, d,
+ partial_flags | PTF_preemptible);
}
- else if ( (rc = get_page_from_l3e(pl3e[i], pfn, d, partial)) > 0 )
+ else if ( (rc = get_page_from_l3e(pl3e[i], pfn, d, partial_flags)) > 0 )
continue;
if ( rc == -ERESTART )
{
page->nr_validated_ptes = i;
- page->partial_pte = partial ?: 1;
+ /* Set 'set', leave 'general ref' set if this entry was set */
+ page->partial_flags = partial_flags | PTF_partial_set;
}
else if ( rc == -EINTR && i )
{
page->nr_validated_ptes = i;
- page->partial_pte = 0;
+ page->partial_flags = 0;
rc = -ERESTART;
}
if ( rc < 0 )
@@ -1604,7 +1627,7 @@ static int alloc_l3_table(struct page_info *page)
if ( i )
{
page->nr_validated_ptes = i;
- page->partial_pte = 0;
+ page->partial_flags = 0;
current->arch.old_guest_ptpg = NULL;
current->arch.old_guest_table = page;
}
@@ -1736,19 +1759,21 @@ static int alloc_l4_table(struct page_info *page)
unsigned long pfn = mfn_x(page_to_mfn(page));
l4_pgentry_t *pl4e = map_domain_page(_mfn(pfn));
unsigned int i;
- int rc = 0, partial = page->partial_pte;
+ int rc = 0;
+ unsigned int partial_flags = page->partial_flags;
for ( i = page->nr_validated_ptes; i < L4_PAGETABLE_ENTRIES;
- i++, partial = 0 )
+ i++, partial_flags = 0 )
{
if ( !is_guest_l4_slot(d, i) ||
- (rc = get_page_from_l4e(pl4e[i], pfn, d, partial)) > 0 )
+ (rc = get_page_from_l4e(pl4e[i], pfn, d, partial_flags)) > 0 )
continue;
if ( rc == -ERESTART )
{
page->nr_validated_ptes = i;
- page->partial_pte = partial ?: 1;
+ /* Set 'set', leave 'general ref' set if this entry was set */
+ page->partial_flags = partial_flags | PTF_partial_set;
}
else if ( rc < 0 )
{
@@ -1758,7 +1783,7 @@ static int alloc_l4_table(struct page_info *page)
if ( i )
{
page->nr_validated_ptes = i;
- page->partial_pte = 0;
+ page->partial_flags = 0;
if ( rc == -EINTR )
rc = -ERESTART;
else
@@ -1811,19 +1836,20 @@ static int free_l2_table(struct page_info *page)
struct domain *d = page_get_owner(page);
unsigned long pfn = mfn_x(page_to_mfn(page));
l2_pgentry_t *pl2e;
- int rc = 0, partial = page->partial_pte;
- unsigned int i = page->nr_validated_ptes - !partial;
+ int rc = 0;
+ unsigned int partial_flags = page->partial_flags,
+ i = page->nr_validated_ptes - !(partial_flags & PTF_partial_set);
pl2e = map_domain_page(_mfn(pfn));
for ( ; ; )
{
if ( is_guest_l2_slot(d, page->u.inuse.type_info, i) )
- rc = put_page_from_l2e(pl2e[i], pfn, partial, false);
+ rc = put_page_from_l2e(pl2e[i], pfn, partial_flags);
if ( rc < 0 )
break;
- partial = 0;
+ partial_flags = 0;
if ( !i-- )
break;
@@ -1845,12 +1871,14 @@ static int free_l2_table(struct page_info *page)
else if ( rc == -ERESTART )
{
page->nr_validated_ptes = i;
- page->partial_pte = partial ?: -1;
+ page->partial_flags = (partial_flags & PTF_partial_set) ?
+ partial_flags :
+ (PTF_partial_set | PTF_partial_general_ref);
}
else if ( rc == -EINTR && i < L2_PAGETABLE_ENTRIES - 1 )
{
page->nr_validated_ptes = i + 1;
- page->partial_pte = 0;
+ page->partial_flags = 0;
rc = -ERESTART;
}
@@ -1862,18 +1890,19 @@ static int free_l3_table(struct page_info *page)
struct domain *d = page_get_owner(page);
unsigned long pfn = mfn_x(page_to_mfn(page));
l3_pgentry_t *pl3e;
- int rc = 0, partial = page->partial_pte;
- unsigned int i = page->nr_validated_ptes - !partial;
+ int rc = 0;
+ unsigned int partial_flags = page->partial_flags,
+ i = page->nr_validated_ptes - !(partial_flags & PTF_partial_set);
pl3e = map_domain_page(_mfn(pfn));
for ( ; ; )
{
- rc = put_page_from_l3e(pl3e[i], pfn, partial, 0);
+ rc = put_page_from_l3e(pl3e[i], pfn, partial_flags);
if ( rc < 0 )
break;
- partial = 0;
+ partial_flags = 0;
if ( rc == 0 )
pl3e[i] = unadjust_guest_l3e(pl3e[i], d);
@@ -1892,12 +1921,14 @@ static int free_l3_table(struct page_info *page)
if ( rc == -ERESTART )
{
page->nr_validated_ptes = i;
- page->partial_pte = partial ?: -1;
+ page->partial_flags = (partial_flags & PTF_partial_set) ?
+ partial_flags :
+ (PTF_partial_set | PTF_partial_general_ref);
}
else if ( rc == -EINTR && i < L3_PAGETABLE_ENTRIES - 1 )
{
page->nr_validated_ptes = i + 1;
- page->partial_pte = 0;
+ page->partial_flags = 0;
rc = -ERESTART;
}
return rc > 0 ? 0 : rc;
@@ -1908,26 +1939,29 @@ static int free_l4_table(struct page_info *page)
struct domain *d = page_get_owner(page);
unsigned long pfn = mfn_x(page_to_mfn(page));
l4_pgentry_t *pl4e = map_domain_page(_mfn(pfn));
- int rc = 0, partial = page->partial_pte;
- unsigned int i = page->nr_validated_ptes - !partial;
+ int rc = 0;
+ unsigned partial_flags = page->partial_flags,
+ i = page->nr_validated_ptes - !(partial_flags & PTF_partial_set);
do {
if ( is_guest_l4_slot(d, i) )
- rc = put_page_from_l4e(pl4e[i], pfn, partial, 0);
+ rc = put_page_from_l4e(pl4e[i], pfn, partial_flags);
if ( rc < 0 )
break;
- partial = 0;
+ partial_flags = 0;
} while ( i-- );
if ( rc == -ERESTART )
{
page->nr_validated_ptes = i;
- page->partial_pte = partial ?: -1;
+ page->partial_flags = (partial_flags & PTF_partial_set) ?
+ partial_flags :
+ (PTF_partial_set | PTF_partial_general_ref);
}
else if ( rc == -EINTR && i < L4_PAGETABLE_ENTRIES - 1 )
{
page->nr_validated_ptes = i + 1;
- page->partial_pte = 0;
+ page->partial_flags = 0;
rc = -ERESTART;
}
@@ -2203,7 +2237,7 @@ static int mod_l2_entry(l2_pgentry_t *pl2e,
return -EBUSY;
}
- put_page_from_l2e(ol2e, pfn, 0, true);
+ put_page_from_l2e(ol2e, pfn, PTF_defer);
return rc;
}
@@ -2271,7 +2305,7 @@ static int mod_l3_entry(l3_pgentry_t *pl3e,
if ( !create_pae_xen_mappings(d, pl3e) )
BUG();
- put_page_from_l3e(ol3e, pfn, 0, 1);
+ put_page_from_l3e(ol3e, pfn, PTF_defer);
return rc;
}
@@ -2334,7 +2368,7 @@ static int mod_l4_entry(l4_pgentry_t *pl4e,
return -EFAULT;
}
- put_page_from_l4e(ol4e, pfn, 0, 1);
+ put_page_from_l4e(ol4e, pfn, PTF_defer);
return rc;
}
@@ -2598,7 +2632,7 @@ int free_page_type(struct page_info *page, unsigned long type,
if ( !(type & PGT_partial) )
{
page->nr_validated_ptes = 1U << PAGETABLE_ORDER;
- page->partial_pte = 0;
+ page->partial_flags = 0;
}
switch ( type & PGT_type_mask )
@@ -2889,7 +2923,7 @@ static int _get_page_type(struct page_info *page, unsigned long type,
if ( !(x & PGT_partial) )
{
page->nr_validated_ptes = 0;
- page->partial_pte = 0;
+ page->partial_flags = 0;
}
page->linear_pt_count = 0;
rc = alloc_page_type(page, type, preemptible);
@@ -3064,7 +3098,7 @@ int new_guest_cr3(mfn_t mfn)
return 0;
}
- rc = get_page_and_type_from_mfn(mfn, PGT_root_page_table, d, 0, 1);
+ rc = get_page_and_type_from_mfn(mfn, PGT_root_page_table, d, PTF_preemptible);
switch ( rc )
{
case 0:
@@ -3452,7 +3486,7 @@ long do_mmuext_op(
if ( op.arg1.mfn != 0 )
{
rc = get_page_and_type_from_mfn(
- _mfn(op.arg1.mfn), PGT_root_page_table, currd, 0, 1);
+ _mfn(op.arg1.mfn), PGT_root_page_table, currd, PTF_preemptible);
if ( unlikely(rc) )
{
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index 1ea173c555..46cba52941 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -228,19 +228,34 @@ struct page_info
* setting the flag must not drop that reference, whereas the instance
* clearing it will have to.
*
- * If @partial_pte is positive then PTE at @nr_validated_ptes+1 has
- * been partially validated. This implies that the general reference
- * to the page (acquired from get_page_from_lNe()) would be dropped
- * (again due to the apparent failure) and hence must be re-acquired
- * when resuming the validation, but must not be dropped when picking
- * up the page for invalidation.
+ * If partial_flags & PTF_partial_set is set, then the page at
+ * at @nr_validated_ptes had PGT_partial set as a result of an
+ * operation on the current page. (That page may or may not
+ * still have PGT_partial set.)
*
- * If @partial_pte is negative then PTE at @nr_validated_ptes+1 has
- * been partially invalidated. This is basically the opposite case of
- * above, i.e. the general reference to the page was not dropped in
- * put_page_from_lNe() (due to the apparent failure), and hence it
- * must be dropped when the put operation is resumed (and completes),
- * but it must not be acquired if picking up the page for validation.
+ * If PTF_partial_general_ref is set, then the PTE at
+ * @nr_validated_ptef holds a general reference count for the
+ * page.
+ *
+ * This happens:
+ * - During de-validation, if de-validation of the page was
+ * interrupted
+ * - During validation, if an invalid entry is encountered and
+ * validation is preemptible
+ * - During validation, if PTF_partial_general_ref was set on
+ * this entry to begin with (perhaps because we're picking
+ * up from a partial de-validation).
+ *
+ * When resuming validation, if PTF_partial_general_ref is clear,
+ * then a general reference must be re-acquired; if it is set, no
+ * reference should be acquired.
+ *
+ * When resuming de-validation, if PTF_partial_general_ref is
+ * clear, no reference should be dropped; if it is set, a
+ * reference should be dropped.
+ *
+ * NB that PTF_partial_set and PTF_partial_general_ref are
+ * defined in mm.c, the only place where they are used.
*
* The 3rd field, @linear_pt_count, indicates
* - by a positive value, how many same-level page table entries a page
@@ -251,7 +266,7 @@ struct page_info
struct {
u16 nr_validated_ptes:PAGETABLE_ORDER + 1;
u16 :16 - PAGETABLE_ORDER - 1 - 2;
- s16 partial_pte:2;
+ u16 partial_flags:2;
s16 linear_pt_count;
};
--
2.23.0