File libxc.sr.superpage.patch of Package xen.26345
tools/libxc: use superpages during restore of HVM guest
bsc#1035231 - migration of HVM domU does not use superpages on destination dom0
bsc#1055695 - XEN: 11SP4 and 12SP3 HVM guests can not be restored
During creation of an HVM domU, meminit_hvm() tries to map superpages.
After save/restore or migration this mapping is lost, and everything is
allocated in single pages. This causes a performance degradation after
migration.
Add the necessary code to preallocate a superpage for the chunk of pfns
that is received. In case a pfn was not populated on the sending side it
must be freed on the receiving side to avoid over-allocation.
The existing code for x86_pv is moved unmodified into its own file.
Index: xen-4.10.0-testing/tools/libxc/xc_dom_x86.c
===================================================================
--- xen-4.10.0-testing.orig/tools/libxc/xc_dom_x86.c
+++ xen-4.10.0-testing/tools/libxc/xc_dom_x86.c
@@ -43,11 +43,6 @@
#define SUPERPAGE_BATCH_SIZE 512
-#define SUPERPAGE_2MB_SHIFT 9
-#define SUPERPAGE_2MB_NR_PFNS (1UL << SUPERPAGE_2MB_SHIFT)
-#define SUPERPAGE_1GB_SHIFT 18
-#define SUPERPAGE_1GB_NR_PFNS (1UL << SUPERPAGE_1GB_SHIFT)
-
#define X86_CR0_PE 0x01
#define X86_CR0_ET 0x10
Index: xen-4.10.0-testing/tools/libxc/xc_private.h
===================================================================
--- xen-4.10.0-testing.orig/tools/libxc/xc_private.h
+++ xen-4.10.0-testing/tools/libxc/xc_private.h
@@ -69,6 +69,11 @@ struct iovec {
#define DECLARE_FLASK_OP struct xen_flask_op op
#define DECLARE_PLATFORM_OP struct xen_platform_op platform_op
+#define SUPERPAGE_2MB_SHIFT 9
+#define SUPERPAGE_2MB_NR_PFNS (1UL << SUPERPAGE_2MB_SHIFT)
+#define SUPERPAGE_1GB_SHIFT 18
+#define SUPERPAGE_1GB_NR_PFNS (1UL << SUPERPAGE_1GB_SHIFT)
+
#undef PAGE_SHIFT
#undef PAGE_SIZE
#undef PAGE_MASK
Index: xen-4.10.0-testing/tools/libxc/xc_sr_common.c
===================================================================
--- xen-4.10.0-testing.orig/tools/libxc/xc_sr_common.c
+++ xen-4.10.0-testing/tools/libxc/xc_sr_common.c
@@ -156,6 +156,47 @@ static void __attribute__((unused)) buil
}
/*
+ * Expand the tracking structures as needed.
+ * To avoid calling realloc() too often, the size is increased to the nearest
+ * power of two large enough to contain the required number of bits.
+ */
+bool _xc_sr_bitmap_resize(struct xc_sr_bitmap *bm, unsigned long bits)
+{
+    size_t new_max;
+    size_t old_sz, new_sz;
+    uint8_t *p;
+
+    /* The requested bit index already fits: nothing to grow. */
+    if ( bits <= bm->bits )
+        return true;
+
+    /* Round up to the nearest power of two larger than bits, less 1. */
+    new_max = bits;
+    new_max |= new_max >> 1;
+    new_max |= new_max >> 2;
+    new_max |= new_max >> 4;
+    new_max |= new_max >> 8;
+    new_max |= new_max >> 16;
+#ifdef __x86_64__
+    new_max |= new_max >> 32;
+#endif
+
+    old_sz = bitmap_size(bm->bits + 1);
+    new_sz = bitmap_size(new_max + 1);
+
+    /* On failure the old storage stays valid and bm is left untouched. */
+    p = realloc(bm->p, new_sz);
+    if ( !p )
+        return false;
+
+    /*
+     * When growing an existing bitmap only the newly added tail is cleared.
+     * A first allocation is cleared completely, because old_sz is derived
+     * from bits + 1 and does not describe storage that was never allocated.
+     * The byte pointer keeps the offset arithmetic within ISO C (arithmetic
+     * on void * is a GNU extension).
+     */
+    if ( bm->p )
+        memset(p + old_sz, 0, new_sz - old_sz);
+    else
+        memset(p, 0, new_sz);
+
+    bm->p = p;
+    bm->bits = new_max;
+
+    return true;
+}
+
+/*
* Local variables:
* mode: C
* c-file-style: "BSD"
Index: xen-4.10.0-testing/tools/libxc/xc_sr_common.h
===================================================================
--- xen-4.10.0-testing.orig/tools/libxc/xc_sr_common.h
+++ xen-4.10.0-testing/tools/libxc/xc_sr_common.h
@@ -140,6 +140,16 @@ struct xc_sr_restore_ops
int (*setup)(struct xc_sr_context *ctx);
/**
+ * Populate PFNs
+ *
+ * Given a set of pfns, obtain memory from Xen to fill the physmap for the
+ * unpopulated subset.
+ */
+ int (*populate_pfns)(struct xc_sr_context *ctx, unsigned count,
+ const xen_pfn_t *original_pfns, const uint32_t *types);
+
+
+ /**
* Process an individual record from the stream. The caller shall take
* care of processing common records (e.g. END, PAGE_DATA).
*
@@ -172,6 +182,12 @@ struct xc_sr_x86_pv_restore_vcpu
size_t basicsz, extdsz, xsavesz, msrsz;
};
+/*
+ * Growable bitmap used by the restore code to track per-pfn (or
+ * per-superpage) state.  Grown via xc_sr_bitmap_resize(), released via
+ * xc_sr_bitmap_free().
+ */
+struct xc_sr_bitmap
+{
+    /* Bitmap storage; set to NULL by xc_sr_bitmap_free(). */
+    void *p;
+    /* Highest bit index the storage can hold (an index, not a bit count). */
+    unsigned long bits;
+};
+
struct xc_sr_context
{
xc_interface *xch;
@@ -216,6 +232,8 @@ struct xc_sr_context
int send_back_fd;
unsigned long p2m_size;
+ unsigned long max_pages;
+ unsigned long tot_pages;
xc_hypercall_buffer_t dirty_bitmap_hbuf;
/* From Image Header. */
@@ -253,8 +271,7 @@ struct xc_sr_context
uint32_t xenstore_domid, console_domid;
/* Bitmap of currently populated PFNs during restore. */
- unsigned long *populated_pfns;
- xen_pfn_t max_populated_pfn;
+ struct xc_sr_bitmap populated_pfns;
/* Sender has invoked verify mode on the stream. */
bool verify;
@@ -329,6 +346,12 @@ struct xc_sr_context
/* HVM context blob. */
void *context;
size_t contextsz;
+
+ /* Bitmap of currently allocated PFNs during restore. */
+ struct xc_sr_bitmap attempted_1g;
+ struct xc_sr_bitmap attempted_2m;
+ struct xc_sr_bitmap allocated_pfns;
+ xen_pfn_t idx1G_prev, idx2M_prev;
} restore;
};
} x86_hvm;
@@ -341,6 +364,69 @@ extern struct xc_sr_save_ops save_ops_x8
extern struct xc_sr_restore_ops restore_ops_x86_pv;
extern struct xc_sr_restore_ops restore_ops_x86_hvm;
+extern bool _xc_sr_bitmap_resize(struct xc_sr_bitmap *bm, unsigned long bits);
+
+/*
+ * Make sure bit index 'bits' fits into the bitmap.  The out-of-line helper
+ * is only called when the bitmap actually has to grow.
+ * Returns false if growing failed, true otherwise.
+ */
+static inline bool xc_sr_bitmap_resize(struct xc_sr_bitmap *bm, unsigned long bits)
+{
+    return bits <= bm->bits || _xc_sr_bitmap_resize(bm, bits);
+}
+
+/*
+ * Release the storage of a bitmap and return it to the empty state.
+ * Safe to call on a bitmap that was never allocated (free(NULL) is a no-op).
+ */
+static inline void xc_sr_bitmap_free(struct xc_sr_bitmap *bm)
+{
+    free(bm->p);
+    bm->p = NULL;
+    /*
+     * Drop the recorded capacity as well.  With a stale value a later
+     * resize to a smaller index would be skipped and the bit helpers would
+     * then access the freed (NULL) storage.
+     */
+    bm->bits = 0;
+}
+
+/*
+ * Set 'bit' in the bitmap, growing the bitmap first when the index does not
+ * fit.  Returns false (and sets nothing) if the bitmap could not be grown.
+ */
+static inline bool xc_sr_set_bit(unsigned long bit, struct xc_sr_bitmap *bm)
+{
+    const bool fits = xc_sr_bitmap_resize(bm, bit);
+
+    if ( fits )
+        set_bit(bit, bm->p);
+
+    return fits;
+}
+
+/*
+ * Report whether 'bit' is set.  A bit beyond the current capacity, or any
+ * bit of a bitmap without storage, is reported as clear.
+ */
+static inline bool xc_sr_test_bit(unsigned long bit, struct xc_sr_bitmap *bm)
+{
+    /*
+     * An empty bitmap has bits == 0 and p == NULL, so the range check alone
+     * would let bit 0 through to a NULL dereference.
+     */
+    if ( !bm->p || bit > bm->bits )
+        return false;
+    return !!test_bit(bit, bm->p);
+}
+
+/*
+ * Clear 'bit' and return its previous value.  A bit beyond the current
+ * capacity, or any bit of a bitmap without storage, is reported as clear
+ * and nothing is modified.
+ */
+static inline bool xc_sr_test_and_clear_bit(unsigned long bit, struct xc_sr_bitmap *bm)
+{
+    /*
+     * An empty bitmap has bits == 0 and p == NULL, so the range check alone
+     * would let bit 0 through to a NULL dereference.
+     */
+    if ( !bm->p || bit > bm->bits )
+        return false;
+    return !!test_and_clear_bit(bit, bm->p);
+}
+
+/*
+ * Set 'bit' and return its previous value.
+ *
+ * Unlike xc_sr_set_bit() this helper never grows the bitmap: for a bit
+ * beyond bm->bits it returns false and does NOT record the bit.  Callers
+ * therefore have to resize the bitmap beforehand, as x86_hvm_allocate_pfn()
+ * does for the attempted_1g/attempted_2m bitmaps.
+ */
+static inline bool xc_sr_test_and_set_bit(unsigned long bit, struct xc_sr_bitmap *bm)
+{
+    if (bit > bm->bits)
+        return false;
+    return !!test_and_set_bit(bit, bm->p);
+}
+
+/* Is this pfn recorded in the populated_pfns bitmap of the restore context? */
+static inline bool pfn_is_populated(struct xc_sr_context *ctx, xen_pfn_t pfn)
+{
+    struct xc_sr_bitmap *populated = &ctx->restore.populated_pfns;
+
+    return xc_sr_test_bit(pfn, populated);
+}
+
+/*
+ * Record a pfn as populated, growing the populated_pfns bitmap if needed.
+ * Returns 0 on success, or -1 with errno set to ENOMEM if the bitmap could
+ * not be grown.
+ */
+static inline int pfn_set_populated(struct xc_sr_context *ctx, xen_pfn_t pfn)
+{
+    xc_interface *xch = ctx->xch;
+
+    if ( xc_sr_set_bit(pfn, &ctx->restore.populated_pfns) )
+        return 0;
+
+    ERROR("Failed to realloc populated_pfns bitmap");
+    errno = ENOMEM;
+    return -1;
+}
+
struct xc_sr_record
{
uint32_t type;
@@ -389,14 +475,6 @@ static inline int write_record(struct xc
*/
int read_record(struct xc_sr_context *ctx, int fd, struct xc_sr_record *rec);
-/*
- * This would ideally be private in restore.c, but is needed by
- * x86_pv_localise_page() if we receive pagetables frames ahead of the
- * contents of the frames they point at.
- */
-int populate_pfns(struct xc_sr_context *ctx, unsigned count,
- const xen_pfn_t *original_pfns, const uint32_t *types);
-
#endif
/*
* Local variables:
Index: xen-4.10.0-testing/tools/libxc/xc_sr_restore.c
===================================================================
--- xen-4.10.0-testing.orig/tools/libxc/xc_sr_restore.c
+++ xen-4.10.0-testing/tools/libxc/xc_sr_restore.c
@@ -69,132 +69,6 @@ static int read_headers(struct xc_sr_con
}
/*
- * Is a pfn populated?
- */
-static bool pfn_is_populated(const struct xc_sr_context *ctx, xen_pfn_t pfn)
-{
- if ( pfn > ctx->restore.max_populated_pfn )
- return false;
- return test_bit(pfn, ctx->restore.populated_pfns);
-}
-
-/*
- * Set a pfn as populated, expanding the tracking structures if needed. To
- * avoid realloc()ing too excessively, the size increased to the nearest power
- * of two large enough to contain the required pfn.
- */
-static int pfn_set_populated(struct xc_sr_context *ctx, xen_pfn_t pfn)
-{
- xc_interface *xch = ctx->xch;
-
- if ( pfn > ctx->restore.max_populated_pfn )
- {
- xen_pfn_t new_max;
- size_t old_sz, new_sz;
- unsigned long *p;
-
- /* Round up to the nearest power of two larger than pfn, less 1. */
- new_max = pfn;
- new_max |= new_max >> 1;
- new_max |= new_max >> 2;
- new_max |= new_max >> 4;
- new_max |= new_max >> 8;
- new_max |= new_max >> 16;
-#ifdef __x86_64__
- new_max |= new_max >> 32;
-#endif
-
- old_sz = bitmap_size(ctx->restore.max_populated_pfn + 1);
- new_sz = bitmap_size(new_max + 1);
- p = realloc(ctx->restore.populated_pfns, new_sz);
- if ( !p )
- {
- ERROR("Failed to realloc populated bitmap");
- errno = ENOMEM;
- return -1;
- }
-
- memset((uint8_t *)p + old_sz, 0x00, new_sz - old_sz);
-
- ctx->restore.populated_pfns = p;
- ctx->restore.max_populated_pfn = new_max;
- }
-
- assert(!test_bit(pfn, ctx->restore.populated_pfns));
- set_bit(pfn, ctx->restore.populated_pfns);
-
- return 0;
-}
-
-/*
- * Given a set of pfns, obtain memory from Xen to fill the physmap for the
- * unpopulated subset. If types is NULL, no page type checking is performed
- * and all unpopulated pfns are populated.
- */
-int populate_pfns(struct xc_sr_context *ctx, unsigned count,
- const xen_pfn_t *original_pfns, const uint32_t *types)
-{
- xc_interface *xch = ctx->xch;
- xen_pfn_t *mfns = malloc(count * sizeof(*mfns)),
- *pfns = malloc(count * sizeof(*pfns));
- unsigned i, nr_pfns = 0;
- int rc = -1;
-
- if ( !mfns || !pfns )
- {
- ERROR("Failed to allocate %zu bytes for populating the physmap",
- 2 * count * sizeof(*mfns));
- goto err;
- }
-
- for ( i = 0; i < count; ++i )
- {
- if ( (!types || (types &&
- (types[i] != XEN_DOMCTL_PFINFO_XTAB &&
- types[i] != XEN_DOMCTL_PFINFO_BROKEN))) &&
- !pfn_is_populated(ctx, original_pfns[i]) )
- {
- rc = pfn_set_populated(ctx, original_pfns[i]);
- if ( rc )
- goto err;
- pfns[nr_pfns] = mfns[nr_pfns] = original_pfns[i];
- ++nr_pfns;
- }
- }
-
- if ( nr_pfns )
- {
- rc = xc_domain_populate_physmap_exact(
- xch, ctx->domid, nr_pfns, 0, 0, mfns);
- if ( rc )
- {
- PERROR("Failed to populate physmap");
- goto err;
- }
-
- for ( i = 0; i < nr_pfns; ++i )
- {
- if ( mfns[i] == INVALID_MFN )
- {
- ERROR("Populate physmap failed for pfn %u", i);
- rc = -1;
- goto err;
- }
-
- ctx->restore.ops.set_gfn(ctx, pfns[i], mfns[i]);
- }
- }
-
- rc = 0;
-
- err:
- free(pfns);
- free(mfns);
-
- return rc;
-}
-
-/*
* Given a list of pfns, their types, and a block of page data from the
* stream, populate and record their types, map the relevant subset and copy
* the data into the guest.
@@ -219,7 +93,7 @@ static int process_page_data(struct xc_s
goto err;
}
- rc = populate_pfns(ctx, count, pfns, types);
+ rc = ctx->restore.ops.populate_pfns(ctx, count, pfns, types);
if ( rc )
{
ERROR("Failed to populate pfns for batch of %u pages", count);
@@ -684,10 +558,8 @@ static int setup(struct xc_sr_context *c
if ( rc )
goto err;
- ctx->restore.max_populated_pfn = (32 * 1024 / 4) - 1;
- ctx->restore.populated_pfns = bitmap_alloc(
- ctx->restore.max_populated_pfn + 1);
- if ( !ctx->restore.populated_pfns )
+ rc = !xc_sr_bitmap_resize(&ctx->restore.populated_pfns, 32 * 1024 / 4);
+ if ( rc )
{
ERROR("Unable to allocate memory for populated_pfns bitmap");
rc = -1;
@@ -722,7 +594,7 @@ static void cleanup(struct xc_sr_context
xc_hypercall_buffer_free_pages(xch, dirty_bitmap,
NRPAGES(bitmap_size(ctx->restore.p2m_size)));
free(ctx->restore.buffered_records);
- free(ctx->restore.populated_pfns);
+ xc_sr_bitmap_free(&ctx->restore.populated_pfns);
if ( ctx->restore.ops.cleanup(ctx) )
PERROR("Failed to clean up");
}
@@ -886,7 +758,12 @@ int xc_domain_restore(xc_interface *xch,
return -1;
}
+ /* See xc_domain_getinfo */
+ ctx.restore.max_pages = ctx.dominfo.max_memkb >> (PAGE_SHIFT-10);
+ ctx.restore.tot_pages = ctx.dominfo.nr_pages;
ctx.restore.p2m_size = nr_pfns;
+ DPRINTF("dom %u p2m_size %lx max_pages %lx",
+ ctx.domid, ctx.restore.p2m_size, ctx.restore.max_pages);
if ( ctx.dominfo.hvm )
{
Index: xen-4.10.0-testing/tools/libxc/xc_sr_restore_x86_hvm.c
===================================================================
--- xen-4.10.0-testing.orig/tools/libxc/xc_sr_restore_x86_hvm.c
+++ xen-4.10.0-testing/tools/libxc/xc_sr_restore_x86_hvm.c
@@ -135,6 +135,8 @@ static int x86_hvm_localise_page(struct
static int x86_hvm_setup(struct xc_sr_context *ctx)
{
xc_interface *xch = ctx->xch;
+ struct xc_sr_bitmap *bm;
+ unsigned long bits;
if ( ctx->restore.guest_type != DHDR_TYPE_X86_HVM )
{
@@ -158,7 +160,41 @@ static int x86_hvm_setup(struct xc_sr_co
}
#endif
+ bm = &ctx->x86_hvm.restore.attempted_1g;
+ bits = (ctx->restore.p2m_size >> SUPERPAGE_1GB_SHIFT) + 1;
+ if ( xc_sr_bitmap_resize(bm, bits) == false )
+ goto out;
+
+ bm = &ctx->x86_hvm.restore.attempted_2m;
+ bits = (ctx->restore.p2m_size >> SUPERPAGE_2MB_SHIFT) + 1;
+ if ( xc_sr_bitmap_resize(bm, bits) == false )
+ goto out;
+
+ bm = &ctx->x86_hvm.restore.allocated_pfns;
+ bits = ctx->restore.p2m_size + 1;
+ if ( xc_sr_bitmap_resize(bm, bits) == false )
+ goto out;
+
+ /* Never try a superpage that covers pfn 0 (VGA hole), the LAPIC page or the ACPI info page */
+#define LAPIC_BASE_ADDRESS 0xfee00000u
+#define ACPI_INFO_PHYSICAL_ADDRESS 0xfc000000u
+#define LAPIC_BASE_PFN (LAPIC_BASE_ADDRESS >> XC_PAGE_SHIFT)
+#define ACPI_INFO_PFN (ACPI_INFO_PHYSICAL_ADDRESS >> XC_PAGE_SHIFT)
+ bm = &ctx->x86_hvm.restore.attempted_1g;
+ xc_sr_set_bit(0, bm);
+ xc_sr_set_bit(LAPIC_BASE_PFN >> SUPERPAGE_1GB_SHIFT, bm);
+ xc_sr_set_bit(ACPI_INFO_PFN >> SUPERPAGE_1GB_SHIFT, bm);
+
+ bm = &ctx->x86_hvm.restore.attempted_2m;
+ xc_sr_set_bit(0, bm);
+ xc_sr_set_bit(LAPIC_BASE_PFN >> SUPERPAGE_2MB_SHIFT, bm);
+ xc_sr_set_bit(ACPI_INFO_PFN >> SUPERPAGE_2MB_SHIFT, bm);
+
return 0;
+
+out:
+ ERROR("Unable to allocate memory for pfn bitmaps");
+ return -1;
}
/*
@@ -233,10 +269,325 @@ static int x86_hvm_stream_complete(struc
+/*
+ * HVM restore cleanup: frees the saved HVM context blob and the pfn
+ * tracking bitmaps.  Always returns 0.
+ */
static int x86_hvm_cleanup(struct xc_sr_context *ctx)
{
free(ctx->x86_hvm.restore.context);
+    /* Release the superpage tracking bitmaps that x86_hvm_setup() sized. */
+    xc_sr_bitmap_free(&ctx->x86_hvm.restore.attempted_1g);
+    xc_sr_bitmap_free(&ctx->x86_hvm.restore.attempted_2m);
+    xc_sr_bitmap_free(&ctx->x86_hvm.restore.allocated_pfns);
+
+    return 0;
+}
+
+/*
+ * Set a pfn as allocated, expanding the tracking structures if needed.
+ *
+ * Returns 0 on success, or -1 with errno set to ENOMEM when the
+ * allocated_pfns bitmap could not be grown to hold the pfn.
+ */
+static int pfn_set_allocated(struct xc_sr_context *ctx, xen_pfn_t pfn)
+{
+    xc_interface *xch = ctx->xch;
+    /* xc_sr_set_bit() only fails when resizing the bitmap fails. */
+    if ( !xc_sr_set_bit(pfn, &ctx->x86_hvm.restore.allocated_pfns) )
+    {
+        ERROR("Failed to realloc allocated_pfns bitmap");
+        errno = ENOMEM;
+        return -1;
+    }
return 0;
}
+/*
+ * Describes one allocation attempt made by x86_hvm_allocate_pfn() and the
+ * x86_hvm_alloc_{1g,2m,4k}() helpers.
+ */
+struct x86_hvm_sp {
+    /* The pfn for which memory is requested (input). */
+    xen_pfn_t pfn;
+    /* pfn rounded down to the allocation order; written by the alloc helpers. */
+    xen_pfn_t base_pfn;
+    /* Bit index into the attempted_1g/attempted_2m bitmap (input). */
+    unsigned long index;
+    /* Number of 4k pages in the attempted extent (1 << order); written by the alloc helpers. */
+    unsigned long count;
+};
+
+/*
+ * Try to back sp->pfn with a 1GB superpage without exceeding max_pages.
+ * Every 1GB slot (sp->index) is tried at most once.  On success
+ * sp->base_pfn and sp->count describe the new extent, and all 2MB slots
+ * inside it are marked as attempted so that no 2MB page is allocated on top.
+ * Returns true only if Xen populated the extent.
+ */
+static bool x86_hvm_alloc_1g(struct xc_sr_context *ctx, struct x86_hvm_sp *sp)
+{
+    xc_interface *xch = ctx->xch;
+    const unsigned int order = SUPERPAGE_1GB_SHIFT;
+    const unsigned int shift = SUPERPAGE_1GB_SHIFT - SUPERPAGE_2MB_SHIFT;
+    struct xc_sr_bitmap *tried_2m = &ctx->x86_hvm.restore.attempted_2m;
+    xen_pfn_t extent;
+    int slot, nr_done;
+
+    /* Only one attempt to avoid overlapping allocation */
+    if ( xc_sr_test_and_set_bit(sp->index, &ctx->x86_hvm.restore.attempted_1g) )
+        return false;
+
+    sp->count = 1ULL << order;
+
+    /* Allocate only if there is room for another superpage */
+    if ( ctx->restore.tot_pages + sp->count > ctx->restore.max_pages )
+        return false;
+
+    sp->base_pfn = (sp->pfn >> order) << order;
+    extent = sp->base_pfn;
+    nr_done = xc_domain_populate_physmap(xch, ctx->domid, 1, order, 0, &extent);
+    if ( nr_done <= 0 )
+    {
+        if ( nr_done < 0 )
+            PERROR("populate_physmap failed.");
+        return false;
+    }
+
+    DPRINTF("1G base_pfn %" PRI_xen_pfn "\n", sp->base_pfn);
+
+    /* Mark all 2MB pages as done to avoid overlapping allocation */
+    for ( slot = 0; slot < (sp->count >> shift); slot++ )
+        xc_sr_set_bit((sp->base_pfn >> SUPERPAGE_2MB_SHIFT) + slot, tried_2m);
+
+    return true;
+}
+
+/*
+ * Fallback used when x86_hvm_alloc_1g failed: try a 2MB superpage around
+ * sp->pfn without exceeding max_pages.  Every 2MB slot (sp->index) is tried
+ * at most once.  On success sp->base_pfn and sp->count describe the extent.
+ */
+static bool x86_hvm_alloc_2m(struct xc_sr_context *ctx, struct x86_hvm_sp *sp)
+{
+    xc_interface *xch = ctx->xch;
+    const unsigned int order = SUPERPAGE_2MB_SHIFT;
+    xen_pfn_t extent;
+    int nr_done;
+
+    /* Only one attempt to avoid overlapping allocation */
+    if ( xc_sr_test_and_set_bit(sp->index, &ctx->x86_hvm.restore.attempted_2m) )
+        return false;
+
+    sp->count = 1ULL << order;
+
+    /* Allocate only if there is room for another superpage */
+    if ( ctx->restore.tot_pages + sp->count > ctx->restore.max_pages )
+        return false;
+
+    sp->base_pfn = (sp->pfn >> order) << order;
+    extent = sp->base_pfn;
+    nr_done = xc_domain_populate_physmap(xch, ctx->domid, 1, order, 0, &extent);
+    if ( nr_done <= 0 )
+    {
+        if ( nr_done < 0 )
+            PERROR("populate_physmap failed.");
+        return false;
+    }
+
+    DPRINTF("2M base_pfn %" PRI_xen_pfn "\n", sp->base_pfn);
+    return true;
+}
+
+/*
+ * Last resort used when x86_hvm_alloc_2m failed: allocate the single page
+ * sp->pfn, provided the domain is still below max_pages.  On success
+ * sp->base_pfn equals sp->pfn and sp->count is 1.
+ */
+static bool x86_hvm_alloc_4k(struct xc_sr_context *ctx, struct x86_hvm_sp *sp)
+{
+    xc_interface *xch = ctx->xch;
+    xen_pfn_t extent;
+    int nr_done;
+
+    /* Order 0: exactly one page, no alignment required. */
+    sp->count = 1;
+
+    /* Allocate only if there is room for another page */
+    if ( ctx->restore.tot_pages + sp->count > ctx->restore.max_pages )
+        return false;
+
+    sp->base_pfn = sp->pfn;
+    extent = sp->base_pfn;
+    nr_done = xc_domain_populate_physmap(xch, ctx->domid, 1, 0, 0, &extent);
+    if ( nr_done <= 0 )
+    {
+        if ( nr_done < 0 )
+            PERROR("populate_physmap failed.");
+        return false;
+    }
+
+    DPRINTF("4K base_pfn %" PRI_xen_pfn "\n", sp->base_pfn);
+    return true;
+}
+/*
+ * Make sure memory backs the given pfn.
+ *
+ * If the pfn is not yet marked allocated, try a 1GB superpage, then a 2MB
+ * superpage, then a single page.  Every page of the extent obtained is
+ * recorded in allocated_pfns and counted in tot_pages.
+ *
+ * Returns 0 on success (or if the pfn was already allocated), non-zero if
+ * no memory could be obtained or a tracking bitmap could not be grown.
+ */
+static int x86_hvm_allocate_pfn(struct xc_sr_context *ctx, xen_pfn_t pfn)
+{
+    xc_interface *xch = ctx->xch;
+    const unsigned long idx_1g = pfn >> SUPERPAGE_1GB_SHIFT;
+    const unsigned long idx_2m = pfn >> SUPERPAGE_2MB_SHIFT;
+    struct x86_hvm_sp sp = {
+        .pfn = pfn
+    };
+    int rc;
+
+    if ( xc_sr_test_bit(pfn, &ctx->x86_hvm.restore.allocated_pfns) )
+        return 0;
+
+    /* The alloc helpers use test-and-set, which does not grow the bitmaps. */
+    if ( !xc_sr_bitmap_resize(&ctx->x86_hvm.restore.attempted_1g, idx_1g) )
+    {
+        PERROR("Failed to realloc attempted_1g");
+        return -1;
+    }
+    if ( !xc_sr_bitmap_resize(&ctx->x86_hvm.restore.attempted_2m, idx_2m) )
+    {
+        PERROR("Failed to realloc attempted_2m");
+        return -1;
+    }
+
+    /* Largest extent first, falling back to the next smaller size. */
+    sp.index = idx_1g;
+    if ( !x86_hvm_alloc_1g(ctx, &sp) )
+    {
+        sp.index = idx_2m;
+        if ( !x86_hvm_alloc_2m(ctx, &sp) )
+        {
+            sp.index = 0;
+            if ( !x86_hvm_alloc_4k(ctx, &sp) )
+                return -1;
+        }
+    }
+
+    /* Account for every page of the extent, walking from its end to its base. */
+    do {
+        sp.count--;
+        ctx->restore.tot_pages++;
+        rc = pfn_set_allocated(ctx, sp.base_pfn + sp.count);
+    } while ( !rc && sp.count );
+
+    return rc;
+}
+
+/*
+ * Give back pages that were allocated as part of a superpage but never
+ * populated by the stream.
+ *
+ * All pfns from the 1GB boundary below max_pfn up to and including max_pfn
+ * are scanned.  A pfn that is marked allocated but not populated is released
+ * to Xen, its allocated bit is cleared and tot_pages is decremented.
+ *
+ * Returns false if the bitmap could not be grown or a release failed.
+ */
+static bool x86_hvm_punch_hole(struct xc_sr_context *ctx, xen_pfn_t max_pfn)
+{
+    xc_interface *xch = ctx->xch;
+    struct xc_sr_bitmap *allocated = &ctx->x86_hvm.restore.allocated_pfns;
+    /*
+     * Scan the entire superpage because several batches will fit into
+     * a superpage, and it is unknown which pfn triggered the allocation.
+     */
+    const xen_pfn_t first_pfn =
+        (max_pfn >> SUPERPAGE_1GB_SHIFT) << SUPERPAGE_1GB_SHIFT;
+    xen_pfn_t cur, extent;
+    uint32_t released = 0;
+
+    for ( cur = first_pfn; cur <= max_pfn; cur++ )
+    {
+        if ( !xc_sr_bitmap_resize(allocated, cur) )
+        {
+            PERROR("Failed to realloc allocated_pfns %" PRI_xen_pfn, cur);
+            return false;
+        }
+
+        /* Keep pages that hold data, skip pages that were never allocated. */
+        if ( pfn_is_populated(ctx, cur) ||
+             !xc_sr_test_and_clear_bit(cur, allocated) )
+            continue;
+
+        /* Hand a copy to the hypercall so cur stays usable for the message. */
+        extent = cur;
+        if ( xc_domain_decrease_reservation_exact(xch, ctx->domid, 1, 0, &extent) )
+        {
+            PERROR("Failed to release pfn %" PRI_xen_pfn, cur);
+            return false;
+        }
+        ctx->restore.tot_pages--;
+        released++;
+    }
+
+    if ( released )
+        DPRINTF("freed %u between %" PRI_xen_pfn " %" PRI_xen_pfn "\n",
+                released, first_pfn, max_pfn);
+    return true;
+}
+
+/*
+ * Try to allocate superpages.
+ * This works without memory map only if the pfns arrive in incremental order.
+ *
+ * restore_ops populate_pfns hook for HVM guests.  For every pfn of the batch
+ * that is neither XTAB nor BROKEN and not yet populated, backing memory is
+ * allocated (preferring superpages) and the pfn is marked populated.  When
+ * the batch moves on to a different 2MB or 1GB superpage, the allocated but
+ * unpopulated pages of the previous one are released again.  If types is
+ * NULL no type filtering is done, matching the PV implementation.
+ *
+ * Returns 0 on success (including an empty batch), non-zero on failure.
+ */
+static int x86_hvm_populate_pfns(struct xc_sr_context *ctx, unsigned count,
+                                 const xen_pfn_t *original_pfns,
+                                 const uint32_t *types)
+{
+    xc_interface *xch = ctx->xch;
+    xen_pfn_t pfn, min_pfn, max_pfn, hole_end;
+    xen_pfn_t idx1G, idx2M;
+    unsigned i;
+    int rc;
+
+    /* An empty batch has no first element to seed the statistics from. */
+    if ( count == 0 )
+        return 0;
+
+    /* Loop once over the array to show statistics */
+    min_pfn = max_pfn = original_pfns[0];
+    for ( i = 1; i < count; ++i )
+    {
+        if ( original_pfns[i] < min_pfn )
+            min_pfn = original_pfns[i];
+        if ( original_pfns[i] > max_pfn )
+            max_pfn = original_pfns[i];
+    }
+    DPRINTF("batch of %u pfns between %" PRI_xen_pfn " %" PRI_xen_pfn "\n",
+            count, min_pfn, max_pfn);
+
+    for ( i = 0; i < count; ++i )
+    {
+        pfn = original_pfns[i];
+        idx1G = pfn >> SUPERPAGE_1GB_SHIFT;
+        idx2M = pfn >> SUPERPAGE_2MB_SHIFT;
+
+        /*
+         * If this pfn is in another 1GB or 2MB superpage than the previous
+         * one, the previous superpage has to be scanned up to its end,
+         * because there might be holes between the last pfn seen and the end
+         * of that superpage.
+         */
+        if ( ctx->x86_hvm.restore.idx1G_prev != idx1G ||
+             ctx->x86_hvm.restore.idx2M_prev != idx2M )
+        {
+            if ( ctx->x86_hvm.restore.idx1G_prev != idx1G )
+                hole_end = ((ctx->x86_hvm.restore.idx1G_prev + 1)
+                            << SUPERPAGE_1GB_SHIFT) - 1;
+            else
+                hole_end = ((ctx->x86_hvm.restore.idx2M_prev + 1)
+                            << SUPERPAGE_2MB_SHIFT) - 1;
+
+            /*
+             * Fail with an explicit error value: a status variable shared
+             * with the calls below would still hold 0 from an earlier
+             * iteration and turn this failure into a success.
+             */
+            if ( !x86_hvm_punch_hole(ctx, hole_end) )
+                return -1;
+        }
+
+        if ( (!types ||
+              (types[i] != XEN_DOMCTL_PFINFO_XTAB &&
+               types[i] != XEN_DOMCTL_PFINFO_BROKEN)) &&
+             !pfn_is_populated(ctx, pfn) )
+        {
+            rc = x86_hvm_allocate_pfn(ctx, pfn);
+            if ( rc )
+                return rc;
+            rc = pfn_set_populated(ctx, pfn);
+            if ( rc )
+                return rc;
+        }
+
+        ctx->x86_hvm.restore.idx1G_prev = idx1G;
+        ctx->x86_hvm.restore.idx2M_prev = idx2M;
+    }
+
+    return 0;
+}
+
+
struct xc_sr_restore_ops restore_ops_x86_hvm =
{
.pfn_is_valid = x86_hvm_pfn_is_valid,
@@ -245,6 +596,7 @@ struct xc_sr_restore_ops restore_ops_x86
.set_page_type = x86_hvm_set_page_type,
.localise_page = x86_hvm_localise_page,
.setup = x86_hvm_setup,
+ .populate_pfns = x86_hvm_populate_pfns,
.process_record = x86_hvm_process_record,
.stream_complete = x86_hvm_stream_complete,
.cleanup = x86_hvm_cleanup,
Index: xen-4.10.0-testing/tools/libxc/xc_sr_restore_x86_pv.c
===================================================================
--- xen-4.10.0-testing.orig/tools/libxc/xc_sr_restore_x86_pv.c
+++ xen-4.10.0-testing/tools/libxc/xc_sr_restore_x86_pv.c
@@ -937,6 +937,75 @@ static void x86_pv_set_gfn(struct xc_sr_
}
/*
+ * Given a set of pfns, obtain memory from Xen to fill the physmap for the
+ * unpopulated subset. If types is NULL, no page type checking is performed
+ * and all unpopulated pfns are populated.
+ */
+/*
+ * restore_ops populate_pfns hook for PV guests; also called directly by
+ * x86_pv_localise_page() with types == NULL.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+static int x86_pv_populate_pfns(struct xc_sr_context *ctx, unsigned count,
+                                const xen_pfn_t *original_pfns,
+                                const uint32_t *types)
+{
+    xc_interface *xch = ctx->xch;
+    /* Scratch arrays sized for the worst case of every pfn needing memory. */
+    xen_pfn_t *mfns = malloc(count * sizeof(*mfns)),
+        *pfns = malloc(count * sizeof(*pfns));
+    unsigned i, nr_pfns = 0;
+    int rc = -1;
+
+    if ( !mfns || !pfns )
+    {
+        ERROR("Failed to allocate %zu bytes for populating the physmap",
+              2 * count * sizeof(*mfns));
+        goto err;
+    }
+
+    /*
+     * Collect the pfns that still need memory: not yet populated and, when
+     * types are given, neither XTAB nor BROKEN.  Each one is marked
+     * populated right away, before Xen has been asked for the memory.
+     */
+    for ( i = 0; i < count; ++i )
+    {
+        if ( (!types || (types &&
+                         (types[i] != XEN_DOMCTL_PFINFO_XTAB &&
+                          types[i] != XEN_DOMCTL_PFINFO_BROKEN))) &&
+             !pfn_is_populated(ctx, original_pfns[i]) )
+        {
+            rc = pfn_set_populated(ctx, original_pfns[i]);
+            if ( rc )
+                goto err;
+            /* Both arrays start out identical; pfns keeps the guest frame numbers. */
+            pfns[nr_pfns] = mfns[nr_pfns] = original_pfns[i];
+            ++nr_pfns;
+        }
+    }
+
+    if ( nr_pfns )
+    {
+        /* One order-0 extent per collected pfn. */
+        rc = xc_domain_populate_physmap_exact(
+            xch, ctx->domid, nr_pfns, 0, 0, mfns);
+        if ( rc )
+        {
+            PERROR("Failed to populate physmap");
+            goto err;
+        }
+
+        /* presumably mfns[] was rewritten in place by the call above — TODO confirm */
+        for ( i = 0; i < nr_pfns; ++i )
+        {
+            if ( mfns[i] == INVALID_MFN )
+            {
+                /*
+                 * NOTE(review): this prints the index into the filtered
+                 * array, not the pfn itself (pfns[i]).
+                 */
+                ERROR("Populate physmap failed for pfn %u", i);
+                rc = -1;
+                goto err;
+            }
+
+            ctx->restore.ops.set_gfn(ctx, pfns[i], mfns[i]);
+        }
+    }
+
+    rc = 0;
+
+ err:
+    /* free(NULL) is harmless, so this also covers a failed malloc above. */
+    free(pfns);
+    free(mfns);
+
+    return rc;
+}
+
+/*
* restore_ops function. Convert pfns back to mfns in pagetables. Possibly
* needs to populate new frames if a PTE is found referring to a frame which
* hasn't yet been seen from PAGE_DATA records.
@@ -980,7 +1049,7 @@ static int x86_pv_localise_page(struct x
}
}
- if ( to_populate && populate_pfns(ctx, to_populate, pfns, NULL) )
+ if ( to_populate && x86_pv_populate_pfns(ctx, to_populate, pfns, NULL) )
return -1;
for ( i = 0; i < (PAGE_SIZE / sizeof(uint64_t)); ++i )
@@ -1160,6 +1229,7 @@ struct xc_sr_restore_ops restore_ops_x86
.set_gfn = x86_pv_set_gfn,
.localise_page = x86_pv_localise_page,
.setup = x86_pv_setup,
+ .populate_pfns = x86_pv_populate_pfns,
.process_record = x86_pv_process_record,
.stream_complete = x86_pv_stream_complete,
.cleanup = x86_pv_cleanup,