File xen.sr-restore-hvm-legacy-superpage.patch of Package xen
From: Olaf Hering <olaf@aepfle.de>
Date: Mon, 7 Aug 2017 12:58:02 +0000
Subject: sr restore hvm legacy superpage

tools: use superpages during restore of HVM guest

bsc#1035231 - migration of HVM domU does not use superpages on destination dom0
bsc#1055695 - XEN: 11SP4 and 12SP3 HVM guests can not be restored

During creation of an HVM domU, meminit_hvm() tries to map superpages.
After save/restore or migration this mapping is lost and everything is
allocated in single pages. This causes a performance degradation after
migration.

Add the necessary code to preallocate a superpage for an incoming chunk
of pfns. In case a pfn was not populated on the sending side, it must be
freed on the receiving side to avoid over-allocation.

The existing code for x86_pv is moved unmodified into its own file.

Signed-off-by: Olaf Hering <olaf@aepfle.de>
---
 tools/libs/guest/xg_dom_x86.c            |   5 -
 tools/libs/guest/xg_private.h            |   5 +
 tools/libs/guest/xg_sr_common.h          |  28 +-
 tools/libs/guest/xg_sr_restore.c         |  60 +-
 tools/libs/guest/xg_sr_restore_x86_hvm.c | 381 ++++++++-
 tools/libs/guest/xg_sr_restore_x86_pv.c  |  61 +-
 6 files changed, 467 insertions(+), 73 deletions(-)

--- a/tools/libs/guest/xg_dom_x86.c
+++ b/tools/libs/guest/xg_dom_x86.c
@@ -35,29 +35,24 @@
 #include <xen/arch-x86/hvm/start_info.h>
 #include <xen/io/protocols.h>
 #include <xen-tools/common-macros.h>
 
 #include "xg_private.h"
 #include "xenctrl.h"
 
 /* ------------------------------------------------------------------------ */
 
 #define SUPERPAGE_BATCH_SIZE 512
 
-#define SUPERPAGE_2MB_SHIFT   9
-#define SUPERPAGE_2MB_NR_PFNS (1UL << SUPERPAGE_2MB_SHIFT)
-#define SUPERPAGE_1GB_SHIFT   18
-#define SUPERPAGE_1GB_NR_PFNS (1UL << SUPERPAGE_1GB_SHIFT)
-
 #define X86_CR0_PE 0x01
 #define X86_CR0_ET 0x10
 
 #define X86_DR6_DEFAULT 0xffff0ff0u
 #define X86_DR7_DEFAULT 0x00000400u
 
 #define MTRR_TYPE_WRBACK     6
 #define MTRR_DEF_TYPE_ENABLE (1u << 11)
 
 #define SPECIALPAGE_PAGING   0
 #define SPECIALPAGE_ACCESS   1
 #define SPECIALPAGE_SHARING  2
--- a/tools/libs/guest/xg_private.h
+++ b/tools/libs/guest/xg_private.h
@@ -171,13 +171,18 @@ int pin_table(xc_interface *xch, unsigned int type, unsigned long mfn,
 #define M2P_CHUNKS(_m)  (M2P_SIZE((_m)) >> M2P_SHIFT)
 
 #if defined(__x86_64__) || defined(__i386__)
 #include <xen/lib/x86/cpu-policy.h>
 
 struct xc_cpu_policy {
     struct cpu_policy policy;
     xen_cpuid_leaf_t leaves[CPUID_MAX_SERIALISED_LEAVES];
     xen_msr_entry_t msrs[MSR_MAX_SERIALISED_ENTRIES];
 };
 #endif /* x86 */
 
+#define SUPERPAGE_2MB_SHIFT   9
+#define SUPERPAGE_2MB_NR_PFNS (1UL << SUPERPAGE_2MB_SHIFT)
+#define SUPERPAGE_1GB_SHIFT   18
+#define SUPERPAGE_1GB_NR_PFNS (1UL << SUPERPAGE_1GB_SHIFT)
+
 #endif /* XG_PRIVATE_H */
--- a/tools/libs/guest/xg_sr_common.h
+++ b/tools/libs/guest/xg_sr_common.h
@@ -198,24 +198,34 @@ struct xc_sr_restore_ops
      * @returns 0 for success, -1 for failure, with errno appropriately set.
      */
     int (*localise_page)(struct xc_sr_context *ctx, uint32_t type,
                          void *page);
 
     /**
      * Set up local environment to restore a domain.
      *
      * This is called once before any common setup has occurred, allowing for
      * guest-specific adjustments to be made to common state.
      */
    int (*setup)(struct xc_sr_context *ctx);
 
+    /**
+     * Populate PFNs
+     *
+     * Given a set of pfns, obtain memory from Xen to fill the physmap for the
+     * unpopulated subset.
+     */
+    int (*populate_pfns)(struct xc_sr_context *ctx, unsigned count,
+                         const xen_pfn_t *original_pfns, const uint32_t *types);
+
+
     /**
      * Process an individual record from the stream.  The caller shall take
      * care of processing common records (e.g. END, PAGE_DATA).
      *
      * @return 0 for success, -1 for failure, or the following sentinels:
      *  - RECORD_NOT_PROCESSED
      *  - BROKEN_CHANNEL: under Remus/COLO, this means master may be dead, and
      *    a failover is needed.
      */
 #define RECORD_NOT_PROCESSED 1
 #define BROKEN_CHANNEL 2
     int (*process_record)(struct xc_sr_context *ctx, struct xc_sr_record *rec);
@@ -329,24 +339,26 @@ struct xc_sr_context
         int *map_errs;
         xen_pfn_t *pp_pfns;
         xen_pfn_t *pp_mfns;
         void **guest_data;
 
         struct iovec *iov;
         struct xc_sr_rec_page_data_header *pages;
         void *guest_mapping;
         uint32_t nr_mapped_pages;
 
         int send_back_fd;
         unsigned long p2m_size;
+        unsigned long max_pages;
+        unsigned long tot_pages;
         xc_hypercall_buffer_t dirty_bitmap_hbuf;
 
         /* From Image Header. */
         uint32_t format_version;
 
         /* From Domain Header. */
         uint32_t guest_type;
         uint32_t guest_page_size;
 
         /* Currently buffering records between a checkpoint */
         bool buffer_all_records;
@@ -462,24 +474,32 @@ struct xc_sr_context
                 {
                     struct
                     {
                         /* Whether qemu enabled logdirty mode, and we should
                          * disable on cleanup. */
                         bool qemu_enabled_logdirty;
                     } save;
 
                     struct
                     {
                         /* HVM context blob. */
                         struct xc_sr_blob context;
+
+                        /* Bitmap of currently allocated PFNs during restore. */
+                        struct sr_bitmap attempted_1g;
+                        struct sr_bitmap attempted_2m;
+                        struct sr_bitmap allocated_pfns;
+                        xen_pfn_t prev_populated_pfn;
+                        xen_pfn_t iteration_tracker_pfn;
+                        unsigned long iteration;
                     } restore;
                 };
             } hvm;
         } x86;
     };
 };
 
 extern struct xc_sr_save_ops save_ops_x86_pv;
 extern struct xc_sr_save_ops save_ops_x86_hvm;
 extern struct xc_sr_restore_ops restore_ops_x86_pv;
@@ -526,32 +546,24 @@ static inline int write_record(struct xc_sr_context *ctx,
  *
  * On success, the records type and size shall be valid.
  * - If size is 0, data shall be NULL.
  * - If size is non-0, data shall be a buffer allocated by malloc() which must
  *   be passed to free() by the caller.
  *
  * On failure, the contents of the record structure are undefined.
  */
 int read_record_header(struct xc_sr_context *ctx, int fd,
                        struct xc_sr_rhdr *rhdr);
 int read_record_data(struct xc_sr_context *ctx, int fd,
                      struct xc_sr_rhdr *rhdr, struct xc_sr_record *rec);
 
-/*
- * This would ideally be private in restore.c, but is needed by
- * x86_pv_localise_page() if we receive pagetables frames ahead of the
- * contents of the frames they point at.
- */
-int populate_pfns(struct xc_sr_context *ctx, unsigned int count,
-                  const xen_pfn_t *original_pfns, const uint32_t *types);
-
 /* Handle a STATIC_DATA_END record. */
 int handle_static_data_end(struct xc_sr_context *ctx);
 
 /* Page type known to the migration logic? */
 static inline bool is_known_page_type(uint32_t type)
 {
     switch ( type )
     {
     case XEN_DOMCTL_PFINFO_NOTAB:
 
     case XEN_DOMCTL_PFINFO_L1TAB:
     case XEN_DOMCTL_PFINFO_L1TAB | XEN_DOMCTL_PFINFO_LPINTAB:
--- a/tools/libs/guest/xg_sr_restore.c
+++ b/tools/libs/guest/xg_sr_restore.c
@@ -62,78 +62,24 @@ static int read_headers(struct xc_sr_context *ctx)
     if ( dhdr.xen_major == 0 )
     {
         IPRINTF("Found %s domain, converted from legacy stream format",
                 dhdr_type_to_str(dhdr.type));
         DPRINTF("  Legacy conversion script version %u", dhdr.xen_minor);
     }
     else
         IPRINTF("Found %s domain from Xen %u.%u",
                 dhdr_type_to_str(dhdr.type), dhdr.xen_major, dhdr.xen_minor);
 
     return 0;
 }
 
-/*
- * Given a set of pfns, obtain memory from Xen to fill the physmap for the
- * unpopulated subset.  If types is NULL, no page type checking is performed
- * and all unpopulated pfns are populated.
- */
-int populate_pfns(struct xc_sr_context *ctx, unsigned int count,
-                  const xen_pfn_t *original_pfns, const uint32_t *types)
-{
-    xc_interface *xch = ctx->xch;
-    unsigned int i, nr_pfns = 0;
-    int rc = -1;
-
-    for ( i = 0; i < count; ++i )
-    {
-        if ( (!types || page_type_to_populate(types[i])) &&
-             !pfn_is_populated(ctx, original_pfns[i]) )
-        {
-            rc = pfn_set_populated(ctx, original_pfns[i]);
-            if ( rc )
-                goto err;
-            ctx->restore.pp_pfns[nr_pfns] = ctx->restore.pp_mfns[nr_pfns] = original_pfns[i];
-            ++nr_pfns;
-        }
-    }
-
-    if ( nr_pfns )
-    {
-        rc = xc_domain_populate_physmap_exact(
-            xch, ctx->domid, nr_pfns, 0, 0, ctx->restore.pp_mfns);
-        if ( rc )
-        {
-            PERROR("Failed to populate physmap");
-            goto err;
-        }
-
-        for ( i = 0; i < nr_pfns; ++i )
-        {
-            if ( ctx->restore.pp_mfns[i] == INVALID_MFN )
-            {
-                ERROR("Populate physmap failed for pfn %u", i);
-                rc = -1;
-                goto err;
-            }
-
-            ctx->restore.ops.set_gfn(ctx, ctx->restore.pp_pfns[i], ctx->restore.pp_mfns[i]);
-        }
-    }
-
-    rc = 0;
-
- err:
-    return rc;
-}
-
 static int handle_static_data_end_v2(struct xc_sr_context *ctx)
 {
     int rc = 0;
 
 #if defined(__i386__) || defined(__x86_64__)
     xc_interface *xch = ctx->xch;
 
     /*
      * v2 compatibility only exists for x86 streams.  This is a bit of a
      * bodge, but it is less bad than duplicating handle_page_data() between
      * different architectures.
      */
@@ -250,25 +196,26 @@ err:
 /*
  * Populate pfns, if required
  * Fill guest_data with either mapped address or NULL
  * The caller must unmap guest_mapping
  */
 static int map_guest_pages(struct xc_sr_context *ctx,
                            struct xc_sr_rec_page_data_header *pages)
 {
     xc_interface *xch = ctx->xch;
     uint32_t i, p;
     int rc;
 
-    rc = populate_pfns(ctx, pages->count, ctx->restore.pfns, ctx->restore.types);
+    rc = ctx->restore.ops.populate_pfns(ctx, pages->count, ctx->restore.pfns,
+                                        ctx->restore.types);
     if ( rc )
     {
         ERROR("Failed to populate pfns for batch of %u pages", pages->count);
         goto err;
     }
 
     ctx->restore.nr_mapped_pages = 0;
 
     for ( i = 0; i < pages->count; i++ )
     {
         ctx->restore.ops.set_page_type(ctx, ctx->restore.pfns[i],
                                        ctx->restore.types[i]);
@@ -1065,24 +1012,27 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
     ctx.domid = dom;
 
     if ( read_headers(&ctx) )
         return -1;
 
     if ( xc_domain_nr_gpfns(xch, dom, &nr_pfns) < 0 )
     {
         PERROR("Unable to obtain the guest p2m size");
         return -1;
     }
 
+    /* See xc_domain_getinfo */
+    ctx.restore.max_pages = ctx.dominfo.max_pages;
+    ctx.restore.tot_pages = ctx.dominfo.tot_pages;
     ctx.restore.p2m_size = nr_pfns;
     ctx.restore.ops = hvm ? restore_ops_x86_hvm : restore_ops_x86_pv;
 
     if ( restore(&ctx) )
         return -1;
 
     IPRINTF("XenStore: mfn %#"PRIpfn", dom %d, evt %u",
             ctx.restore.xenstore_gfn,
             ctx.restore.xenstore_domid,
             ctx.restore.xenstore_evtchn);
 
     IPRINTF("Console: mfn %#"PRIpfn", dom %d, evt %u",
--- a/tools/libs/guest/xg_sr_restore_x86_hvm.c
+++ b/tools/libs/guest/xg_sr_restore_x86_hvm.c
@@ -121,24 +121,51 @@ static void x86_hvm_set_page_type(struct xc_sr_context *ctx,
 {
     /* no-op */
 }
 
 /* restore_ops function. */
 static int x86_hvm_localise_page(struct xc_sr_context *ctx,
                                  uint32_t type, void *page)
 {
     /* no-op */
     return 0;
 }
 
+static bool x86_hvm_expand_sp_bitmaps(struct xc_sr_context *ctx, unsigned long max_pfn)
+{
+    struct sr_bitmap *bm;
+
+    bm = &ctx->x86.hvm.restore.attempted_1g;
+    if ( !sr_bitmap_expand(bm, max_pfn >> SUPERPAGE_1GB_SHIFT) )
+        return false;
+
+    bm = &ctx->x86.hvm.restore.attempted_2m;
+    if ( !sr_bitmap_expand(bm, max_pfn >> SUPERPAGE_2MB_SHIFT) )
+        return false;
+
+    bm = &ctx->x86.hvm.restore.allocated_pfns;
+    if ( !sr_bitmap_expand(bm, max_pfn) )
+        return false;
+
+    return true;
+}
+
+static void x86_hvm_no_superpage(struct xc_sr_context *ctx, unsigned long addr)
+{
+    unsigned long pfn = addr >> XC_PAGE_SHIFT;
+
+    sr_set_bit(pfn >> SUPERPAGE_1GB_SHIFT, &ctx->x86.hvm.restore.attempted_1g);
+    sr_set_bit(pfn >> SUPERPAGE_2MB_SHIFT, &ctx->x86.hvm.restore.attempted_2m);
+}
+
 /*
  * restore_ops function.  Confirms the stream matches the domain.
  */
 static int x86_hvm_setup(struct xc_sr_context *ctx)
 {
     xc_interface *xch = ctx->xch;
     unsigned long max_pfn;
 
     if ( ctx->restore.guest_type != DHDR_TYPE_X86_HVM )
     {
         ERROR("Unable to restore %s domain into an x86 HVM domain",
               dhdr_type_to_str(ctx->restore.guest_type));
@@ -155,30 +182,42 @@ static int x86_hvm_setup(struct xc_sr_context *ctx)
 
 #ifdef __i386__
     /* Very large domains (> 1TB) will exhaust virtual address space. */
     if ( ctx->restore.p2m_size > 0x0fffffff )
     {
         errno = E2BIG;
         PERROR("Cannot restore this big a guest");
         return -1;
     }
 #endif
 
     max_pfn = max(ctx->restore.p2m_size, ctx->dominfo.max_pages);
     if ( !sr_bitmap_expand(&ctx->restore.populated_pfns, max_pfn) )
-    {
-        PERROR("Unable to allocate memory for populated_pfns bitmap");
-        return -1;
-    }
+        goto out;
+
+    if ( !x86_hvm_expand_sp_bitmaps(ctx, max_pfn) )
+        goto out;
+
+    /* FIXME: distinguish between PVH and HVM */
+    /* No superpage in 1st 2MB due to VGA hole */
+    x86_hvm_no_superpage(ctx, 0xA0000u);
+#define LAPIC_BASE_ADDRESS 0xfee00000u
+#define ACPI_INFO_PHYSICAL_ADDRESS 0xfc000000u
+    x86_hvm_no_superpage(ctx, LAPIC_BASE_ADDRESS);
+    x86_hvm_no_superpage(ctx, ACPI_INFO_PHYSICAL_ADDRESS);
 
     return 0;
+
+out:
+    PERROR("Unable to allocate memory for pfn bitmaps");
+    return -1;
 }
 
 /*
  * restore_ops function.
  */
 static int x86_hvm_process_record(struct xc_sr_context *ctx,
                                   struct xc_sr_record *rec)
 {
     switch ( rec->type )
     {
     case REC_TYPE_X86_TSC_INFO:
         return handle_x86_tsc_info(ctx, rec);
@@ -241,40 +280,374 @@ static int x86_hvm_stream_complete(struct xc_sr_context *ctx)
     if ( rc )
     {
         PERROR("Failed to seed grant table");
         return rc;
     }
 
     return rc;
 }
 
 static int x86_hvm_cleanup(struct xc_sr_context *ctx)
 {
     sr_bitmap_free(&ctx->restore.populated_pfns);
+    sr_bitmap_free(&ctx->x86.hvm.restore.attempted_1g);
+    sr_bitmap_free(&ctx->x86.hvm.restore.attempted_2m);
+    sr_bitmap_free(&ctx->x86.hvm.restore.allocated_pfns);
 
     free(ctx->x86.hvm.restore.context.ptr);
 
     free(ctx->x86.restore.cpuid.ptr);
     free(ctx->x86.restore.msr.ptr);
 
     return 0;
 }
 
+/*
+ * Set a range of pfns as allocated
+ */
+static void pfn_set_long_allocated(struct xc_sr_context *ctx, xen_pfn_t base_pfn)
+{
+    sr_set_long_bit(base_pfn, &ctx->x86.hvm.restore.allocated_pfns);
+}
+
+static void pfn_set_allocated(struct xc_sr_context *ctx, xen_pfn_t pfn)
+{
+    sr_set_bit(pfn, &ctx->x86.hvm.restore.allocated_pfns);
+}
+
+struct x86_hvm_sp {
+    xen_pfn_t pfn;
+    xen_pfn_t base_pfn;
+    unsigned long index;
+    unsigned long count;
+};
+
+/*
+ * Try to allocate a 1GB page for this pfn, but avoid Over-allocation.
+ * If this succeeds, mark the range of 2MB pages as busy.
+ */
+static bool x86_hvm_alloc_1g(struct xc_sr_context *ctx, struct x86_hvm_sp *sp)
+{
+    xc_interface *xch = ctx->xch;
+    unsigned int order;
+    int i, done;
+    xen_pfn_t extent;
+
+    /* Only one attempt to avoid overlapping allocation */
+    if ( sr_test_and_set_bit(sp->index, &ctx->x86.hvm.restore.attempted_1g) )
+        return false;
+
+    order = SUPERPAGE_1GB_SHIFT;
+    sp->count = SUPERPAGE_1GB_NR_PFNS;
+
+    /* Allocate only if there is room for another superpage */
+    if ( ctx->restore.tot_pages + sp->count > ctx->restore.max_pages )
+        return false;
+
+    extent = sp->base_pfn = (sp->pfn >> order) << order;
+    done = xc_domain_populate_physmap(xch, ctx->domid, 1, order, 0, &extent);
+    if ( done < 0 ) {
+        PERROR("populate_physmap failed.");
+        return false;
+    }
+    if ( done == 0 )
+        return false;
+
+    DPRINTF("1G %" PRI_xen_pfn "\n", sp->base_pfn);
+
+    /* Mark all 2MB pages as done to avoid overlapping allocation */
+    for ( i = 0; i < (SUPERPAGE_1GB_NR_PFNS/SUPERPAGE_2MB_NR_PFNS); i++ )
+        sr_set_bit((sp->base_pfn >> SUPERPAGE_2MB_SHIFT) + i, &ctx->x86.hvm.restore.attempted_2m);
+
+    return true;
+}
+
+/* Allocate a 2MB page if x86_hvm_alloc_1g failed, avoid Over-allocation. */
+static bool x86_hvm_alloc_2m(struct xc_sr_context *ctx, struct x86_hvm_sp *sp)
+{
+    xc_interface *xch = ctx->xch;
+    unsigned int order;
+    int done;
+    xen_pfn_t extent;
+
+    /* Only one attempt to avoid overlapping allocation */
+    if ( sr_test_and_set_bit(sp->index, &ctx->x86.hvm.restore.attempted_2m) )
+        return false;
+
+    order = SUPERPAGE_2MB_SHIFT;
+    sp->count = SUPERPAGE_2MB_NR_PFNS;
+
+    /* Allocate only if there is room for another superpage */
+    if ( ctx->restore.tot_pages + sp->count > ctx->restore.max_pages )
+        return false;
+
+    extent = sp->base_pfn = (sp->pfn >> order) << order;
+    done = xc_domain_populate_physmap(xch, ctx->domid, 1, order, 0, &extent);
+    if ( done < 0 ) {
+        PERROR("populate_physmap failed.");
+        return false;
+    }
+    if ( done == 0 )
+        return false;
+
+    DPRINTF("2M %" PRI_xen_pfn "\n", sp->base_pfn);
+    return true;
+}
+
+/* Allocate a single page if x86_hvm_alloc_2m failed. */
+static bool x86_hvm_alloc_4k(struct xc_sr_context *ctx, struct x86_hvm_sp *sp)
+{
+    xc_interface *xch = ctx->xch;
+    unsigned int order;
+    int done;
+    xen_pfn_t extent;
+
+    order = 0;
+    sp->count = 1UL;
+
+    /* Allocate only if there is room for another page */
+    if ( ctx->restore.tot_pages + sp->count > ctx->restore.max_pages ) {
+        errno = E2BIG;
+        return false;
+    }
+
+    extent = sp->base_pfn = (sp->pfn >> order) << order;
+    done = xc_domain_populate_physmap(xch, ctx->domid, 1, order, 0, &extent);
+    if ( done < 0 ) {
+        PERROR("populate_physmap failed.");
+        return false;
+    }
+    if ( done == 0 ) {
+        errno = ENOMEM;
+        return false;
+    }
+
+    DPRINTF("4K %" PRI_xen_pfn "\n", sp->base_pfn);
+    return true;
+}
+/*
+ * Attempt to allocate a superpage where the pfn resides.
+ */
+static int x86_hvm_allocate_pfn(struct xc_sr_context *ctx, xen_pfn_t pfn)
+{
+    bool success;
+    unsigned long idx_1g, idx_2m;
+    struct x86_hvm_sp sp = {
+        .pfn = pfn
+    };
+
+    if ( sr_test_bit(pfn, &ctx->x86.hvm.restore.allocated_pfns) )
+        return 0;
+
+    idx_1g = pfn >> SUPERPAGE_1GB_SHIFT;
+    idx_2m = pfn >> SUPERPAGE_2MB_SHIFT;
+
+    sp.index = idx_1g;
+    success = x86_hvm_alloc_1g(ctx, &sp);
+
+    if ( success == false ) {
+        sp.index = idx_2m;
+        success = x86_hvm_alloc_2m(ctx, &sp);
+    }
+
+    if ( success == false ) {
+        sp.index = 0;
+        success = x86_hvm_alloc_4k(ctx, &sp);
+    }
+
+    if ( success == false )
+        return -1;
+
+    do {
+        if ( sp.count >= BITS_PER_LONG && (sp.count % BITS_PER_LONG) == 0 ) {
+            sp.count -= BITS_PER_LONG;
+            ctx->restore.tot_pages += BITS_PER_LONG;
+            pfn_set_long_allocated(ctx, sp.base_pfn + sp.count);
+        } else {
+            sp.count--;
+            ctx->restore.tot_pages++;
+            pfn_set_allocated(ctx, sp.base_pfn + sp.count);
+        }
+    } while ( sp.count );
+
+    return 0;
+}
+
+/*
+ * Deallocate memory.
+ * There was likely an optimistic superpage allocation.
+ * This means more pages may have been allocated past gap_end.
+ * This range is not freed now. Incoming higher pfns will release it.
+ */
+static int x86_hvm_punch_hole(struct xc_sr_context *ctx,
+                              xen_pfn_t gap_start, xen_pfn_t gap_end)
+{
+    xc_interface *xch = ctx->xch;
+    xen_pfn_t _pfn, pfn;
+    uint32_t domid, freed = 0;
+    int rc;
+
+    pfn = gap_start >> SUPERPAGE_1GB_SHIFT;
+    do
+    {
+        sr_set_bit(pfn, &ctx->x86.hvm.restore.attempted_1g);
+    } while (++pfn <= gap_end >> SUPERPAGE_1GB_SHIFT);
+
+    pfn = gap_start >> SUPERPAGE_2MB_SHIFT;
+    do
+    {
+        sr_set_bit(pfn, &ctx->x86.hvm.restore.attempted_2m);
+    } while (++pfn <= gap_end >> SUPERPAGE_2MB_SHIFT);
+
+    pfn = gap_start;
+
+    while ( pfn <= gap_end )
+    {
+        if ( sr_test_and_clear_bit(pfn, &ctx->x86.hvm.restore.allocated_pfns) )
+        {
+            domid = ctx->domid;
+            _pfn = pfn;
+            rc = xc_domain_decrease_reservation_exact(xch, domid, 1, 0, &_pfn);
+            if ( rc )
+            {
+                PERROR("Failed to release pfn %" PRI_xen_pfn, pfn);
+                return -1;
+            }
+            ctx->restore.tot_pages--;
+            freed++;
+        }
+        pfn++;
+    }
+    if ( freed )
+        DPRINTF("freed %u between %" PRI_xen_pfn " %" PRI_xen_pfn "\n",
+                freed, gap_start, gap_end);
+    return 0;
+}
+
+static int x86_hvm_unpopulate_page(struct xc_sr_context *ctx, xen_pfn_t pfn)
+{
+    sr_clear_bit(pfn, &ctx->restore.populated_pfns);
+    return x86_hvm_punch_hole(ctx, pfn, pfn);
+}
+
+static int x86_hvm_populate_page(struct xc_sr_context *ctx, xen_pfn_t pfn)
+{
+    xen_pfn_t gap_start, gap_end;
+    bool has_gap, first_iteration;
+    int rc;
+
+    /*
+     * Check for a gap between the previous populated pfn and this pfn.
+     * In case a gap exists, it is required to punch a hole to release memory,
+     * starting after the previous pfn and before this pfn.
+     *
+     * But: this can be done only during the first iteration, which is the
+     * only place where superpage allocations are attempted. All following
+     * iterations lack the info to properly maintain prev_populated_pfn.
+     */
+    has_gap = ctx->x86.hvm.restore.prev_populated_pfn + 1 < pfn;
+    first_iteration = ctx->x86.hvm.restore.iteration == 0;
+    if ( has_gap && first_iteration )
+    {
+        gap_start = ctx->x86.hvm.restore.prev_populated_pfn + 1;
+        gap_end = pfn - 1;
+
+        rc = x86_hvm_punch_hole(ctx, gap_start, gap_end);
+        if ( rc )
+            goto err;
+    }
+
+    rc = x86_hvm_allocate_pfn(ctx, pfn);
+    if ( rc )
+        goto err;
+    pfn_set_populated(ctx, pfn);
+    ctx->x86.hvm.restore.prev_populated_pfn = pfn;
+
+    rc = 0;
+err:
+    return rc;
+}
+
+/*
+ * Try to allocate superpages.
+ * This works without memory map because the pfns arrive in incremental order.
+ * All pfn numbers and their type are submitted.
+ * Only pfns with data will have also pfn content transmitted.
+ */
+static int x86_hvm_populate_pfns(struct xc_sr_context *ctx, unsigned count,
+                                 const xen_pfn_t *original_pfns,
+                                 const uint32_t *types)
+{
+    xc_interface *xch = ctx->xch;
+    xen_pfn_t pfn, min_pfn, max_pfn;
+    bool to_populate, populated;
+    unsigned i = count;
+    int rc = 0;
+
+    min_pfn = count ? original_pfns[0] : 0;
+    max_pfn = count ? original_pfns[count - 1] : 0;
+    DPRINTF("batch of %u pfns between %" PRI_xen_pfn " %" PRI_xen_pfn "\n",
+            count, min_pfn, max_pfn);
+
+    if ( !x86_hvm_expand_sp_bitmaps(ctx, max_pfn) )
+    {
+        ERROR("Unable to allocate memory for pfn bitmaps");
+        return -1;
+    }
+
+    /*
+     * There is no indicator for a new iteration.
+     * Simulate it by checking if a lower pfn is coming in.
+     * In the end it matters only to know if this iteration is the first one.
+     */
+    if ( min_pfn < ctx->x86.hvm.restore.iteration_tracker_pfn )
+        ctx->x86.hvm.restore.iteration++;
+    ctx->x86.hvm.restore.iteration_tracker_pfn = min_pfn;
+
+    for ( i = 0; i < count; ++i )
+    {
+        pfn = original_pfns[i];
+
+        to_populate = page_type_to_populate(types[i]);
+        populated = pfn_is_populated(ctx, pfn);
+
+        /*
+         * page has data, pfn populated: nothing to do
+         * page has data, pfn not populated: likely never seen before
+         * page has no data, pfn populated: likely ballooned out during migration
+         * page has no data, pfn not populated: nothing to do
+         */
+        if ( to_populate && !populated )
+        {
+            rc = x86_hvm_populate_page(ctx, pfn);
+        } else if ( !to_populate && populated )
+        {
+            rc = x86_hvm_unpopulate_page(ctx, pfn);
+        }
+        if ( rc )
+            break;
+    }
+
+    return rc;
+}
+
+
 struct xc_sr_restore_ops restore_ops_x86_hvm =
 {
     .pfn_is_valid    = x86_hvm_pfn_is_valid,
     .pfn_to_gfn      = x86_hvm_pfn_to_gfn,
     .set_gfn         = x86_hvm_set_gfn,
     .set_page_type   = x86_hvm_set_page_type,
     .localise_page   = x86_hvm_localise_page,
     .setup           = x86_hvm_setup,
+    .populate_pfns   = x86_hvm_populate_pfns,
     .process_record  = x86_hvm_process_record,
     .static_data_complete = x86_static_data_complete,
     .stream_complete = x86_hvm_stream_complete,
     .cleanup         = x86_hvm_cleanup,
 };
 
 /*
  * Local variables:
  * mode: C
  * c-file-style: "BSD"
  * c-basic-offset: 4
  * tab-width: 4
--- a/tools/libs/guest/xg_sr_restore_x86_pv.c
+++ b/tools/libs/guest/xg_sr_restore_x86_pv.c
@@ -950,24 +950,82 @@ static void x86_pv_set_gfn(struct xc_sr_context *ctx, xen_pfn_t pfn,
                            xen_pfn_t mfn)
 {
     assert(pfn <= ctx->x86.pv.max_pfn);
 
     if ( ctx->x86.pv.width == sizeof(uint64_t) )
         /* 64 bit guest.  Need to expand INVALID_MFN for 32 bit toolstacks. */
         ((uint64_t *)ctx->x86.pv.p2m)[pfn] = mfn == INVALID_MFN ? ~0ULL : mfn;
     else
         /* 32 bit guest.  Can truncate INVALID_MFN for 64 bit toolstacks. */
         ((uint32_t *)ctx->x86.pv.p2m)[pfn] = mfn;
 }
 
+/*
+ * Given a set of pfns, obtain memory from Xen to fill the physmap for the
+ * unpopulated subset.  If types is NULL, no page type checking is performed
+ * and all unpopulated pfns are populated.
+ */
+static int x86_pv_populate_pfns(struct xc_sr_context *ctx, unsigned count,
+                                const xen_pfn_t *original_pfns,
+                                const uint32_t *types)
+{
+    xc_interface *xch = ctx->xch;
+    xen_pfn_t *mfns = ctx->restore.pp_mfns,
+        *pfns = ctx->restore.pp_pfns;
+    unsigned int i, nr_pfns = 0;
+    int rc = -1;
+
+    for ( i = 0; i < count; ++i )
+    {
+        if ( (!types ||
+              (types && page_type_has_stream_data(types[i]) == true)) &&
+             !pfn_is_populated(ctx, original_pfns[i]) )
+        {
+            rc = pfn_set_populated(ctx, original_pfns[i]);
+            if ( rc )
+                goto err;
+            pfns[nr_pfns] = mfns[nr_pfns] = original_pfns[i];
+            ++nr_pfns;
+        }
+    }
+
+    if ( nr_pfns )
+    {
+        rc = xc_domain_populate_physmap_exact(
+            xch, ctx->domid, nr_pfns, 0, 0, mfns);
+        if ( rc )
+        {
+            PERROR("Failed to populate physmap");
+            goto err;
+        }
+
+        for ( i = 0; i < nr_pfns; ++i )
+        {
+            if ( mfns[i] == INVALID_MFN )
+            {
+                ERROR("Populate physmap failed for pfn %u", i);
+                rc = -1;
+                goto err;
+            }
+
+            ctx->restore.ops.set_gfn(ctx, pfns[i], mfns[i]);
+        }
+    }
+
+    rc = 0;
+
+ err:
+    return rc;
+}
+
 /*
  * restore_ops function.  Convert pfns back to mfns in pagetables.  Possibly
  * needs to populate new frames if a PTE is found referring to a frame which
  * hasn't yet been seen from PAGE_DATA records.
  */
 static int x86_pv_localise_page(struct xc_sr_context *ctx,
                                 uint32_t type, void *page)
 {
     xc_interface *xch = ctx->xch;
     uint64_t *table = page;
     uint64_t pte;
     unsigned int i, to_populate;
@@ -994,25 +1052,25 @@ static int x86_pv_localise_page(struct xc_sr_context *ctx,
                 ERROR("PTE truncation detected.  L%u[%u] = %016"PRIx64,
                       type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT, i, pte);
                 errno = E2BIG;
                 return -1;
             }
 #endif
 
             if ( pfn_to_mfn(ctx, pfn) == INVALID_MFN )
                 pfns[to_populate++] = pfn;
         }
     }
 
-    if ( to_populate && populate_pfns(ctx, to_populate, pfns, NULL) )
+    if ( to_populate && x86_pv_populate_pfns(ctx, to_populate, pfns, NULL) )
         return -1;
 
     for ( i = 0; i < (PAGE_SIZE / sizeof(uint64_t)); ++i )
     {
         pte = table[i];
 
         if ( pte & _PAGE_PRESENT )
         {
             xen_pfn_t mfn, pfn;
 
             pfn = pte_to_frame(pte);
             mfn = pfn_to_mfn(ctx, pfn);
@@ -1191,24 +1249,25 @@ static int x86_pv_cleanup(struct xc_sr_context *ctx)
     return 0;
 }
 
 struct xc_sr_restore_ops restore_ops_x86_pv =
 {
     .pfn_is_valid    = x86_pv_pfn_is_valid,
     .pfn_to_gfn      = pfn_to_mfn,
     .set_page_type   = x86_pv_set_page_type,
     .set_gfn         = x86_pv_set_gfn,
     .localise_page   = x86_pv_localise_page,
     .setup           = x86_pv_setup,
+    .populate_pfns   = x86_pv_populate_pfns,
     .process_record  = x86_pv_process_record,
     .static_data_complete = x86_static_data_complete,
     .stream_complete = x86_pv_stream_complete,
     .cleanup         = x86_pv_cleanup,
 };
 
 /*
  * Local variables:
  * mode: C
  * c-file-style: "BSD"
  * c-basic-offset: 4
  * tab-width: 4
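The allocation strategy the commit message describes, implemented by x86_hvm_allocate_pfn() above, is a simple fallback chain: round the incoming pfn down to an aligned extent, check the allocation budget against max_pages to avoid over-allocation, and drop to the next smaller order when a larger extent cannot be populated. The standalone sketch below illustrates just that chain; it is not part of the patch. try_populate() is a hypothetical stub standing in for xc_domain_populate_physmap(), and the max_pages value is an assumed domain limit.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SUPERPAGE_2MB_SHIFT 9   /* 2MB superpage = 512 4K pages */
#define SUPERPAGE_1GB_SHIFT 18  /* 1GB superpage = 262144 4K pages */

typedef uint64_t xen_pfn_t;

static unsigned long tot_pages;             /* pages allocated so far */
static unsigned long max_pages = 1UL << 20; /* assumed domain limit (4GB) */

/* Hypothetical stand-in for xc_domain_populate_physmap(): pretend the
 * hypervisor has no free 1GB extents, so only smaller orders succeed. */
static bool try_populate(xen_pfn_t base_pfn, unsigned int order)
{
    (void)base_pfn; /* unused in this stub */
    return order <= SUPERPAGE_2MB_SHIFT;
}

/* One step of the fallback: align the pfn down to a 2^order extent, check
 * the budget to avoid over-allocation, then attempt to populate it.
 * Mirrors the shape of x86_hvm_alloc_{1g,2m,4k}() in the patch. */
static bool alloc_order(xen_pfn_t pfn, unsigned int order)
{
    unsigned long count = 1UL << order;
    xen_pfn_t base_pfn = (pfn >> order) << order; /* align down */

    if ( tot_pages + count > max_pages ) /* no room for this extent */
        return false;
    if ( !try_populate(base_pfn, order) )
        return false;

    tot_pages += count;
    printf("populated order-%u extent at pfn %#llx\n",
           order, (unsigned long long)base_pfn);
    return true;
}

int main(void)
{
    xen_pfn_t pfn = 0x12345;

    /* 1G -> 2M -> 4K fallback, as in x86_hvm_allocate_pfn(). */
    if ( !alloc_order(pfn, SUPERPAGE_1GB_SHIFT) &&
         !alloc_order(pfn, SUPERPAGE_2MB_SHIFT) &&
         !alloc_order(pfn, 0) )
        return 1;

    return 0;
}

With the stub refusing 1GB extents, the run populates the 2MB extent at pfn 0x12200 that contains 0x12345; pages allocated beyond what the stream later populates are what x86_hvm_punch_hole() releases again in the patch.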