File xen.sr-restore-handle_incoming_page_data.patch of Package xen
From: Olaf Hering <olaf@aepfle.de>
Date: Thu, 29 Oct 2020 16:13:10 +0100
Subject: sr restore handle_incoming_page_data
tools: restore: write data directly into guest
Read incoming migration stream directly into the guest memory.
This avoids the memory allocation and copying, and the resulting
performance penalty.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
---
tools/libs/guest/xg_sr_common.h | 3 +
tools/libs/guest/xg_sr_restore.c | 155 ++++++++-
2 files changed, 153 insertions(+), 5 deletions(-)
--- a/tools/libs/guest/xg_sr_common.h
+++ b/tools/libs/guest/xg_sr_common.h
@@ -254,24 +254,26 @@ struct xc_sr_context
struct /* Restore data. */
{
struct xc_sr_restore_ops ops;
struct restore_callbacks *callbacks;
xen_pfn_t *pfns;
uint32_t *types;
xen_pfn_t *mfns;
int *map_errs;
xen_pfn_t *pp_pfns;
xen_pfn_t *pp_mfns;
void **guest_data;
+ struct iovec *iov;
+ struct xc_sr_rec_page_data_header *pages;
void *guest_mapping;
uint32_t nr_mapped_pages;
int send_back_fd;
unsigned long p2m_size;
xc_hypercall_buffer_t dirty_bitmap_hbuf;
/* From Image Header. */
uint32_t format_version;
/* From Domain Header. */
@@ -302,24 +304,25 @@ struct xc_sr_context
* OUTPUT: gfn
*/
xen_pfn_t xenstore_gfn, console_gfn;
unsigned int xenstore_evtchn, console_evtchn;
uint32_t xenstore_domid, console_domid;
/* Bitmap of currently populated PFNs during restore. */
unsigned long *populated_pfns;
xen_pfn_t max_populated_pfn;
/* Sender has invoked verify mode on the stream. */
bool verify;
+ void *verify_buf;
} restore;
};
union /* Guest-arch specific data. */
{
struct /* x86 */
{
/* Common save/restore data. */
union
{
struct
{
--- a/tools/libs/guest/xg_sr_restore.c
+++ b/tools/libs/guest/xg_sr_restore.c
@@ -372,24 +372,147 @@ static int map_guest_pages(struct xc_sr_context *ctx,
PERROR("Mapping pfn %#"PRIpfn" (mfn %#"PRIpfn", type %#"PRIx32") failed",
ctx->restore.pfns[i], ctx->restore.mfns[p], ctx->restore.types[i]);
goto err;
}
done:
rc = 0;
err:
return rc;
}
+/*
+ * Handle a PAGE_DATA record by reading it directly from the stream.
+ *
+ * The record body is consumed from ctx->fd in three steps: the fixed-size
+ * header, the array of pages->count pfn entries, and finally the page
+ * contents. The page contents are fetched with a single readv_exact() call
+ * whose targets are ctx->restore.guest_data[i] (presumably the guest
+ * mappings set up by map_guest_pages()), or slots inside
+ * ctx->restore.verify_buf when ctx->restore.verify is set.
+ *
+ * ctx:  restore context; ctx->restore.pages and ctx->restore.iov are
+ *       filled in by this function.
+ * rhdr: header of the record being processed; only rhdr->length is used
+ *       here, as input to the verify_rec_page_*() checks.
+ *
+ * Returns 0 on success and non-zero on failure. A page that fails the
+ * memcmp() in verify mode is logged but does not turn into a failure.
+ * If ctx->restore.guest_mapping is set when the function leaves, it is
+ * unmapped and reset to NULL, on the success path as well as on error.
+ */
+static int handle_incoming_page_data(struct xc_sr_context *ctx,
+ struct xc_sr_rhdr *rhdr)
+{
+ xc_interface *xch = ctx->xch;
+ struct xc_sr_rec_page_data_header *pages = ctx->restore.pages;
+ /* The pfn array lives in the same buffer, right behind the header */
+ uint64_t *pfn_nums = &pages->pfn[0];
+ uint32_t i;
+ int rc, iov_idx;
+
+ rc = handle_static_data_end_v2(ctx);
+ if ( rc )
+ goto err;
+
+ /* First read and verify the header */
+ rc = read_exact(ctx->fd, pages, sizeof(*pages));
+ if ( rc )
+ {
+ PERROR("Could not read rec_pfn header");
+ goto err;
+ }
+
+ if ( !verify_rec_page_hdr(ctx, rhdr->length, pages) )
+ {
+ rc = -1;
+ goto err;
+ }
+
+ /* Then read and verify the incoming pfn numbers */
+ rc = read_exact(ctx->fd, pfn_nums, sizeof(*pfn_nums) * pages->count);
+ if ( rc )
+ {
+ PERROR("Could not read rec_pfn data");
+ goto err;
+ }
+
+ if ( !verify_rec_page_pfns(ctx, rhdr->length, pages) )
+ {
+ rc = -1;
+ goto err;
+ }
+
+ /* Map the guest pages of this batch; the page data is read further below */
+ rc = map_guest_pages(ctx, pages);
+ if ( rc )
+ goto err;
+
+ /*
+ * Prepare read buffers: one iovec entry per page that carries data.
+ * Entries whose guest_data[] slot is NULL are skipped, so iov_idx can
+ * lag behind i.
+ */
+ for ( i = 0, iov_idx = 0; i < pages->count; i++ )
+ {
+ struct iovec *iov;
+
+ if ( !ctx->restore.guest_data[i] )
+ continue;
+
+ iov = &ctx->restore.iov[iov_idx];
+ iov->iov_len = PAGE_SIZE;
+ /* In verify mode read into the side buffer (slot chosen by i, not iov_idx) */
+ if ( ctx->restore.verify )
+ iov->iov_base = ctx->restore.verify_buf + (i * PAGE_SIZE);
+ else
+ iov->iov_base = ctx->restore.guest_data[i];
+ iov_idx++;
+ }
+
+ /* No page in this record carries data, nothing to read */
+ if ( !iov_idx )
+ goto done;
+
+ rc = readv_exact(ctx->fd, ctx->restore.iov, iov_idx);
+ if ( rc )
+ {
+ PERROR("read of %d pages failed", iov_idx);
+ goto err;
+ }
+
+ /*
+ * Post-processing of pfn data. The loop skips the same entries as the
+ * loop above, so iov[iov_idx].iov_base is the buffer page i was read into.
+ */
+ for ( i = 0, iov_idx = 0; i < pages->count; i++ )
+ {
+ void *addr;
+
+ if ( !ctx->restore.guest_data[i] )
+ continue;
+
+ addr = ctx->restore.iov[iov_idx].iov_base;
+ rc = ctx->restore.ops.localise_page(ctx, ctx->restore.types[i], addr);
+ if ( rc )
+ {
+ ERROR("Failed to localise pfn %#"PRIpfn" (type %#"PRIx32")",
+ ctx->restore.pfns[i],
+ ctx->restore.types[i] >> XEN_DOMCTL_PFINFO_LTAB_SHIFT);
+ goto err;
+
+ }
+
+ /* Compare received data with current guest content; a mismatch is only logged */
+ if ( ctx->restore.verify )
+ {
+ if ( memcmp(ctx->restore.guest_data[i], addr, PAGE_SIZE) )
+ {
+ ERROR("verify pfn %#"PRIpfn" failed (type %#"PRIx32")",
+ ctx->restore.pfns[i],
+ ctx->restore.types[i] >> XEN_DOMCTL_PFINFO_LTAB_SHIFT);
+ }
+ }
+
+ iov_idx++;
+ }
+
+done:
+ rc = 0;
+
+ /* Common exit, also reached on success: drop the guest mapping if there is one */
+err:
+ if ( ctx->restore.guest_mapping )
+ {
+ xenforeignmemory_unmap(xch->fmem, ctx->restore.guest_mapping, ctx->restore.nr_mapped_pages);
+ ctx->restore.guest_mapping = NULL;
+ }
+ return rc;
+}
+
/*
* Handle PAGE_DATA record from an existing buffer
* Given a list of pfns, their types, and a block of page data from the
* stream, populate and record their types, map the relevant subset and copy
* the data into the guest.
*/
static int handle_buffered_page_data(struct xc_sr_context *ctx,
struct xc_sr_record *rec)
{
xc_interface *xch = ctx->xch;
struct xc_sr_rec_page_data_header *pages = rec->data;
void *p;
@@ -704,24 +827,33 @@ static int process_buffered_record(struct xc_sr_context *ctx, struct xc_sr_recor
switch ( rec->type )
{
case REC_TYPE_END:
break;
case REC_TYPE_PAGE_DATA:
rc = handle_buffered_page_data(ctx, rec);
break;
case REC_TYPE_VERIFY:
DPRINTF("Verify mode enabled");
ctx->restore.verify = true;
+ if ( !ctx->restore.verify_buf )
+ {
+ ctx->restore.verify_buf = malloc(MAX_BATCH_SIZE * PAGE_SIZE);
+ if ( !ctx->restore.verify_buf )
+ {
+ PERROR("Unable to allocate verify_buf");
+ rc = -1;
+ }
+ }
break;
case REC_TYPE_CHECKPOINT:
rc = handle_checkpoint(ctx);
break;
case REC_TYPE_STATIC_DATA_END:
rc = handle_static_data_end(ctx);
break;
default:
rc = ctx->restore.ops.process_record(ctx, rec);
@@ -730,29 +862,37 @@ static int process_buffered_record(struct xc_sr_context *ctx, struct xc_sr_recor
free(rec->data);
rec->data = NULL;
return rc;
}
+/*
+ * Dispatch one incoming record based on rhdr->type.
+ *
+ * PAGE_DATA records are consumed straight from the stream by
+ * handle_incoming_page_data(). Every other record type is first read into
+ * a buffer with read_record_data() and, if that succeeded, passed on to
+ * process_buffered_record().
+ *
+ * Returns 0 on success, or the non-zero result of the failing step.
+ */
static int process_incoming_record_header(struct xc_sr_context *ctx, struct xc_sr_rhdr *rhdr)
{
struct xc_sr_record rec;
int rc;
- rc = read_record_data(ctx, ctx->fd, rhdr, &rec);
- if ( rc )
- return rc;
+ switch ( rhdr->type )
+ {
+ case REC_TYPE_PAGE_DATA:
+ rc = handle_incoming_page_data(ctx, rhdr);
+ break;
+ default:
+ rc = read_record_data(ctx, ctx->fd, rhdr, &rec);
+ /* rec is only valid, and only processed, when the read succeeded */
+ if ( rc == 0 )
+ rc = process_buffered_record(ctx, &rec);
+ break;
+ }
- return process_buffered_record(ctx, &rec);
+ return rc;
}
static int setup(struct xc_sr_context *ctx)
{
xc_interface *xch = ctx->xch;
int rc;
DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
&ctx->restore.dirty_bitmap_hbuf);
if ( ctx->stream_type == XC_STREAM_COLO )
{
@@ -779,27 +919,30 @@ static int setup(struct xc_sr_context *ctx)
ERROR("Unable to allocate memory for populated_pfns bitmap");
rc = -1;
goto err;
}
ctx->restore.pfns = malloc(MAX_BATCH_SIZE * sizeof(*ctx->restore.pfns));
ctx->restore.types = malloc(MAX_BATCH_SIZE * sizeof(*ctx->restore.types));
ctx->restore.mfns = malloc(MAX_BATCH_SIZE * sizeof(*ctx->restore.mfns));
ctx->restore.map_errs = malloc(MAX_BATCH_SIZE * sizeof(*ctx->restore.map_errs));
ctx->restore.pp_pfns = malloc(MAX_BATCH_SIZE * sizeof(*ctx->restore.pp_pfns));
ctx->restore.pp_mfns = malloc(MAX_BATCH_SIZE * sizeof(*ctx->restore.pp_mfns));
ctx->restore.guest_data = malloc(MAX_BATCH_SIZE * sizeof(*ctx->restore.guest_data));
+ ctx->restore.iov = malloc(MAX_BATCH_SIZE * sizeof(*ctx->restore.iov));
+ ctx->restore.pages = malloc(MAX_BATCH_SIZE * sizeof(*ctx->restore.pages->pfn) + sizeof(*ctx->restore.pages));
if ( !ctx->restore.pfns || !ctx->restore.types || !ctx->restore.mfns ||
!ctx->restore.map_errs || !ctx->restore.pp_pfns ||
- !ctx->restore.pp_mfns || !ctx->restore.guest_data )
+ !ctx->restore.pp_mfns || !ctx->restore.guest_data ||
+ !ctx->restore.iov || !ctx->restore.pages )
{
ERROR("Unable to allocate memory");
rc = -1;
goto err;
}
ctx->restore.buffered_records = malloc(
DEFAULT_BUF_RECORDS * sizeof(struct xc_sr_record));
if ( !ctx->restore.buffered_records )
{
ERROR("Unable to allocate memory for buffered records");
rc = -1;
@@ -818,24 +961,26 @@ static void cleanup(struct xc_sr_context *ctx)
DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
&ctx->restore.dirty_bitmap_hbuf);
for ( i = 0; i < ctx->restore.buffered_rec_num; i++ )
free(ctx->restore.buffered_records[i].data);
if ( ctx->stream_type == XC_STREAM_COLO )
xc_hypercall_buffer_free_pages(
xch, dirty_bitmap, NRPAGES(bitmap_size(ctx->restore.p2m_size)));
free(ctx->restore.buffered_records);
free(ctx->restore.populated_pfns);
+ free(ctx->restore.pages);
+ free(ctx->restore.iov);
free(ctx->restore.guest_data);
free(ctx->restore.pp_mfns);
free(ctx->restore.pp_pfns);
free(ctx->restore.map_errs);
free(ctx->restore.mfns);
free(ctx->restore.types);
free(ctx->restore.pfns);
if ( ctx->restore.ops.cleanup(ctx) )
PERROR("Failed to clean up");
}