File xen.sr-save-local_pages.patch of Package xen

From: Olaf Hering <olaf@aepfle.de>
Date: Fri, 23 Oct 2020 12:47:56 +0200
Subject: sr save local_pages

tools/guest: save: move local_pages array

Remove allocation from hotpath, move local_pages array into preallocated space.

Adjust the code to use the src page as is in case of HVM.
In case of PV the page may need to be normalised; use a private memory
area for this purpose.

Signed-off-by: Olaf Hering <olaf@aepfle.de>
---
 tools/libs/guest/xg_sr_common.h       | 22 ++++++++++++----------
 tools/libs/guest/xg_sr_save.c         | 25 +++----------------------
 tools/libs/guest/xg_sr_save_x86_hvm.c |  5 +++--
 tools/libs/guest/xg_sr_save_x86_pv.c  | 31 +++++++++++++++++++++----------
 4 files changed, 39 insertions(+), 44 deletions(-)

--- a/tools/libs/guest/xg_sr_common.h
+++ b/tools/libs/guest/xg_sr_common.h
@@ -24,42 +24,38 @@ struct xc_sr_record;
  *
  * Every function must be implemented, even if only with a no-op stub.
  */
 struct xc_sr_save_ops
 {
     /* Convert a PFN to GFN.  May return ~0UL for an invalid mapping. */
     xen_pfn_t (*pfn_to_gfn)(const struct xc_sr_context *ctx, xen_pfn_t pfn);
 
     /**
      * Optionally transform the contents of a page from being specific to the
      * sending environment, to being generic for the stream.
      *
-     * The page of data at the end of 'page' may be a read-only mapping of a
-     * running guest; it must not be modified.  If no transformation is
-     * required, the callee should leave '*pages' untouched.
+     * The page of data '*src' may be a read-only mapping of a running guest;
+     * it must not be modified. If no transformation is required, the callee
+     * should leave '*src' untouched, and return 'src' via '*ptr'.
      *
-     * If a transformation is required, the callee should allocate themselves
-     * a local page using malloc() and return it via '*page'.
-     *
-     * The caller shall free() '*page' in all cases.  In the case that the
-     * callee encounters an error, it should *NOT* free() the memory it
-     * allocated for '*page'.
+     * If a transformation is required, the callee should provide the
+     * transformed page in a private buffer and return it via '*ptr'.
      *
      * It is valid to fail with EAGAIN if the transformation is not able to be
      * completed at this point.  The page shall be retried later.
      *
      * @returns 0 for success, -1 for failure, with errno appropriately set.
      */
     int (*normalise_page)(struct xc_sr_context *ctx, xen_pfn_t type,
-                          void **page);
+                          void *src, unsigned int idx, void **ptr);
 
     /**
      * Set up local environment to save a domain. (Typically querying
      * running domain state, setting up mappings etc.)
      *
      * This is called once before any common setup has occurred, allowing for
      * guest-specific adjustments to be made to common state.
      */
     int (*setup)(struct xc_sr_context *ctx);
 
     /**
      * Send static records at the head of the stream.  This is called once,
@@ -362,24 +358,30 @@ struct xc_sr_context
                 void *p2m;
                 /* The guest pfns containing the p2m leaves */
                 xen_pfn_t *p2m_pfns;
 
                 /* Read-only mapping of guests shared info page */
                 shared_info_any_t *shinfo;
 
                 /* p2m generation count for verifying validity of local p2m. */
                 uint64_t p2m_generation;
 
                 union
                 {
+                    struct
+                    {
+                        /* Used by write_batch for modified pages. */
+                        void *normalised_pages;
+                    } save;
+
                     struct
                     {
                         /* State machine for the order of received records. */
                         bool seen_pv_info;
 
                         /* Types for each page (bounded by max_pfn). */
                         uint32_t *pfn_types;
 
                         /* x86 PV per-vcpu storage structure for blobs. */
                         struct xc_sr_x86_pv_restore_vcpu
                         {
                             struct xc_sr_blob basic, extd, xsave, msr;
--- a/tools/libs/guest/xg_sr_save.c
+++ b/tools/libs/guest/xg_sr_save.c
@@ -82,48 +82,37 @@ static int write_checkpoint_record(struct xc_sr_context *ctx)
  * This function:
  * - gets the types for each pfn in the batch.
  * - for each pfn with real data:
  *   - maps and attempts to localise the pages.
  * - construct and writes a PAGE_DATA record into the stream.
  */
 static int write_batch(struct xc_sr_context *ctx)
 {
     xc_interface *xch = ctx->xch;
     xen_pfn_t *mfns = ctx->save.m->mfns, *types = ctx->save.m->types;
     void *guest_mapping = NULL;
     void **guest_data = ctx->save.m->guest_data;
-    void **local_pages = NULL;
     int *errors = ctx->save.m->errors, rc = -1;
     unsigned int i, p, nr_pages = 0, nr_pages_mapped = 0;
     unsigned int nr_pfns = ctx->save.nr_batch_pfns;
-    void *page, *orig_page;
+    void *src;
     uint64_t *rec_pfns = ctx->save.m->rec_pfns;
     struct iovec *iov = ctx->save.m->iov; int iovcnt = 0;
     struct xc_sr_rec_page_data_header hdr = { 0 };
     struct xc_sr_record rec = {
         .type = REC_TYPE_PAGE_DATA,
     };
 
     assert(nr_pfns != 0);
 
-    /* Pointers to locally allocated pages.  Need freeing. */
-    local_pages = calloc(nr_pfns, sizeof(*local_pages));
-
-    if ( !local_pages )
-    {
-        ERROR("Unable to allocate arrays for a batch of %u pages",
-              nr_pfns);
-        goto err;
-    }
-
     for ( i = 0; i < nr_pfns; ++i )
     {
         types[i] = mfns[i] = ctx->save.ops.pfn_to_gfn(ctx,
                                                       ctx->save.m->batch_pfns[i]);
 
         /* Likely a ballooned page. */
         if ( mfns[i] == INVALID_MFN )
         {
             set_bit(ctx->save.m->batch_pfns[i], ctx->save.deferred_pages);
             ++ctx->save.nr_deferred_pages;
         }
     }
@@ -167,45 +156,40 @@ static int write_batch(struct xc_sr_context *ctx)
             {
                 guest_data[i] = NULL;
                 continue;
             }
 
             if ( errors[p] )
             {
                 ERROR("Mapping of pfn %#"PRIpfn" (mfn %#"PRIpfn") failed %d",
                       ctx->save.m->batch_pfns[i], mfns[p], errors[p]);
                 goto err;
             }
 
-            orig_page = page = guest_mapping + (p * PAGE_SIZE);
-            rc = ctx->save.ops.normalise_page(ctx, types[i], &page);
-
-            if ( orig_page != page )
-                local_pages[i] = page;
+            src = guest_mapping + (p * PAGE_SIZE);
+            rc = ctx->save.ops.normalise_page(ctx, types[i], src, i, &guest_data[i]);
 
             if ( rc )
             {
                 guest_data[i] = NULL;
                 if ( rc == -1 && errno == EAGAIN )
                 {
                     set_bit(ctx->save.m->batch_pfns[i], ctx->save.deferred_pages);
                     ++ctx->save.nr_deferred_pages;
                     types[i] = XEN_DOMCTL_PFINFO_XTAB;
                     --nr_pages;
                 }
                 else
                     goto err;
             }
-            else
-                guest_data[i] = page;
 
             rc = -1;
             ++p;
         }
     }
 
     hdr.count = nr_pfns;
 
     rec.length = sizeof(hdr);
     rec.length += nr_pfns * sizeof(*rec_pfns);
     rec.length += nr_pages * PAGE_SIZE;
 
@@ -246,27 +230,24 @@ static int write_batch(struct xc_sr_context *ctx)
     {
         PERROR("Failed to write page data to stream");
         goto err;
     }
 
     /* Sanity check we have sent all the pages we expected to. */
     assert(nr_pages == 0);
     rc = ctx->save.nr_batch_pfns = 0;
 
  err:
     if ( guest_mapping )
         xenforeignmemory_unmap(xch->fmem, guest_mapping, nr_pages_mapped);
-    for ( i = 0; local_pages && i < nr_pfns; ++i )
-        free(local_pages[i]);
-    free(local_pages);
 
     return rc;
 }
 
 /*
  * Flush a batch of pfns into the stream.
  */
 static int flush_batch(struct xc_sr_context *ctx)
 {
     int rc = 0;
 
     if ( ctx->save.nr_batch_pfns == 0 )
--- a/tools/libs/guest/xg_sr_save_x86_hvm.c
+++ b/tools/libs/guest/xg_sr_save_x86_hvm.c
@@ -120,27 +120,28 @@ static int write_hvm_params(struct xc_sr_context *ctx)
         PERROR("Failed to write HVM_PARAMS record");
 
     return rc;
 }
 
 static xen_pfn_t x86_hvm_pfn_to_gfn(const struct xc_sr_context *ctx,
                                     xen_pfn_t pfn)
 {
     /* identity map */
     return pfn;
 }
 
-static int x86_hvm_normalise_page(struct xc_sr_context *ctx,
-                                  xen_pfn_t type, void **page)
+static int x86_hvm_normalise_page(struct xc_sr_context *ctx, xen_pfn_t type,
+                                  void *src, unsigned int idx, void **ptr)
 {
+    *ptr = src;
     return 0;
 }
 
 static int x86_hvm_setup(struct xc_sr_context *ctx)
 {
     xc_interface *xch = ctx->xch;
     xen_pfn_t nr_pfns;
 
     if ( xc_domain_nr_gpfns(xch, ctx->domid, &nr_pfns) < 0 )
     {
         PERROR("Unable to obtain the guest p2m size");
         return -1;
--- a/tools/libs/guest/xg_sr_save_x86_pv.c
+++ b/tools/libs/guest/xg_sr_save_x86_pv.c
@@ -990,58 +990,68 @@ static xen_pfn_t x86_pv_pfn_to_gfn(const struct xc_sr_context *ctx,
                                    xen_pfn_t pfn)
 {
     assert(pfn <= ctx->x86.pv.max_pfn);
 
     return xc_pfn_to_mfn(pfn, ctx->x86.pv.p2m, ctx->x86.pv.width);
 }
 
 
 /*
  * save_ops function.  Performs pagetable normalisation on appropriate pages.
  */
 static int x86_pv_normalise_page(struct xc_sr_context *ctx, xen_pfn_t type,
-                                 void **page)
+                                  void *src, unsigned int idx, void **ptr)
 {
     xc_interface *xch = ctx->xch;
-    void *local_page;
     int rc;
+    void *dst;
 
     type &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK;
 
     if ( type < XEN_DOMCTL_PFINFO_L1TAB || type > XEN_DOMCTL_PFINFO_L4TAB )
+    {
+        *ptr = src;
         return 0;
+    }
 
-    local_page = malloc(PAGE_SIZE);
-    if ( !local_page )
+    if ( idx >= MAX_BATCH_SIZE )
     {
-        ERROR("Unable to allocate scratch page");
-        rc = -1;
-        goto out;
+        ERROR("idx %u out of range", idx);
+        errno = ERANGE;
+        return -1;
     }
 
-    rc = normalise_pagetable(ctx, *page, local_page, type);
-    *page = local_page;
+    dst = ctx->x86.pv.save.normalised_pages + idx * PAGE_SIZE;
+    rc = normalise_pagetable(ctx, src, dst, type);
+    *ptr = dst;
 
- out:
     return rc;
 }
 
 /*
  * save_ops function.  Queries domain information and maps the Xen m2p and the
  * guests shinfo and p2m table.
  */
 static int x86_pv_setup(struct xc_sr_context *ctx)
 {
+    xc_interface *xch = ctx->xch;
     int rc;
 
+    ctx->x86.pv.save.normalised_pages = malloc(MAX_BATCH_SIZE * PAGE_SIZE);
+    if ( !ctx->x86.pv.save.normalised_pages )
+    {
+        PERROR("Failed to allocate normalised_pages");
+        return -1;
+    }
+
     rc = x86_pv_domain_info(ctx);
     if ( rc )
         return rc;
 
     rc = x86_pv_map_m2p(ctx);
     if ( rc )
         return rc;
 
     rc = map_shinfo(ctx);
     if ( rc )
         return rc;
 
@@ -1109,24 +1119,25 @@ static int x86_pv_end_of_checkpoint(struct xc_sr_context *ctx)
 }
 
 static int x86_pv_check_vm_state(struct xc_sr_context *ctx)
 {
     if ( ctx->x86.pv.p2m_generation == ~0ULL )
         return 0;
 
     return x86_pv_check_vm_state_p2m_list(ctx);
 }
 
 static int x86_pv_cleanup(struct xc_sr_context *ctx)
 {
+    free(ctx->x86.pv.save.normalised_pages);
     free(ctx->x86.pv.p2m_pfns);
 
     if ( ctx->x86.pv.p2m )
         munmap(ctx->x86.pv.p2m, ctx->x86.pv.p2m_frames * PAGE_SIZE);
 
     if ( ctx->x86.pv.shinfo )
         munmap(ctx->x86.pv.shinfo, PAGE_SIZE);
 
     if ( ctx->x86.pv.m2p )
         munmap(ctx->x86.pv.m2p, ctx->x86.pv.nr_m2p_frames * PAGE_SIZE);
 
     return 0;
openSUSE Build Service is sponsored by