File xen.sr-abort_if_busy.patch of Package xen

From: Olaf Hering <olaf@aepfle.de>
Date: Thu, 7 Jan 2021 20:25:28 +0100
Subject: sr abort_if_busy

tools: add --abort_if_busy to libxl_domain_suspend

Provide a knob to the host admin to abort the live migration of a
running domU if the downtime during final transit will be too long
for the workload within domU.

Adjust error reporting. Add ERROR_MIGRATION_ABORTED to allow callers of
libxl_domain_suspend to distinguish between errors and the requested
constraint.

Adjust precopy_policy to simplify reporting of remaining dirty pages.
The loop in send_memory_live populates ->dirty_count in a different
place than ->iteration. Let it proceed one more time to provide the
desired information before leaving the loop.

This patch adjusts xl(1) and the libxl API.
External users check LIBXL_HAVE_DOMAIN_SUSPEND_PROPS for the availability
of the new .abort_if_busy property.

Signed-off-by: Olaf Hering <olaf@aepfle.de>
---
 docs/man/xl.1.pod.in                  |  8 +++
 tools/include/libxl.h                 |  1 +
 tools/libs/light/libxl_dom_save.c     |  7 +-
 tools/libs/light/libxl_domain.c       |  1 +
 tools/libs/light/libxl_internal.h     |  2 +
 tools/libs/light/libxl_stream_write.c |  9 ++-
 tools/libs/light/libxl_types.idl      |  1 +
 tools/xl/xl_cmdtable.c                |  6 +-
 tools/xl/xl_migrate.c                 | 30 +++++++--
 9 files changed, 55 insertions(+), 10 deletions(-)

--- a/docs/man/xl.1.pod.in
+++ b/docs/man/xl.1.pod.in
@@ -498,24 +498,32 @@ possible to use this option for a 'localhost' migration.
 =item B<--max_iters> I<iterations>
 
 Number of copy iterations before final suspend+move (default: 5)
 
 =item B<--min_remaing> I<pages>
 
 Number of remaining dirty pages. If the number of dirty pages drops that
 low, the guest is suspended and the domU will finally be moved to I<host>.
 
 This allows the host admin to control for how long the domU will likely
 be suspended during transit.
 
+=item B<--abort_if_busy>
+
+Abort migration instead of doing final suspend/move/resume if the
+guest produced more than I<min_remaining> dirty pages during the number
+of I<max_iters> iterations.
+This avoids long periods of time where the guest is suspended, which
+may confuse the workload within domU.
+
 =back
 
 =item B<remus> [I<OPTIONS>] I<domain-id> I<host>
 
 Enable Remus HA or COLO HA for domain. By default B<xl> relies on ssh as a
 transport mechanism between the two hosts.
 
 B<NOTES>
 
 =over 4
 
 Remus support in xl is still in experimental (proof-of-concept) phase.
--- a/tools/include/libxl.h
+++ b/tools/include/libxl.h
@@ -1791,24 +1791,25 @@ static inline int libxl_retrieve_domain_configuration_0x041200(
  * LIBXL_HAVE_DOMAIN_SUSPEND_PROPS indicates that the
  * libxl_domain_suspend_props() function takes a props struct.
  */
 #define LIBXL_HAVE_DOMAIN_SUSPEND_PROPS 1
 
 typedef struct {
     uint32_t flags; /* LIBXL_SUSPEND_* */
     uint32_t max_iters;
     uint32_t min_remaining;
 } libxl_domain_suspend_props;
 #define LIBXL_SUSPEND_DEBUG 1
 #define LIBXL_SUSPEND_LIVE 2
+#define LIBXL_SUSPEND_ABORT_IF_BUSY 4
 
 int libxl_domain_suspend(libxl_ctx *ctx, uint32_t domid, int fd,
                          libxl_domain_suspend_props *props,
                          const libxl_asyncop_how *ao_how)
                          LIBXL_EXTERNAL_CALLERS_ONLY;
 #if defined(LIBXL_API_VERSION) && LIBXL_API_VERSION < 0x041600
 static inline int libxl_domain_suspend_0x041500(libxl_ctx *ctx, uint32_t domid,
                          int fd, int flags, /* LIBXL_SUSPEND_* */
                          const libxl_asyncop_how *ao_how)
 {
     libxl_domain_suspend_props props = { .flags = flags, };
     return libxl_domain_suspend(ctx, domid, fd, &props, ao_how);
--- a/tools/libs/light/libxl_dom_save.c
+++ b/tools/libs/light/libxl_dom_save.c
@@ -374,29 +374,34 @@ int libxl__save_emulator_xenstore_data(libxl__domain_save_state *dss,
 }
 
 static int libxl__domain_save_precopy_policy(struct precopy_stats stats, void *user)
 {
     libxl__save_helper_state *shs = user;
     libxl__domain_save_state *dss = shs->caller_state;
     STATE_AO_GC(dss->ao);
 
     LOGD(DEBUG, shs->domid, "iteration %u dirty_count %ld total_written %lu",
          stats.iteration, stats.dirty_count, stats.total_written);
     if (stats.dirty_count >= 0 && stats.dirty_count < dss->min_remaining)
         goto stop_copy;
-    if (stats.iteration >= dss->max_iters)
-        goto stop_copy;
+    if (stats.dirty_count >= 0 && stats.iteration >= dss->max_iters)
+        goto too_busy; /* iteration budget spent, still >= min_remaining */
     return XGS_POLICY_CONTINUE_PRECOPY;
 
+too_busy:
+    if (dss->abort_if_busy) { /* not taken when min_remaining was reached */
+        dss->remaining_dirty_pages = stats.dirty_count;
+        return XGS_POLICY_ABORT;
+    }
 stop_copy:
     return XGS_POLICY_STOP_AND_COPY;
 }
 
 /*----- main code for saving, in order of execution -----*/
 
 void libxl__domain_save(libxl__egc *egc, libxl__domain_save_state *dss)
 {
     STATE_AO_GC(dss->ao);
     int rc, ret;
 
     /* Convenience aliases */
     const uint32_t domid = dss->domid;
--- a/tools/libs/light/libxl_domain.c
+++ b/tools/libs/light/libxl_domain.c
@@ -517,24 +517,25 @@ int libxl_domain_suspend(libxl_ctx *ctx, uint32_t domid, int fd,
 
     libxl__domain_save_state *dss;
     GCNEW(dss);
 
     dss->ao = ao;
     dss->callback = domain_suspend_cb;
 
     dss->domid = domid;
     dss->fd = fd;
     dss->type = type;
     dss->max_iters = props->max_iters ?: LIBXL_XGS_POLICY_MAX_ITERATIONS;
     dss->min_remaining = props->min_remaining ?: LIBXL_XGS_POLICY_TARGET_DIRTY_COUNT;
+    dss->abort_if_busy = props->flags & LIBXL_SUSPEND_ABORT_IF_BUSY;
     dss->live = props->flags & LIBXL_SUSPEND_LIVE;
     dss->debug = props->flags & LIBXL_SUSPEND_DEBUG;
     dss->checkpointed_stream = LIBXL_CHECKPOINTED_STREAM_NONE;
 
     rc = libxl__fd_flags_modify_save(gc, dss->fd,
                                      ~(O_NONBLOCK|O_NDELAY), 0,
                                      &dss->fdfl);
     if (rc < 0) goto out_err;
 
     libxl__domain_save(egc, dss);
     return AO_INPROGRESS;
 
--- a/tools/libs/light/libxl_internal.h
+++ b/tools/libs/light/libxl_internal.h
@@ -3647,27 +3647,29 @@ _hidden void libxl__qmp_suspend_save(libxl__egc *egc,
 struct libxl__domain_save_state {
     /* set by caller of libxl__domain_save */
     libxl__ao *ao;
     libxl__domain_save_cb *callback;
 
     uint32_t domid;
     int fd;
     int fdfl; /* original flags on fd */
     int recv_fd;
     libxl_domain_type type;
     int live;
     int debug;
+    int abort_if_busy;
     int checkpointed_stream;
     uint32_t max_iters;
     uint32_t min_remaining;
+    long remaining_dirty_pages;
     const libxl_domain_remus_info *remus;
     /* private */
     int rc;
     int xcflags;
     libxl__domain_suspend_state dsps;
     union {
         /* for Remus */
         libxl__remus_state rs;
         /* for COLO */
         libxl__colo_save_state css;
     };
     libxl__checkpoint_devices_state cds;
--- a/tools/libs/light/libxl_stream_write.c
+++ b/tools/libs/light/libxl_stream_write.c
@@ -335,29 +335,36 @@ static void libxc_header_done(libxl__egc *egc,
 
 void libxl__xc_domain_save_done(libxl__egc *egc, void *dss_void,
                                 int rc, int retval, int errnoval)
 {
     libxl__domain_save_state *dss = dss_void;
     libxl__stream_write_state *stream = &dss->sws;
     STATE_AO_GC(dss->ao);
 
     if (rc)
         goto err;
 
     if (retval) {
+        if (dss->remaining_dirty_pages) {
+            LOGD(NOTICE, dss->domid, "saving domain: aborted,"
+                 " %ld remaining dirty pages.", dss->remaining_dirty_pages);
+        } else {
         LOGEVD(ERROR, errnoval, dss->domid, "saving domain: %s",
               dss->dsps.guest_responded ?
               "domain responded to suspend request" :
               "domain did not respond to suspend request");
-        if (!dss->dsps.guest_responded)
+        }
+        if (dss->remaining_dirty_pages)
+           rc = ERROR_MIGRATION_ABORTED;
+        else if(!dss->dsps.guest_responded)
             rc = ERROR_GUEST_TIMEDOUT;
         else if (dss->rc)
             rc = dss->rc;
         else
             rc = ERROR_FAIL;
         goto err;
     }
 
  err:
     check_all_finished(egc, stream, rc);
 
     /*
--- a/tools/libs/light/libxl_types.idl
+++ b/tools/libs/light/libxl_types.idl
@@ -67,24 +67,25 @@ libxl_error = Enumeration("error", [
     (-21, "DOMAIN_NOTFOUND"),
     (-22, "ABORTED"),
     (-23, "NOTFOUND"),
     (-24, "DOMAIN_DESTROYED"), # Target domain ceased to exist during op
     (-25, "FEATURE_REMOVED"), # For functionality that has been removed
     (-26, "PROTOCOL_ERROR_QMP"),
     (-27, "UNKNOWN_QMP_ERROR"),
     (-28, "QMP_GENERIC_ERROR"), # unspecified qmp error
     (-29, "QMP_COMMAND_NOT_FOUND"), # the requested command has not been found
     (-30, "QMP_DEVICE_NOT_ACTIVE"), # a device has failed to be become active
     (-31, "QMP_DEVICE_NOT_FOUND"), # the requested device has not been found
     (-32, "QEMU_API"), # QEMU's replies don't contains expected members
+    (-33, "MIGRATION_ABORTED"),
     ], value_namespace = "")
 
 libxl_domain_type = Enumeration("domain_type", [
     (-1, "INVALID"),
     (1, "HVM"),
     (2, "PV"),
     (3, "PVH"),
     ], init_val = "LIBXL_DOMAIN_TYPE_INVALID")
 
 libxl_rdm_reserve_strategy = Enumeration("rdm_reserve_strategy", [
     (0, "ignore"),
     (1, "host"),
--- a/tools/xl/xl_cmdtable.c
+++ b/tools/xl/xl_cmdtable.c
@@ -167,25 +167,29 @@ const struct cmd_spec cmd_table[] = {
       "[options] <Domain> <host>",
       "-h                Print this help.\n"
       "-C <config>       Send <config> instead of config file from creation.\n"
       "-s <sshcommand>   Use <sshcommand> instead of ssh.  String will be passed\n"
       "                  to sh. If empty, run <host> instead of ssh <host> xl\n"
       "                  migrate-receive [-d -e]\n"
       "-e                Do not wait in the background (on <host>) for the death\n"
       "                  of the domain.\n"
       "--debug           Enable verification mode.\n"
       "-p                Do not unpause domain after migrating it.\n"
       "-D                Preserve the domain id\n"
       "--max_iters N     Number of copy iterations before final stop+move\n"
-      "--min_remaining N Number of remaining dirty pages before final stop+move"
+      "--min_remaining N Number of remaining dirty pages before final stop+move\n"
+      "--abort_if_busy   Abort migration instead of doing final stop+move,\n"
+      "                  if the number of dirty pages is higher than <min_remaining>\n"
+      "                  after <max_iters> iterations. Otherwise the amount of memory\n"
+      "                  to be transfered would exceed maximum allowed domU downtime."
     },
     { "restore",
       &main_restore, 0, 1,
       "Restore a domain from a saved state",
       "[options] [<ConfigFile>] <CheckpointFile>",
       "-h                       Print this help.\n"
       "-p                       Do not unpause domain after restoring it.\n"
       "-e                       Do not wait in the background for the death of the domain.\n"
       "-d                       Enable debug messages.\n"
       "-V, --vncviewer          Connect to the VNC display after the domain is created.\n"
       "-A, --vncviewer-autopass Pass VNC password to viewer via stdin."
     },
--- a/tools/xl/xl_migrate.c
+++ b/tools/xl/xl_migrate.c
@@ -168,25 +168,25 @@ static void migrate_do_preamble(int send_fd, int recv_fd, pid_t child,
     if (rc) {
         close(send_fd);
         migration_child_report(recv_fd);
         exit(EXIT_FAILURE);
     }
 
     save_domain_core_writeconfig(send_fd, "migration stream",
                                  config_data, config_len);
 
 }
 
 static void migrate_domain(uint32_t domid, int preserve_domid,
-                           const char *rune, int debug,
+                           const char *rune, int debug, int abort_if_busy,
                            uint32_t max_iters,
                            uint32_t min_remaining,
                            const char *override_config_file)
 {
     pid_t child = -1;
     int rc;
     int send_fd = -1, recv_fd = -1;
     char *away_domname;
     char rc_buf;
     uint8_t *config_data;
     int config_len;
     libxl_domain_suspend_props props = {
@@ -204,32 +204,38 @@ static void migrate_domain(uint32_t domid, int preserve_domid,
         exit(EXIT_FAILURE);
     }
 
     child = create_migration_child(rune, &send_fd, &recv_fd);
 
     migrate_do_preamble(send_fd, recv_fd, child, config_data, config_len,
                         rune);
 
     xtl_stdiostream_adjust_flags(logger, XTL_STDIOSTREAM_HIDE_PROGRESS, 0);
 
     if (debug)
         props.flags |= LIBXL_SUSPEND_DEBUG;
+    if (abort_if_busy)
+        props.flags |= LIBXL_SUSPEND_ABORT_IF_BUSY;
     rc = libxl_domain_suspend(ctx, domid, send_fd, &props, NULL);
     if (rc) {
         fprintf(stderr, "migration sender: libxl_domain_suspend failed"
                 " (rc=%d)\n", rc);
-        if (rc == ERROR_GUEST_TIMEDOUT)
-            goto failed_suspend;
-        else
-            goto failed_resume;
+        switch (rc) {
+            case ERROR_GUEST_TIMEDOUT:
+                goto failed_suspend;
+            case ERROR_MIGRATION_ABORTED:
+                goto failed_busy;
+            default:
+                goto failed_resume;
+        }
     }
 
     //fprintf(stderr, "migration sender: Transfer complete.\n");
     // Should only be printed when debugging as it's a bit messy with
     // progress indication.
 
     rc = migrate_read_fixedmessage(recv_fd, migrate_receiver_ready,
                                    sizeof(migrate_receiver_ready),
                                    "ready message", rune);
     if (rc) goto failed_resume;
 
     xtl_stdiostream_adjust_flags(logger, 0, XTL_STDIOSTREAM_HIDE_PROGRESS);
@@ -293,24 +299,30 @@ static void migrate_domain(uint32_t domid, int preserve_domid,
 
     fprintf(stderr, "migration sender: Target reports successful startup.\n");
     libxl_domain_destroy(ctx, domid, 0); /* bang! */
     fprintf(stderr, "Migration successful.\n");
     exit(EXIT_SUCCESS);
 
  failed_suspend:
     close(send_fd);
     migration_child_report(recv_fd);
     fprintf(stderr, "Migration failed, failed to suspend at sender.\n");
     exit(EXIT_FAILURE);
 
+ failed_busy:
+    close(send_fd);
+    migration_child_report(recv_fd);
+    fprintf(stderr, "Migration aborted as requested, domain is too busy.\n");
+    exit(EXIT_FAILURE);
+
  failed_resume:
     close(send_fd);
     migration_child_report(recv_fd);
     fprintf(stderr, "Migration failed, resuming at sender.\n");
     libxl_domain_resume(ctx, domid, 1, 0);
     exit(EXIT_FAILURE);
 
  failed_badly:
     fprintf(stderr,
  "** Migration failed during final handshake **\n"
  "Domain state is now undefined !\n"
  "Please CHECK AT BOTH ENDS for running instances, before renaming and\n"
@@ -536,31 +548,32 @@ int main_migrate_receive(int argc, char **argv)
 
     return EXIT_SUCCESS;
 }
 
 int main_migrate(int argc, char **argv)
 {
     uint32_t domid;
     const char *config_filename = NULL;
     const char *ssh_command = "ssh";
     char *rune = NULL;
     char *host;
     int opt, daemonize = 1, monitor = 1, debug = 0, pause_after_migration = 0;
-    int preserve_domid = 0;
+    int preserve_domid = 0, abort_if_busy = 0;
     uint32_t max_iters = 0;
     uint32_t min_remaining = 0;
     static struct option opts[] = {
         {"debug", 0, 0, 0x100},
         {"max_iters", 1, 0, 0x101},
         {"min_remaining", 1, 0, 0x102},
+        {"abort_if_busy", 0, 0, 0x103},
         {"live", 0, 0, 0x200},
         COMMON_LONG_OPTS
     };
 
     SWITCH_FOREACH_OPT(opt, "FC:s:epD", opts, "migrate", 2) {
     case 'C':
         config_filename = optarg;
         break;
     case 's':
         ssh_command = optarg;
         break;
     case 'F':
@@ -576,24 +589,27 @@ int main_migrate(int argc, char **argv)
     case 'D':
         preserve_domid = 1;
         break;
     case 0x100: /* --debug */
         debug = 1;
         break;
     case 0x101: /* --max_iters */
         max_iters = atoi(optarg);
         break;
     case 0x102: /* --min_remaining */
         min_remaining = atoi(optarg);
         break;
+    case 0x103: /* --abort_if_busy */
+        abort_if_busy = 1;
+        break;
     case 0x200: /* --live */
         /* ignored for compatibility with xm */
         break;
     }
 
     domid = find_domain(argv[optind]);
     host = argv[optind + 1];
 
     bool pass_tty_arg = progress_use_cr || (isatty(2) > 0);
 
     if (!ssh_command[0]) {
         rune= host;
@@ -610,25 +626,25 @@ int main_migrate(int argc, char **argv)
             verbose_len = (minmsglevel_default - minmsglevel) + 2;
         }
         xasprintf(&rune, "exec %s %s xl%s%s%.*s migrate-receive%s%s%s",
                   ssh_command, host,
                   pass_tty_arg ? " -t" : "",
                   timestamps ? " -T" : "",
                   verbose_len, verbose_buf,
                   daemonize ? "" : " -e",
                   debug ? " -d" : "",
                   pause_after_migration ? " -p" : "");
     }
 
-    migrate_domain(domid, preserve_domid, rune, debug,
+    migrate_domain(domid, preserve_domid, rune, debug, abort_if_busy,
                    max_iters, min_remaining, config_filename);
     return EXIT_SUCCESS;
 }
 
 int main_remus(int argc, char **argv)
 {
     uint32_t domid;
     int opt, rc, daemonize = 1;
     const char *ssh_command = "ssh";
     char *host = NULL, *rune = NULL;
     libxl_domain_remus_info r_info;
     int send_fd = -1, recv_fd = -1;
openSUSE Build Service is sponsored by