Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
home:olh:xen-unstable
xen
xen.sr-abort_if_busy.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File xen.sr-abort_if_busy.patch of Package xen
From: Olaf Hering <olaf@aepfle.de> Date: Thu, 7 Jan 2021 20:25:28 +0100 Subject: sr abort_if_busy tools: add --abort_if_busy to libxl_domain_suspend Provide a knob to the host admin to abort the live migration of a running domU if the downtime during final transit will be too long for the workload within domU. Adjust error reporting. Add ERROR_MIGRATION_ABORTED to allow callers of libxl_domain_suspend to distinguish between errors and the requested constraint. Adjust precopy_policy to simplify reporting of remaining dirty pages. The loop in send_memory_live populates ->dirty_count in a different place than ->iteration. Let it proceed one more time to provide the desired information before leaving the loop. This patch adjusts xl(1) and the libxl API. External users check LIBXL_HAVE_DOMAIN_SUSPEND_PROPS for the availability of the new .abort_if_busy property. Signed-off-by: Olaf Hering <olaf@aepfle.de> --- docs/man/xl.1.pod.in | 8 +++ tools/include/libxl.h | 1 + tools/libs/light/libxl_dom_save.c | 7 +- tools/libs/light/libxl_domain.c | 1 + tools/libs/light/libxl_internal.h | 2 + tools/libs/light/libxl_stream_write.c | 9 ++- tools/libs/light/libxl_types.idl | 1 + tools/xl/xl_cmdtable.c | 6 +- tools/xl/xl_migrate.c | 30 +++++++-- 9 files changed, 55 insertions(+), 10 deletions(-) --- a/docs/man/xl.1.pod.in +++ b/docs/man/xl.1.pod.in @@ -498,24 +498,32 @@ possible to use this option for a 'localhost' migration. =item B<--max_iters> I<iterations> Number of copy iterations before final suspend+move (default: 5) =item B<--min_remaing> I<pages> Number of remaining dirty pages. If the number of dirty pages drops that low, the guest is suspended and the domU will finally be moved to I<host>. This allows the host admin to control for how long the domU will likely be suspended during transit. 
+=item B<--abort_if_busy> + +Abort migration instead of doing final suspend/move/resume if the +guest produced more than I<min_remaining> dirty pages during the number +of I<max_iters> iterations. +This avoids long periods of time where the guest is suspended, which +may confuse the workload within domU. + =back =item B<remus> [I<OPTIONS>] I<domain-id> I<host> Enable Remus HA or COLO HA for domain. By default B<xl> relies on ssh as a transport mechanism between the two hosts. B<NOTES> =over 4 Remus support in xl is still in experimental (proof-of-concept) phase. --- a/tools/include/libxl.h +++ b/tools/include/libxl.h @@ -1791,24 +1791,25 @@ static inline int libxl_retrieve_domain_configuration_0x041200( * LIBXL_HAVE_DOMAIN_SUSPEND_PROPS indicates that the * libxl_domain_suspend_props() function takes a props struct. */ #define LIBXL_HAVE_DOMAIN_SUSPEND_PROPS 1 typedef struct { uint32_t flags; /* LIBXL_SUSPEND_* */ uint32_t max_iters; uint32_t min_remaining; } libxl_domain_suspend_props; #define LIBXL_SUSPEND_DEBUG 1 #define LIBXL_SUSPEND_LIVE 2 +#define LIBXL_SUSPEND_ABORT_IF_BUSY 4 int libxl_domain_suspend(libxl_ctx *ctx, uint32_t domid, int fd, libxl_domain_suspend_props *props, const libxl_asyncop_how *ao_how) LIBXL_EXTERNAL_CALLERS_ONLY; #if defined(LIBXL_API_VERSION) && LIBXL_API_VERSION < 0x041600 static inline int libxl_domain_suspend_0x041500(libxl_ctx *ctx, uint32_t domid, int fd, int flags, /* LIBXL_SUSPEND_* */ const libxl_asyncop_how *ao_how) { libxl_domain_suspend_props props = { .flags = flags, }; return libxl_domain_suspend(ctx, domid, fd, &props, ao_how); --- a/tools/libs/light/libxl_dom_save.c +++ b/tools/libs/light/libxl_dom_save.c @@ -374,29 +374,34 @@ int libxl__save_emulator_xenstore_data(libxl__domain_save_state *dss, } static int libxl__domain_save_precopy_policy(struct precopy_stats stats, void *user) { libxl__save_helper_state *shs = user; libxl__domain_save_state *dss = shs->caller_state; STATE_AO_GC(dss->ao); LOGD(DEBUG, shs->domid, 
"iteration %u dirty_count %ld total_written %lu", stats.iteration, stats.dirty_count, stats.total_written); if (stats.dirty_count >= 0 && stats.dirty_count < dss->min_remaining) goto stop_copy; - if (stats.iteration >= dss->max_iters) + if (stats.dirty_count >= 0 && stats.iteration >= dss->max_iters) goto stop_copy; return XGS_POLICY_CONTINUE_PRECOPY; stop_copy: + if (dss->abort_if_busy) + { + dss->remaining_dirty_pages = stats.dirty_count; + return XGS_POLICY_ABORT; + } return XGS_POLICY_STOP_AND_COPY; } /*----- main code for saving, in order of execution -----*/ void libxl__domain_save(libxl__egc *egc, libxl__domain_save_state *dss) { STATE_AO_GC(dss->ao); int rc, ret; /* Convenience aliases */ const uint32_t domid = dss->domid; --- a/tools/libs/light/libxl_domain.c +++ b/tools/libs/light/libxl_domain.c @@ -517,24 +517,25 @@ int libxl_domain_suspend(libxl_ctx *ctx, uint32_t domid, int fd, libxl__domain_save_state *dss; GCNEW(dss); dss->ao = ao; dss->callback = domain_suspend_cb; dss->domid = domid; dss->fd = fd; dss->type = type; dss->max_iters = props->max_iters ?: LIBXL_XGS_POLICY_MAX_ITERATIONS; dss->min_remaining = props->min_remaining ?: LIBXL_XGS_POLICY_TARGET_DIRTY_COUNT; + dss->abort_if_busy = props->flags & LIBXL_SUSPEND_ABORT_IF_BUSY; dss->live = props->flags & LIBXL_SUSPEND_LIVE; dss->debug = props->flags & LIBXL_SUSPEND_DEBUG; dss->checkpointed_stream = LIBXL_CHECKPOINTED_STREAM_NONE; rc = libxl__fd_flags_modify_save(gc, dss->fd, ~(O_NONBLOCK|O_NDELAY), 0, &dss->fdfl); if (rc < 0) goto out_err; libxl__domain_save(egc, dss); return AO_INPROGRESS; --- a/tools/libs/light/libxl_internal.h +++ b/tools/libs/light/libxl_internal.h @@ -3647,27 +3647,29 @@ _hidden void libxl__qmp_suspend_save(libxl__egc *egc, struct libxl__domain_save_state { /* set by caller of libxl__domain_save */ libxl__ao *ao; libxl__domain_save_cb *callback; uint32_t domid; int fd; int fdfl; /* original flags on fd */ int recv_fd; libxl_domain_type type; int live; int debug; + int 
abort_if_busy; int checkpointed_stream; uint32_t max_iters; uint32_t min_remaining; + long remaining_dirty_pages; const libxl_domain_remus_info *remus; /* private */ int rc; int xcflags; libxl__domain_suspend_state dsps; union { /* for Remus */ libxl__remus_state rs; /* for COLO */ libxl__colo_save_state css; }; libxl__checkpoint_devices_state cds; --- a/tools/libs/light/libxl_stream_write.c +++ b/tools/libs/light/libxl_stream_write.c @@ -335,29 +335,36 @@ static void libxc_header_done(libxl__egc *egc, void libxl__xc_domain_save_done(libxl__egc *egc, void *dss_void, int rc, int retval, int errnoval) { libxl__domain_save_state *dss = dss_void; libxl__stream_write_state *stream = &dss->sws; STATE_AO_GC(dss->ao); if (rc) goto err; if (retval) { + if (dss->remaining_dirty_pages) { + LOGD(NOTICE, dss->domid, "saving domain: aborted," + " %ld remaining dirty pages.", dss->remaining_dirty_pages); + } else { LOGEVD(ERROR, errnoval, dss->domid, "saving domain: %s", dss->dsps.guest_responded ? "domain responded to suspend request" : "domain did not respond to suspend request"); - if (!dss->dsps.guest_responded) + } + if (dss->remaining_dirty_pages) + rc = ERROR_MIGRATION_ABORTED; + else if(!dss->dsps.guest_responded) rc = ERROR_GUEST_TIMEDOUT; else if (dss->rc) rc = dss->rc; else rc = ERROR_FAIL; goto err; } err: check_all_finished(egc, stream, rc); /* --- a/tools/libs/light/libxl_types.idl +++ b/tools/libs/light/libxl_types.idl @@ -67,24 +67,25 @@ libxl_error = Enumeration("error", [ (-21, "DOMAIN_NOTFOUND"), (-22, "ABORTED"), (-23, "NOTFOUND"), (-24, "DOMAIN_DESTROYED"), # Target domain ceased to exist during op (-25, "FEATURE_REMOVED"), # For functionality that has been removed (-26, "PROTOCOL_ERROR_QMP"), (-27, "UNKNOWN_QMP_ERROR"), (-28, "QMP_GENERIC_ERROR"), # unspecified qmp error (-29, "QMP_COMMAND_NOT_FOUND"), # the requested command has not been found (-30, "QMP_DEVICE_NOT_ACTIVE"), # a device has failed to be become active (-31, "QMP_DEVICE_NOT_FOUND"), # the 
requested device has not been found (-32, "QEMU_API"), # QEMU's replies don't contains expected members + (-33, "MIGRATION_ABORTED"), ], value_namespace = "") libxl_domain_type = Enumeration("domain_type", [ (-1, "INVALID"), (1, "HVM"), (2, "PV"), (3, "PVH"), ], init_val = "LIBXL_DOMAIN_TYPE_INVALID") libxl_rdm_reserve_strategy = Enumeration("rdm_reserve_strategy", [ (0, "ignore"), (1, "host"), --- a/tools/xl/xl_cmdtable.c +++ b/tools/xl/xl_cmdtable.c @@ -167,25 +167,29 @@ const struct cmd_spec cmd_table[] = { "[options] <Domain> <host>", "-h Print this help.\n" "-C <config> Send <config> instead of config file from creation.\n" "-s <sshcommand> Use <sshcommand> instead of ssh. String will be passed\n" " to sh. If empty, run <host> instead of ssh <host> xl\n" " migrate-receive [-d -e]\n" "-e Do not wait in the background (on <host>) for the death\n" " of the domain.\n" "--debug Enable verification mode.\n" "-p Do not unpause domain after migrating it.\n" "-D Preserve the domain id\n" "--max_iters N Number of copy iterations before final stop+move\n" - "--min_remaining N Number of remaining dirty pages before final stop+move" + "--min_remaining N Number of remaining dirty pages before final stop+move\n" + "--abort_if_busy Abort migration instead of doing final stop+move,\n" + " if the number of dirty pages is higher than <min_remaining>\n" + " after <max_iters> iterations. Otherwise the amount of memory\n" + " to be transfered would exceed maximum allowed domU downtime." }, { "restore", &main_restore, 0, 1, "Restore a domain from a saved state", "[options] [<ConfigFile>] <CheckpointFile>", "-h Print this help.\n" "-p Do not unpause domain after restoring it.\n" "-e Do not wait in the background for the death of the domain.\n" "-d Enable debug messages.\n" "-V, --vncviewer Connect to the VNC display after the domain is created.\n" "-A, --vncviewer-autopass Pass VNC password to viewer via stdin." 
}, --- a/tools/xl/xl_migrate.c +++ b/tools/xl/xl_migrate.c @@ -168,25 +168,25 @@ static void migrate_do_preamble(int send_fd, int recv_fd, pid_t child, if (rc) { close(send_fd); migration_child_report(recv_fd); exit(EXIT_FAILURE); } save_domain_core_writeconfig(send_fd, "migration stream", config_data, config_len); } static void migrate_domain(uint32_t domid, int preserve_domid, - const char *rune, int debug, + const char *rune, int debug, int abort_if_busy, uint32_t max_iters, uint32_t min_remaining, const char *override_config_file) { pid_t child = -1; int rc; int send_fd = -1, recv_fd = -1; char *away_domname; char rc_buf; uint8_t *config_data; int config_len; libxl_domain_suspend_props props = { @@ -204,32 +204,38 @@ static void migrate_domain(uint32_t domid, int preserve_domid, exit(EXIT_FAILURE); } child = create_migration_child(rune, &send_fd, &recv_fd); migrate_do_preamble(send_fd, recv_fd, child, config_data, config_len, rune); xtl_stdiostream_adjust_flags(logger, XTL_STDIOSTREAM_HIDE_PROGRESS, 0); if (debug) props.flags |= LIBXL_SUSPEND_DEBUG; + if (abort_if_busy) + props.flags |= LIBXL_SUSPEND_ABORT_IF_BUSY; rc = libxl_domain_suspend(ctx, domid, send_fd, &props, NULL); if (rc) { fprintf(stderr, "migration sender: libxl_domain_suspend failed" " (rc=%d)\n", rc); - if (rc == ERROR_GUEST_TIMEDOUT) - goto failed_suspend; - else - goto failed_resume; + switch (rc) { + case ERROR_GUEST_TIMEDOUT: + goto failed_suspend; + case ERROR_MIGRATION_ABORTED: + goto failed_busy; + default: + goto failed_resume; + } } //fprintf(stderr, "migration sender: Transfer complete.\n"); // Should only be printed when debugging as it's a bit messy with // progress indication. 
rc = migrate_read_fixedmessage(recv_fd, migrate_receiver_ready, sizeof(migrate_receiver_ready), "ready message", rune); if (rc) goto failed_resume; xtl_stdiostream_adjust_flags(logger, 0, XTL_STDIOSTREAM_HIDE_PROGRESS); @@ -293,24 +299,30 @@ static void migrate_domain(uint32_t domid, int preserve_domid, fprintf(stderr, "migration sender: Target reports successful startup.\n"); libxl_domain_destroy(ctx, domid, 0); /* bang! */ fprintf(stderr, "Migration successful.\n"); exit(EXIT_SUCCESS); failed_suspend: close(send_fd); migration_child_report(recv_fd); fprintf(stderr, "Migration failed, failed to suspend at sender.\n"); exit(EXIT_FAILURE); + failed_busy: + close(send_fd); + migration_child_report(recv_fd); + fprintf(stderr, "Migration aborted as requested, domain is too busy.\n"); + exit(EXIT_FAILURE); + failed_resume: close(send_fd); migration_child_report(recv_fd); fprintf(stderr, "Migration failed, resuming at sender.\n"); libxl_domain_resume(ctx, domid, 1, 0); exit(EXIT_FAILURE); failed_badly: fprintf(stderr, "** Migration failed during final handshake **\n" "Domain state is now undefined !\n" "Please CHECK AT BOTH ENDS for running instances, before renaming and\n" @@ -536,31 +548,32 @@ int main_migrate_receive(int argc, char **argv) return EXIT_SUCCESS; } int main_migrate(int argc, char **argv) { uint32_t domid; const char *config_filename = NULL; const char *ssh_command = "ssh"; char *rune = NULL; char *host; int opt, daemonize = 1, monitor = 1, debug = 0, pause_after_migration = 0; - int preserve_domid = 0; + int preserve_domid = 0, abort_if_busy = 0; uint32_t max_iters = 0; uint32_t min_remaining = 0; static struct option opts[] = { {"debug", 0, 0, 0x100}, {"max_iters", 1, 0, 0x101}, {"min_remaining", 1, 0, 0x102}, + {"abort_if_busy", 0, 0, 0x103}, {"live", 0, 0, 0x200}, COMMON_LONG_OPTS }; SWITCH_FOREACH_OPT(opt, "FC:s:epD", opts, "migrate", 2) { case 'C': config_filename = optarg; break; case 's': ssh_command = optarg; break; case 'F': @@ -576,24 +589,27 
@@ int main_migrate(int argc, char **argv) case 'D': preserve_domid = 1; break; case 0x100: /* --debug */ debug = 1; break; case 0x101: /* --max_iters */ max_iters = atoi(optarg); break; case 0x102: /* --min_remaining */ min_remaining = atoi(optarg); break; + case 0x103: /* --abort_if_busy */ + abort_if_busy = 1; + break; case 0x200: /* --live */ /* ignored for compatibility with xm */ break; } domid = find_domain(argv[optind]); host = argv[optind + 1]; bool pass_tty_arg = progress_use_cr || (isatty(2) > 0); if (!ssh_command[0]) { rune= host; @@ -610,25 +626,25 @@ int main_migrate(int argc, char **argv) verbose_len = (minmsglevel_default - minmsglevel) + 2; } xasprintf(&rune, "exec %s %s xl%s%s%.*s migrate-receive%s%s%s", ssh_command, host, pass_tty_arg ? " -t" : "", timestamps ? " -T" : "", verbose_len, verbose_buf, daemonize ? "" : " -e", debug ? " -d" : "", pause_after_migration ? " -p" : ""); } - migrate_domain(domid, preserve_domid, rune, debug, + migrate_domain(domid, preserve_domid, rune, debug, abort_if_busy, max_iters, min_remaining, config_filename); return EXIT_SUCCESS; } int main_remus(int argc, char **argv) { uint32_t domid; int opt, rc, daemonize = 1; const char *ssh_command = "ssh"; char *host = NULL, *rune = NULL; libxl_domain_remus_info r_info; int send_fd = -1, recv_fd = -1;
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor