File 0012-compute-Fix-unfencing-and-ensure-fencing-occurs-in-p.patch of Package fence-agents.openSUSE_Leap_42.3_Update
From 303c53d09b5223d1b87c6e2959755218d6e41140 Mon Sep 17 00:00:00 2001
From: Andrew Beekhof <andrew@beekhof.net>
Date: Wed, 13 Sep 2017 21:48:21 +1000
Subject: [PATCH 12/14] compute: Fix unfencing and ensure fencing occurs in
partial up/down states
---
fence/agents/compute/fence_compute.py | 112 ++++++++++++++++++++++++----------
1 file changed, 81 insertions(+), 31 deletions(-)
diff --git a/fence/agents/compute/fence_compute.py b/fence/agents/compute/fence_compute.py
index 4294b22c..d5b7bc0d 100644
--- a/fence/agents/compute/fence_compute.py
+++ b/fence/agents/compute/fence_compute.py
@@ -35,14 +35,14 @@ def get_power_status(connection, options):
try:
services = connection.services.list(host=options["--plug"], binary="nova-compute")
for service in services:
- logging.debug("Status of %s is %s, %s" % (service.binary, service.state, service.status))
+ logging.debug("Status of %s on %s is %s, %s" % (service.binary, options["--plug"], service.state, service.status))
if service.state == "up" and service.status == "enabled":
# Up and operational
- status = "up"
+ status = "on"
elif service.state == "down" and service.status == "disabled":
# Down and fenced
- status = "down"
+ status = "off"
elif service.state == "down":
# Down and requires fencing
@@ -57,33 +57,46 @@ def get_power_status(connection, options):
break
except requests.exception.ConnectionError as err:
logging.warning("Nova connection failed: " + str(err))
+ logging.debug("Final status of %s is %s" % (options["--plug"], status))
return status
+def get_power_status_simple(connection, options):
+ status = get_power_status(connection, options)
+ if status in [ "off" ]:
+ return status
+ return "on"
+
def set_attrd_status(host, status, options):
logging.debug("Setting fencing status for %s to %s" % (host, status))
run_command(options, "attrd_updater -p -n evacuate -Q -N %s -U %s" % (host, status))
def get_attrd_status(host, options):
(status, pipe_stdout, pipe_stderr) = run_command(options, "attrd_updater -p -n evacuate -Q -N %s" % (host))
- return pipe_stdout
+ fields = pipe_stdout.split('"')
+ if len(fields) > 6:
+ return fields[5]
+ logging.debug("Got %s: o:%s e:%s n:%d" % (status, pipe_stdout, pipe_stderr, len(fields)))
+ return ""
def set_power_status_on(connection, options):
- status = get_power_status(connection, options)
- if status in [ "down", "running" ]:
- # Wait for any evacuations to complete
- out = ""
- while out != "no":
- if len(out) > 0:
- time.sleep(2)
- logging.info("Waiting for %s to complete evacuations: %s" % (options["--plug"], out))
- out = get_attrd_status(options["--plug"], options)
+ # Wait for any evacuations to complete
+ while True:
+ current = get_attrd_status(options["--plug"], options)
+ if current in ["no", ""]:
+ logging.info("Evacuation complete for: %s '%s'" % (options["--plug"], current))
+ break
+ else:
+ logging.info("Waiting for %s to complete evacuations: %s" % (options["--plug"], current))
+ time.sleep(2)
- # Forcing the service back up in case it was disabled
- connection.services.enable(options["--plug"], 'nova-compute')
+ status = get_power_status(connection, options)
+ # "failed" is unfenced as well for now; whether that is appropriate is still an open question
+ if status in [ "off", "running", "failed" ]:
try:
# Forcing the host back up
- connection.services.force_down(
- options["--plug"], "nova-compute", force_down=False)
+ logging.info("Forcing nova-compute back up on "+options["--plug"])
+ connection.services.force_down(options["--plug"], "nova-compute", force_down=False)
+ logging.info("Forced nova-compute back up on "+options["--plug"])
except Exception as e:
# In theory, if force_down=False fails, that's for the exact
# same possible reasons that below with force_down=True
@@ -91,20 +104,28 @@ def set_power_status_on(connection, options):
# Since it's about forcing back to a default value, there is
# no real worries to just consider it's still okay even if the
# command failed
- logging.info("Exception from attempt to force "
+ logging.warn("Exception from attempt to force "
"host back up via nova API: "
"%s: %s" % (e.__class__.__name__, e))
+ # Forcing the service back up in case it was disabled
+ logging.info("Enabling nova-compute on "+options["--plug"])
+ connection.services.enable(options["--plug"], 'nova-compute')
+
# Pretend we're 'on' so that the fencing library doesn't loop forever waiting for the node to boot
override_status = "on"
- elif status in ["on"]:
- # Nothing to do
- else:
+ elif status not in ["on"]:
# Not safe to unfence, don't waste time looping to see if the status changes to "on"
options["--power-timeout"] = "0"
def set_power_status_off(connection, options):
+ status = get_power_status(connection, options)
+ if status in [ "off" ]:
+ return
+
+ connection.services.disable(options["--plug"], 'nova-compute')
try:
+ # host/binary form of force_down; presumably only valid for nova API microversions before 2.53 (confirm)
connection.services.force_down(
options["--plug"], "nova-compute", force_down=True)
except Exception as e:
@@ -119,7 +140,7 @@ def set_power_status_off(connection, options):
"%s: %s" % (e.__class__.__name__, e))
# need to wait for nova to update its internal status or we
# cannot call host-evacuate
- while get_power_status(connection, options) != "off":
+ while get_power_status(connection, options) not in ["off"]:
# Loop forever if need be.
#
# Some callers (such as Pacemaker) will have a timer
@@ -135,19 +156,21 @@ def set_power_status(connection, options):
override_status = ""
logging.debug("set action: " + options["--action"])
- if not nova:
+ if not connection:
return
if options["--action"] in ["off", "reboot"]:
set_power_status_off(connection, options)
else:
set_power_status_on(connection, options)
+ logging.debug("set action passed: " + options["--action"])
+ sys.exit(0)
def fix_domain(connection, options):
domains = {}
last_domain = None
- if nova:
+ if connection:
# Find it in nova
services = connection.services.list(binary="nova-compute")
@@ -223,7 +246,7 @@ def fix_plug_name(connection, options):
def get_plugs_list(connection, options):
result = {}
- if nova:
+ if connection:
services = connection.services.list(binary="nova-compute")
for service in services:
longhost = service.host
@@ -294,7 +317,7 @@ def create_nova_connection(options):
logging.warning("Nova connection failed. %s: %s" % (e.__class__.__name__, e))
logging.warning("Couldn't obtain a supported connection to nova, tried: %s\n" % repr(versions))
- return None
+ return None
def define_new_opts():
all_opt["endpoint-type"] = {
@@ -378,11 +401,23 @@ def define_new_opts():
"order": 5,
}
+def set_multi_power_fn(connection, options, set_power_fn, get_power_fn, retry_attempts=1):
+ for _ in range(retry_attempts):
+ set_power_fn(connection, options)
+ time.sleep(int(options["--power-wait"]))
+
+ for _ in range(int(options["--power-timeout"])):
+ if get_power_fn(connection, options) != options["--action"]:
+ time.sleep(1)
+ else:
+ return True
+ return False
+
def main():
global override_status
atexit.register(atexit_handler)
- device_opt = ["login", "passwd", "tenant-name", "auth-url", "fabric_fencing", "on_target",
+ device_opt = ["login", "passwd", "tenant-name", "auth-url", "fabric_fencing",
"no_login", "no_password", "port", "domain", "no-shared-storage", "endpoint-type",
"record-only", "instance-filtering", "insecure", "region-name"]
define_new_opts()
@@ -402,13 +437,28 @@ def main():
run_delay(options)
+ logging.debug("Running "+options["--action"])
connection = create_nova_connection(options)
- fix_plug_name(connection, options)
- if options["--action"] in ["monitor", "status"]:
- sys.exit(0)
+ if options["--action"] in ["off", "on", "reboot", "status"]:
+ fix_plug_name(connection, options)
+
+
+ if options["--action"] in ["reboot"]:
+ options["--action"]="off"
+
+ if options["--action"] in ["off", "on"]:
+ # Do not query status before acting: use our own set-then-poll loop instead of fence_action
+ result = not set_multi_power_fn(connection, options, set_power_status, get_power_status_simple,
+ 1 + int(options["--retry-on"]))
+ elif options["--action"] in ["monitor"]:
+ result = 0
+ else:
+ result = fence_action(connection, options, set_power_status, get_power_status_simple, get_plugs_list, None)
- result = fence_action(connection, options, set_power_status, get_power_status, get_plugs_list, None)
+ logging.debug("Result for "+options["--action"]+": "+repr(result))
+ if result == None:
+ result = 0
sys.exit(result)
if __name__ == "__main__":
--
2.13.6