File 0001-Fix-cleaning-up-evacuated-instances.patch of Package openstack-nova
From fb968e18b2472b0dd7231ff3244b683d59f04cd0 Mon Sep 17 00:00:00 2001
From: Balazs Gibizer <balazs.gibizer@ericsson.com>
Date: Thu, 12 Oct 2017 16:07:28 +0200
Subject: Fix cleaning up evacuated instances

When bug 1709902 was fixed in I0df401a7c91f012fdb25cb0e6b344ca51de8c309,
the fix assumed that when _destroy_evacuated_instances() is called
during the init of the nova-compute service, the resource tracker
already knows the compute node ids associated with the given compute
host. This is not true, and therefore _destroy_evacuated_instances
fails with an exception and does not clean up the evacuated instance.

The resource tracker's compute_nodes dict is only initialized during
the first update_available_resource call, which happens in the
pre_start_hook, while _destroy_evacuated_instances is called from
init_host, which runs before the pre_start_hook.

The _destroy_evacuated_instances call uses
_delete_allocation_for_moved_instance, which relies on the resource
tracker's compute_nodes dict.

This patch inlines _delete_allocation_for_moved_instance in
_destroy_evacuated_instances and queries the db for the compute node
uuid. As ironic uses a 1:M host:node setup, we cannot ask the db only
once for the node uuid, as different instances might be on different
nodes.
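
In outline, the inlined cleanup in the manager.py hunk below amounts
to the following condensed sketch (error handling and logging trimmed;
see the hunk for the exact code):

    # cache node name -> compute node uuid; with ironic one host can
    # back several nodes, so look each source node up at most once
    compute_nodes = {}
    for instance in evacuated:
        migration = evacuations[instance.uuid]
        node = migration.source_node
        if node not in compute_nodes:
            # ask the DB directly; the resource tracker is not populated yet
            compute_nodes[node] = objects.ComputeNode.get_by_host_and_nodename(
                context, self.host, node).uuid
        resources = scheduler_utils.resources_from_flavor(
            instance, instance.flavor)
        self.reportclient.remove_provider_from_instance_allocation(
            instance.uuid, compute_nodes[node], instance.user_id,
            instance.project_id, resources)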

NOTE(mriedem): A couple of changes had to be made to the compute
manager code since I0883c2ba1989c5d5a46e23bcbcda53598707bcbc is
not in stable/pike.

Change-Id: I35749374ff09b0e98064c75ff9c33dad577579c6
Closes-Bug: #1721652
Related-Bug: #1709902
(cherry picked from commit 9252ffdacf262008bc41409d4fb574ec472dc913)
---
nova/compute/manager.py | 33 ++++++-
nova/tests/functional/test_servers.py | 32 ++-----
nova/tests/unit/compute/test_compute_mgr.py | 136 +++++++++++++++++++++++++---
3 files changed, 159 insertions(+), 42 deletions(-)
Index: nova-14.0.11.dev13/nova/compute/manager.py
===================================================================
--- nova-14.0.11.dev13.orig/nova/compute/manager.py
+++ nova-14.0.11.dev13/nova/compute/manager.py
@@ -92,6 +92,7 @@ from nova.pci import whitelist
from nova import rpc
from nova import safe_utils
from nova.scheduler import client as scheduler_client
+from nova.scheduler import utils as scheduler_utils
from nova import utils
from nova.virt import block_device as driver_block_device
from nova.virt import configdrive
@@ -514,6 +515,7 @@ class ComputeManager(manager.Manager):
self.consoleauth_rpcapi = consoleauth.rpcapi.ConsoleAuthAPI()
self.cells_rpcapi = cells_rpcapi.CellsAPI()
self.scheduler_client = scheduler_client.SchedulerClient()
+ self.reportclient = self.scheduler_client.reportclient
self._resource_tracker_dict = {}
self.instance_events = InstanceEvents()
self._sync_power_pool = eventlet.GreenPool(
@@ -653,6 +655,12 @@ class ComputeManager(manager.Manager):
local_instances = self._get_instances_on_driver(context, filters)
evacuated = [inst for inst in local_instances
if inst.uuid in evacuations]
+
+ # NOTE(gibi): We are called from init_host and at this point the
+ # compute_nodes of the resource tracker has not been populated yet so
+ # we cannot rely on the resource tracker here.
+ compute_nodes = {}
+
for instance in evacuated:
migration = evacuations[instance.uuid]
LOG.info(_LI('Deleting instance as it has been evacuated from '
@@ -676,9 +684,28 @@ class ComputeManager(manager.Manager):
network_info,
bdi, destroy_disks)
 
- rt = self._get_resource_tracker()
- rt.delete_allocation_for_evacuated_instance(
- instance, migration.source_node)
+ # delete the allocation of the evacuated instance from this host
+ if migration.source_node not in compute_nodes:
+ try:
+ cn_uuid = objects.ComputeNode.get_by_host_and_nodename(
+ context, self.host, migration.source_node).uuid
+ compute_nodes[migration.source_node] = cn_uuid
+ except exception.ComputeHostNotFound:
+ LOG.error("Failed to clean allocation of evacuated "
+ "instance as the source node %s is not found",
+ migration.source_node, instance=instance)
+ continue
+ cn_uuid = compute_nodes[migration.source_node]
+
+ my_resources = scheduler_utils.resources_from_flavor(
+ instance, instance.flavor)
+ res = self.reportclient.remove_provider_from_instance_allocation(
+ instance.uuid, cn_uuid, instance.user_id,
+ instance.project_id, my_resources)
+ if not res:
+ LOG.error("Failed to clean allocation of evacuated instance "
+ "on the source node %s",
+ cn_uuid, instance=instance)
 
migration.status = 'completed'
migration.save()