File 0001-Validate-new-image-via-scheduler-during-rebuild.patch of Package openstack-nova
From bad534fc3fd05ffc09e50b13a9b5e3f51056a9ac Mon Sep 17 00:00:00 2001
From: Matt Riedemann <mriedem.os@gmail.com>
Date: Fri, 27 Oct 2017 16:03:15 -0400
Subject: [PATCH] Validate new image via scheduler during rebuild
During a rebuild we bypass the scheduler because we are
always rebuilding the instance on the same host it's already
on. However, we allow passing a new image during rebuild
and that new image needs to be validated to work with the
instance host by running it through the scheduler filters,
like the ImagePropertiesFilter. Otherwise the new image
could violate constraints placed on the host by the admin.
This change checks to see if there is a new image provided
and if so, modifies the request spec passed to the scheduler
so that the new image is validated all while restricting
the scheduler to still pick the same host that the instance
is running on. If the image is not valid for the host, the
scheduler will raise NoValidHost and the rebuild stops.
A functional test is added to show the recreate of the bug
and that we probably stop the rebuild now in conductor by
calling the scheduler to validate the image.
Co-Authored-By: Sylvain Bauza <sbauza@redhat.com>
Closes-Bug: #1664931
NOTE(mriedem): There were a few changes needed for Newton:
1. There is no PlacementFixture but it's not needed.
2. The API client needs to have the microversion set from
the test.
3. The enabled_filters config option wasn't in Newton.
4. The scheduler has to be started before compute otherwise
we get a MessagingTimeout due to the CastAsCall fixture
during the compute startup.
NOTE(bbobrov): stuff related to request_spec was removed
Depends-On: I344d8fdded9b7d5385fcb41b699f1352acb4cda7
Change-Id: I11746d1ea996a0f18b7c54b4c9c21df58cc4714b
(cherry picked from commit 984dd8ad6add4523d93c7ce5a666a32233e02e34)
(cherry picked from commit 9e2d63da94db63d97bd02e373bfc53d95808b833)
(cherry picked from commit b72105c1c49fcddc94992af63fc2f8078023491a)
(cherry picked from commit 97a51d981bd603b964b04b3568218ce57ac57338)
---
nova/compute/api.py | 6 ++-
nova/tests/functional/api/client.py | 10 ++++
nova/tests/functional/integrated_helpers.py | 80 ++++++++++++++++++++++++++++-
nova/tests/functional/test_servers.py | 70 +++++++++++++++++++++++++
nova/tests/unit/compute/test_compute_api.py | 2 +-
5 files changed, 164 insertions(+), 4 deletions(-)
diff --git a/nova/compute/api.py b/nova/compute/api.py
index f16f6c4cad..798b9f9c6f 100644
--- a/nova/compute/api.py
+++ b/nova/compute/api.py
@@ -2514,11 +2514,15 @@ class API(base.Base):
self._record_action_start(context, instance, instance_actions.REBUILD)
+ host = instance.host
+ if orig_image_ref != image_href:
+ host = None # This tells conductor to call the scheduler.
+
self.compute_task_api.rebuild_instance(context, instance=instance,
new_pass=admin_password, injected_files=files_to_inject,
image_ref=image_href, orig_image_ref=orig_image_ref,
orig_sys_metadata=orig_sys_metadata, bdms=bdms,
- preserve_ephemeral=preserve_ephemeral, host=instance.host,
+ preserve_ephemeral=preserve_ephemeral, host=host,
kwargs=kwargs)
@wrap_check_policy
diff --git a/nova/tests/functional/api/client.py b/nova/tests/functional/api/client.py
index c108c2af0e..d4907861d0 100644
--- a/nova/tests/functional/api/client.py
+++ b/nova/tests/functional/api/client.py
@@ -281,6 +281,16 @@ class TestOpenStackClient(object):
def delete_image(self, image_id):
return self.api_delete('/images/%s' % image_id)
+ def put_image_meta_key(self, image_id, key, value):
+ """Creates or updates a given image metadata key/value pair."""
+ req_body = {
+ 'meta': {
+ key: value
+ }
+ }
+ return self.api_put('/images/%s/metadata/%s' % (image_id, key),
+ req_body)
+
def get_flavor(self, flavor_id):
return self.api_get('/flavors/%s' % flavor_id).body['flavor']
diff --git a/nova/tests/functional/integrated_helpers.py b/nova/tests/functional/integrated_helpers.py
index 7362da73cc..c4d6e91824 100644
--- a/nova/tests/functional/integrated_helpers.py
+++ b/nova/tests/functional/integrated_helpers.py
@@ -19,6 +19,7 @@ Provides common functionality for integrated unit tests
import random
import string
+import time
import uuid
from oslo_config import cfg
@@ -82,7 +83,6 @@ class _IntegratedTestBase(test.TestCase):
fake_crypto.fetch_ca)
self.stubs.Set(crypto, 'generate_x509_cert',
fake_crypto.generate_x509_cert)
- self._setup_services()
self.api_fixture = self.useFixture(
nova_fixtures.OSAPIFixture(self._api_version))
@@ -94,8 +94,13 @@ class _IntegratedTestBase(test.TestCase):
else:
self.api = self.api_fixture.api
+ if hasattr(self, 'microversion'):
+ self.api.microversion = self.microversion
+
self.useFixture(cast_as_call.CastAsCall(self.stubs))
+ self._setup_services()
+
self.addCleanup(nova.tests.unit.image.fake.FakeImageService_reset)
def _setup_compute_service(self):
@@ -109,12 +114,12 @@ class _IntegratedTestBase(test.TestCase):
def _setup_services(self):
self.conductor = self.start_service('conductor',
manager=CONF.conductor.manager)
- self.compute = self._setup_compute_service()
self.cert = self.start_service('cert')
self.consoleauth = self.start_service('consoleauth')
self.network = self.start_service('network')
self.scheduler = self._setup_scheduler_service()
+ self.compute = self._setup_compute_service()
self.cells = self.start_service('cells', manager=CONF.cells.manager)
def _get_test_client(self):
@@ -135,6 +140,8 @@ class _IntegratedTestBase(test.TestCase):
# point we can get rid of this.
return {}
+ self.compute = self._setup_compute_service()
+
def get_unused_server_name(self):
servers = self.api.get_servers()
server_names = [server['name'] for server in servers]
@@ -253,3 +260,72 @@ class _IntegratedTestBase(test.TestCase):
expected_middleware,
("The expected wsgi middlewares %s are not "
"existed") % expected_middleware)
+
+
+class InstanceHelperMixin(object):
+ def _wait_for_state_change(self, admin_api, server, expected_status,
+ max_retries=10):
+ retry_count = 0
+ while True:
+ server = admin_api.get_server(server['id'])
+ if server['status'] == expected_status:
+ break
+ retry_count += 1
+ if retry_count == max_retries:
+ self.fail('Wait for state change failed, '
+ 'expected_status=%s, actual_status=%s'
+ % (expected_status, server['status']))
+ time.sleep(0.5)
+
+ return server
+
+ def _build_minimal_create_server_request(self, api, name, image_uuid=None,
+ flavor_id=None):
+ server = {}
+
+ # We now have a valid imageId
+ server['imageRef'] = image_uuid or api.get_images()[0]['id']
+
+ if not flavor_id:
+ # Set a valid flavorId
+ flavor_id = api.get_flavors()[1]['id']
+ server['flavorRef'] = ('http://fake.server/%s' % flavor_id)
+ server['name'] = name
+ return server
+
+ def _wait_for_action_fail_completion(
+ self, server, expected_action, event_name, api=None):
+ """Polls instance action events for the given instance, action and
+ action event name until it finds the action event with an error
+ result.
+ """
+ if api is None:
+ api = self.api
+ completion_event = None
+ for attempt in range(10):
+ actions = api.get_instance_actions(server['id'])
+ # Look for the migrate action.
+ for action in actions:
+ if action['action'] == expected_action:
+ events = (
+ api.api_get(
+ '/servers/%s/os-instance-actions/%s' %
+ (server['id'], action['request_id'])
+ ).body['instanceAction']['events'])
+ # Look for the action event being in error state.
+ for event in events:
+ if (event['event'] == event_name and
+ event['result'] is not None and
+ event['result'].lower() == 'error'):
+ completion_event = event
+ # Break out of the events loop.
+ break
+ if completion_event:
+ # Break out of the actions loop.
+ break
+ # We didn't find the completion event yet, so wait a bit.
+ time.sleep(0.5)
+
+ if completion_event is None:
+ self.fail('Timed out waiting for %s failure event. Current '
+ 'instance actions: %s' % (event_name, actions))
diff --git a/nova/tests/functional/test_servers.py b/nova/tests/functional/test_servers.py
index a003ce2e3e..d5b2212c71 100644
--- a/nova/tests/functional/test_servers.py
+++ b/nova/tests/functional/test_servers.py
@@ -20,12 +20,14 @@ import zlib
from oslo_log import log as logging
from oslo_utils import timeutils
+from nova.compute import instance_actions
from nova import context
from nova import exception
from nova.tests.functional.api import client
from nova.tests.functional import integrated_helpers
from nova.tests.unit import fake_network
import nova.virt.fake
+import nova.tests.unit.image.fake
LOG = logging.getLogger(__name__)
@@ -511,3 +513,71 @@ class ServersTest(ServersTestBase):
class ServersTestV21(ServersTest):
_api_version = 'v2.1'
+
+
+class ServerRebuildTestCase(integrated_helpers._IntegratedTestBase,
+ integrated_helpers.InstanceHelperMixin):
+ api_major_version = 'v2.1'
+ # We have to cap the microversion at 2.38 because that's the max we
+ # can use to update image metadata via our compute images proxy API.
+ microversion = '2.38'
+
+ # We need the ImagePropertiesFilter so override the base class setup
+ # which configures to use the chance_scheduler.
+ def _setup_scheduler_service(self):
+ self.flags(scheduler_default_filters=['ImagePropertiesFilter'])
+ return self.start_service('scheduler')
+
+ def test_rebuild_with_image_novalidhost(self):
+ """Creates a server with an image that is valid for the single compute
+ that we have. Then rebuilds the server, passing in an image with
+ metadata that does not fit the single compute which should result in
+ a NoValidHost error. The ImagePropertiesFilter filter is enabled by
+ default so that should filter out the host based on the image meta.
+ """
+ server_req_body = {
+ 'server': {
+ # We hard-code from a fake image since we can't get images
+ # via the compute /images proxy API with microversion > 2.35.
+ 'imageRef': '155d900f-4e14-4e4c-a73d-069cbf4541e6',
+ 'flavorRef': '1', # m1.tiny from DefaultFlavorsFixture,
+ 'name': 'test_rebuild_with_image_novalidhost',
+ # We don't care about networking for this test. This requires
+ # microversion >= 2.37.
+ 'networks': 'none'
+ }
+ }
+ server = self.api.post_server(server_req_body)
+ self._wait_for_state_change(self.api, server, 'ACTIVE')
+ # Now update the image metadata to be something that won't work with
+ # the fake compute driver we're using since the fake driver has an
+ # "x86_64" architecture.
+ rebuild_image_ref = (
+ nova.tests.unit.image.fake.AUTO_DISK_CONFIG_ENABLED_IMAGE_UUID)
+ self.api.put_image_meta_key(
+ rebuild_image_ref, 'hw_architecture', 'unicore32')
+ # Now rebuild the server with that updated image and it should result
+ # in a NoValidHost failure from the scheduler.
+ rebuild_req_body = {
+ 'rebuild': {
+ 'imageRef': rebuild_image_ref
+ }
+ }
+ # Since we're using the CastAsCall fixture, the NoValidHost error
+ # should actually come back to the API and result in a 500 error.
+ # Normally the user would get a 202 response because nova-api RPC casts
+ # to nova-conductor which RPC calls the scheduler which raises the
+ # NoValidHost. We can mimic the end user way to figure out the failure
+ # by looking for the failed 'rebuild' instance action event.
+ self.api.api_post('/servers/%s/action' % server['id'],
+ rebuild_req_body, check_response_status=[500])
+ # Look for the failed rebuild action.
+ self._wait_for_action_fail_completion(
+ server, instance_actions.REBUILD, 'rebuild_server',
+ # Before microversion 2.51 events are only returned for instance
+ # actions if you're an admin.
+ self.api_fixture.admin_api)
+ # Unfortunately the server's image_ref is updated to be the new image
+ # even though the rebuild should not work.
+ server = self.api.get_server(server['id'])
+ self.assertEqual(rebuild_image_ref, server['image']['id'])
diff --git a/nova/tests/unit/compute/test_compute_api.py b/nova/tests/unit/compute/test_compute_api.py
index d7278bea2b..ee7df9c096 100644
--- a/nova/tests/unit/compute/test_compute_api.py
+++ b/nova/tests/unit/compute/test_compute_api.py
@@ -2489,7 +2489,7 @@ class _ComputeAPIUnitTestMixIn(object):
injected_files=files_to_inject, image_ref=new_image_href,
orig_image_ref=orig_image_href,
orig_sys_metadata=orig_system_metadata, bdms=bdms,
- preserve_ephemeral=False, host=instance.host, kwargs={})
+ preserve_ephemeral=False, host=None, kwargs={})
_check_auto_disk_config.assert_called_once_with(image=new_image)
_checks_for_create_and_rebuild.assert_called_once_with(self.context,
--
2.13.6