File libvirt-aggregate-status.patch of Package openstack-monasca-agent
commit 48e31739c9cd784e796457a5148e4e6746c3e718
Author: Johannes Grassler <johannes.grassler@suse.com>
Date: Tue Nov 21 16:56:33 2017 +0100
Upstream: available in master and stable branches from stable/queens onwards
Added aggregate VM counts to libvirt check
This commit adds aggregate metrics to the libvirt check that
report the numbers and percentages of VMs by VM state, plus a
total of VMs on the host. These aggregate metrics make it
easier to visualize VM metrics in Grafana, which is good at
aggregating over time but very bad at aggregating over
multi-dimensional (host, VM) state metrics.
Change-Id: I5e553b933bb0ac49ca7fb2e7835ca3cc6aaa7ca3
Index: monasca-agent-1.9.2.dev3/docs/Libvirt.md
===================================================================
--- monasca-agent-1.9.2.dev3.orig/docs/Libvirt.md
+++ monasca-agent-1.9.2.dev3/docs/Libvirt.md
@@ -440,11 +440,28 @@ All metrics include `resource_id` and `z
In addition to per-instance metrics, the Libvirt plugin will publish aggregate metrics across all instances.
-| Name | Description |
-| ------------------------------- | -------------------------------------------------- |
-| nova.vm.cpu.total_allocated | Total CPUs allocated across all VMs |
-| nova.vm.disk.total_allocated_gb | Total Gbytes of disk space allocated to all VMs |
-| nova.vm.mem.total_allocated_mb | Total Mbytes of memory allocated to all VMs |
+| Name | Description |
+| ----------------------------------------- | ----------------------------------------------------------- |
+| nova.vm.cpu.total_allocated | Total CPUs allocated across all VMs |
+| nova.vm.disk.total_allocated_gb | Total Gbytes of disk space allocated to all VMs |
+| nova.vm.mem.total_allocated_mb | Total Mbytes of memory allocated to all VMs |
+| nova.vm.total_count | Total number of VMs on host |
+| nova.vm.blocked_count | Total number of VMs in state blocked on host |
+| nova.vm.blocked_perc | Percentage of VMs in state blocked on host |
+| nova.vm.crashed_count | Total number of VMs in state crashed on host |
+| nova.vm.crashed_perc | Percentage of VMs in state crashed on host |
+| nova.vm.nostate_count | Total number of VMs with no state on host |
+| nova.vm.nostate_perc | Percentage of VMs with no state on host |
+| nova.vm.paused_count | Total number of VMs in state paused on host |
+| nova.vm.paused_perc | Percentage of VMs in state paused on host |
+| nova.vm.suspended_count | Total number of VMs in state suspended on host |
+| nova.vm.suspended_perc | Percentage of VMs in state suspended on host |
+| nova.vm.running_count | Total number of VMs in state running on host |
+| nova.vm.running_perc | Percentage of VMs in state running on host |
+| nova.vm.shuttingdown_count | Total number of VMs in state shutting down on host |
+| nova.vm.shuttingdown_perc | Percentage of VMs in state shutting down on host |
+| nova.vm.shutoff_count | Total number of VMs in state shutoff/Nova suspended on host |
+| nova.vm.shutoff_perc | Percentage of VMs in state shutoff/Nova suspended on host |
Aggregate dimensions include hostname and component from the Operations Value column above.
Index: monasca-agent-1.9.2.dev3/monasca_agent/collector/checks_d/libvirt.py
===================================================================
--- monasca-agent-1.9.2.dev3.orig/monasca_agent/collector/checks_d/libvirt.py
+++ monasca-agent-1.9.2.dev3/monasca_agent/collector/checks_d/libvirt.py
@@ -45,6 +45,16 @@ DOM_STATES = {libvirt.VIR_DOMAIN_BLOCKED
libvirt.VIR_DOMAIN_PMSUSPENDED: 'VM is in power management (s3) suspend',
libvirt.VIR_DOMAIN_SHUTDOWN: 'VM is shutting down',
libvirt.VIR_DOMAIN_SHUTOFF: 'VM has been shut off (other reason)'}
+
+DOM_ALIVE_NAMES = {libvirt.VIR_DOMAIN_BLOCKED: 'blocked',
+ libvirt.VIR_DOMAIN_CRASHED: 'crashed',
+ libvirt.VIR_DOMAIN_NONE: 'nostate',
+ libvirt.VIR_DOMAIN_PAUSED: 'paused',
+ libvirt.VIR_DOMAIN_PMSUSPENDED: 'suspended',
+ libvirt.VIR_DOMAIN_RUNNING: 'running',
+ libvirt.VIR_DOMAIN_SHUTDOWN: 'shuttingdown',
+ libvirt.VIR_DOMAIN_SHUTOFF: 'shutoff'} # shut off/nova suspend
+
DOM_SHUTOFF_STATES = {libvirt.VIR_DOMAIN_SHUTOFF_UNKNOWN: 'VM has been shutoff (reason unknown)',
libvirt.VIR_DOMAIN_SHUTOFF_SHUTDOWN: 'VM has been shut down',
libvirt.VIR_DOMAIN_SHUTOFF_DESTROYED: 'VM has been destroyed (forced off)',
@@ -589,7 +599,7 @@ class LibvirtCheck(AgentCheck):
dimensions=dims_operations,
value_meta=metatag)
- return dom_status
+ return inst_state[0]
def prepare_run(self):
"""Check if it is time for measurements to be collected"""
@@ -662,6 +672,16 @@ class LibvirtCheck(AgentCheck):
# Build dimensions for both the customer and for operations
dims_base = self._set_dimensions({'service': 'compute', 'component': 'vm'}, instance)
+ # Initialize aggregate alive status data structure (separate from
+ # aggregate gauges because every possible value needs to be counted
+ # separately)
+ agg_alive_counts = {}
+ for code in DOM_ALIVE_NAMES:
+ agg_alive_counts[code] = 0
+
+ # Per host total VM count
+ vm_count = 0
+
# Define aggregate gauges, gauge name to metric name
agg_gauges = {'vcpus': 'nova.vm.cpu.total_allocated',
'ram': 'nova.vm.mem.total_allocated_mb',
@@ -721,9 +741,15 @@ class LibvirtCheck(AgentCheck):
vm_probation_remaining))
continue
+ vm_dom_state = self._inspect_state(insp, inst, inst_name,
+ instance_cache, dims_customer,
+ dims_operations)
+
+ agg_alive_counts[vm_dom_state] += 1
+ vm_count += 1
+
# Skip further processing on VMs that are not in an active state
- if self._inspect_state(insp, inst, inst_name, instance_cache,
- dims_customer, dims_operations) != 0:
+ if vm_dom_state != libvirt.VIR_DOMAIN_RUNNING:
continue
# Skip the remainder of the checks if alive_only is True in the config
@@ -780,6 +806,10 @@ class LibvirtCheck(AgentCheck):
for gauge in agg_gauges:
self.gauge(agg_gauges[gauge], agg_values[gauge], dimensions=dims_base)
+ # Publish aggregate VM counts
+
+ self._gauge_agg_alive_counts(agg_alive_counts, vm_count, dims_base)
+
# Check results of ping tests
self._check_ping_results(ping_results)
@@ -797,6 +827,23 @@ class LibvirtCheck(AgentCheck):
rate_value = -1
return rate_value
+ def _gauge_agg_alive_counts(self, agg_alive_counts, vm_count, dims_base):
+ count_pfx = "nova.vm."
+ total_frac = (float(vm_count) / 100)
+ self.gauge(count_pfx + 'total_count', vm_count, dimensions=dims_base)
+
+ for agg in agg_alive_counts:
+ self.gauge(count_pfx + DOM_ALIVE_NAMES[agg] + "_count",
+ agg_alive_counts[agg],
+ dimensions=dims_base)
+ if total_frac != 0:
+ self.gauge(count_pfx + DOM_ALIVE_NAMES[agg] + "_perc",
+ agg_alive_counts[agg] / total_frac,
+ dimensions=dims_base)
+ else:
+ self.gauge(count_pfx + DOM_ALIVE_NAMES[agg] + "_perc",
+ 0, dimensions=dims_base)
+
def _update_dims_with_metadata(self, instance_cache, inst_name, dim_operations):
"""Update operations dimensions with metadata."""
dims = dim_operations