Skip to content

Commit

Permalink
Allow metrics to be enabled selectively and disable all by default.
Browse files Browse the repository at this point in the history
  • Loading branch information
progala committed Aug 7, 2024
1 parent 1039cf3 commit ffbd9fe
Show file tree
Hide file tree
Showing 6 changed files with 64 additions and 27 deletions.
2 changes: 2 additions & 0 deletions development/dev.env
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,5 @@ POSTGRES_HOST=postgres
MYSQL_DATABASE=nautobot
MYSQL_USER=nautobot
MYSQL_ROOT_HOST=%

NAUTOBOT_DLM_ENABLED_METRICS=nautobot_lcm_software_compliance_per_device_type,nautobot_lcm_software_compliance_per_inventory_item,nautobot_lcm_hw_end_of_support_per_part_number,nautobot_metrics_lcm_hw_end_of_support_site
3 changes: 2 additions & 1 deletion development/nautobot_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@
# }

# Expose Prometheus monitoring metrics at the HTTP endpoint '/metrics'
METRICS_ENABLED = False
METRICS_ENABLED = True

# Credentials that Nautobot will use to authenticate to devices when connecting via NAPALM.
NAPALM_USERNAME = os.environ.get("NAPALM_USERNAME", "")
Expand Down Expand Up @@ -262,6 +262,7 @@
"barchart_bar_width": float(os.environ.get("BARCHART_BAR_WIDTH", 0.1)),
"barchart_width": int(os.environ.get("BARCHART_WIDTH", 12)),
"barchart_height": int(os.environ.get("BARCHART_HEIGHT", 5)),
"enabled_metrics": [x for x in os.environ.get("NAUTOBOT_DLM_ENABLED_METRICS", "").split(",") if x],
},
}

Expand Down
24 changes: 19 additions & 5 deletions docs/admin/install.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ PLUGINS_CONFIG = {
"barchart_bar_width": float(os.environ.get("BARCHART_BAR_WIDTH", 0.1)),
"barchart_width": int(os.environ.get("BARCHART_WIDTH", 12)),
"barchart_height": int(os.environ.get("BARCHART_HEIGHT", 5)),
"enabled_metrics": [x for x in os.environ.get("NAUTOBOT_DLM_ENABLED_METRICS", "").split(",") if x],
},
}
```
Expand Down Expand Up @@ -72,8 +73,21 @@ sudo systemctl restart nautobot nautobot-worker nautobot-scheduler

The plugin behavior can be controlled with the following list of settings.

| Key | Example | Default | Description |
| ------- | ------ | -------- | ------------------------------------- |
| enable_backup | True | True | A boolean to represent whether or not to run backup configurations within the plugin. |
| platform_slug_map | {"cisco_wlc": "cisco_aireos"} | None | A dictionary in which the key is the platform slug and the value is what netutils uses in any "network_os" parameter. |
| per_feature_bar_width | 0.15 | 0.15 | The width of the table bar within the overview report |
| Key | ENV VAR | Example | Default | Description |
| ------- | ------ | ------ | -------- | ------------------------------------- |
| barchart_bar_width | BARCHART_BAR_WIDTH | 0.15 | 0.1 | The width of the table bar within the overview report. |
| barchart_width | BARCHART_WIDTH | 15 | 12 | The width of the barchart within the overview report. |
| barchart_height | BARCHART_HEIGHT | 8 | 5 | The height of the barchart within the overview report. |
| enabled_metrics | NAUTOBOT_DLM_ENABLED_METRICS | `["nautobot_metrics_lcm_hw_end_of_support_site"]` | `[]` | List of metric names to enable; all metrics are disabled by default. The environment variable takes a comma-separated string of metric names. |

### Available Metric Names

The following metric names can be listed in `enabled_metrics`:

- `nautobot_lcm_software_compliance_per_device_type`: Number of devices with valid/invalid software by device_type.

- `nautobot_lcm_software_compliance_per_inventory_item`: Number of inventory items with valid/invalid software.

- `nautobot_lcm_hw_end_of_support_per_part_number`: Number of End of Support devices and inventory items per Part Number.

- `nautobot_metrics_lcm_hw_end_of_support_site`: Number of End of Support devices and inventory items per Site.
1 change: 1 addition & 0 deletions nautobot_device_lifecycle_mgmt/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ class DeviceLifeCycleConfig(PluginConfig):
"barchart_bar_width": 0.1,
"barchart_width": 12,
"barchart_height": 5,
"enabled_metrics": [],
}
caching_config = {}

Expand Down
43 changes: 33 additions & 10 deletions nautobot_device_lifecycle_mgmt/metrics.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Nautobot Device LCM plugin application level metrics ."""
from datetime import datetime

from django.conf import settings
from django.db.models import Count, F, IntegerField, OuterRef, Q, Subquery, Value
from django.db.models.functions import Coalesce
from nautobot.dcim.models import Device, DeviceType, InventoryItem, Site
Expand All @@ -12,6 +13,8 @@
InventoryItemSoftwareValidationResult,
)

PLUGIN_CFG = settings.PLUGINS_CONFIG["nautobot_device_lifecycle_mgmt"]


def metrics_lcm_validation_report_device_type():
"""Calculate number of devices with valid/invalid software by device_type.
Expand Down Expand Up @@ -111,8 +114,8 @@ def metrics_lcm_validation_report_inventory_item():
yield inventory_item_software_compliance_gauge


def metrics_lcm_hw_end_of_support(): # pylint: disable=too-many-locals
"""Calculate number of End of Support devices and inventory items per Part Number and per Site.
def metrics_lcm_hw_end_of_support_part_number(): # pylint: disable=too-many-locals
"""Calculate number of End of Support devices and inventory items per Part Number.
Yields:
GaugeMetricFamily: Prometheus Metrics
Expand All @@ -122,9 +125,6 @@ def metrics_lcm_hw_end_of_support(): # pylint: disable=too-many-locals
"Nautobot LCM Hardware End of Support per Part Number",
labels=["part_number"],
)
hw_end_of_support_site_gauge = GaugeMetricFamily(
"nautobot_lcm_hw_end_of_support_per_site", "Nautobot LCM Hardware End of Support per Site", labels=["site"]
)

today = datetime.today().date()
hw_end_of_support = HardwareLCM.objects.filter(end_of_support__lt=today)
Expand Down Expand Up @@ -177,6 +177,25 @@ def metrics_lcm_hw_end_of_support(): # pylint: disable=too-many-locals

yield hw_end_of_support_part_number_gauge


def metrics_lcm_hw_end_of_support_site(): # pylint: disable=too-many-locals
"""Calculate number of End of Support devices and inventory items per Site.
Yields:
GaugeMetricFamily: Prometheus Metrics
"""
hw_end_of_support_site_gauge = GaugeMetricFamily(
"nautobot_lcm_hw_end_of_support_per_site", "Nautobot LCM Hardware End of Support per Site", labels=["site"]
)

today = datetime.today().date()
hw_end_of_support = HardwareLCM.objects.filter(end_of_support__lt=today)
hw_end_of_support_device_types = hw_end_of_support.exclude(device_type__isnull=True).values_list(
"device_type", flat=True
)
hw_end_of_support_invitems = hw_end_of_support.exclude(inventory_item__isnull=True).values_list(
"inventory_item", flat=True
)
# Initialize per site count to 0 for all sites
init_site_counts = Site.objects.values(site_slug=F("slug")).annotate(
site_count=Value(0, output_field=IntegerField())
Expand Down Expand Up @@ -214,8 +233,12 @@ def metrics_lcm_hw_end_of_support(): # pylint: disable=too-many-locals
yield hw_end_of_support_site_gauge


metrics = [
metrics_lcm_hw_end_of_support,
metrics_lcm_validation_report_device_type,
metrics_lcm_validation_report_inventory_item,
]
# Opt-in registry of metric generators. Each entry pairs the setting names that
# enable a generator with the generator itself; a generator is registered only
# when one of its names appears in the "enabled_metrics" plugin setting, so all
# metrics stay disabled by default.
_METRIC_GENERATORS = (
    (
        ("nautobot_lcm_software_compliance_per_device_type",),
        metrics_lcm_validation_report_device_type,
    ),
    (
        ("nautobot_lcm_software_compliance_per_inventory_item",),
        metrics_lcm_validation_report_inventory_item,
    ),
    (
        ("nautobot_lcm_hw_end_of_support_per_part_number",),
        metrics_lcm_hw_end_of_support_part_number,
    ),
    (
        # The first name is the originally documented key. The gauge is exposed
        # as "nautobot_lcm_hw_end_of_support_per_site", so that name is accepted
        # too, letting users enable the metric by the name they see in the
        # metrics output.
        (
            "nautobot_metrics_lcm_hw_end_of_support_site",
            "nautobot_lcm_hw_end_of_support_per_site",
        ),
        metrics_lcm_hw_end_of_support_site,
    ),
)

_enabled_metric_names = PLUGIN_CFG["enabled_metrics"]

# Registration order follows the table above.
metrics = [
    generator
    for names, generator in _METRIC_GENERATORS
    if any(name in _enabled_metric_names for name in names)
]
18 changes: 7 additions & 11 deletions nautobot_device_lifecycle_mgmt/tests/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
from django.test import TestCase

from nautobot_device_lifecycle_mgmt.metrics import (
metrics_lcm_hw_end_of_support,
metrics_lcm_hw_end_of_support_part_number,
metrics_lcm_hw_end_of_support_site,
metrics_lcm_validation_report_device_type,
metrics_lcm_validation_report_inventory_item,
)
Expand Down Expand Up @@ -53,22 +54,20 @@ def test_metrics_lcm_validation_report_inventory_item(self):
sample_labels = tuple(sample.labels.items())
self.assertEqual(expected_ts_samples[sample_labels], sample.value)

def test_metrics_lcm_hw_end_of_support_does_not_error(self):
def test_metrics_lcm_hw_end_of_support_site_does_not_error(self):
"""Query providing data to hw_end_of_support_site_gauge metric should not error out.
Guards against https://github.com/nautobot/nautobot-app-device-lifecycle-mgmt/issues/309
"""
metric_gen = metrics_lcm_hw_end_of_support()
# skip hw_end_of_support_part_number_gauge
next(metric_gen)
metric_gen = metrics_lcm_hw_end_of_support_site()
try:
# Get hw_end_of_support_site_gauge
next(metric_gen)
except ProgrammingError:
self.fail("hw_end_of_support_site_gauge query bug")
self.fail("hw_end_of_support_site query bug")

def test_metrics_lcm_hw_end_of_support_part_number(self):
"""Test metric hw_end_of_support_part_number_gauge."""
metric_gen = metrics_lcm_hw_end_of_support()
metric_gen = metrics_lcm_hw_end_of_support_part_number()

# Get hw_end_of_support_part_number_gauge
metric = next(metric_gen)
Expand All @@ -86,10 +85,7 @@ def test_metrics_lcm_hw_end_of_support_part_number(self):

def test_metrics_lcm_hw_end_of_support_site_gauge(self):
"""Test metric hw_end_of_support_site_gauge."""
metric_gen = metrics_lcm_hw_end_of_support()

# skip hw_end_of_support_part_number_gauge
next(metric_gen)
metric_gen = metrics_lcm_hw_end_of_support_site()

# Get hw_end_of_support_site_gauge
metric = next(metric_gen)
Expand Down

0 comments on commit ffbd9fe

Please sign in to comment.