Skip to content

Commit 5bcfc9c

Browse files
authored
Bug 1942867 - Add an alert management system for telemetry alerts. (#9015)
This patch adds a new alert management system for telemetry alerts. Some generic base and utility classes are added directly to the treeherder/perf/auto_perf_sheriffing folder. These are not specific to telemetry alerting and could be used in other performance sheriffing automation. The concrete classes for telemetry alert management are found in the treeherder/perf/auto_perf_sheriffing/telemetry_alerting folder. These are integrated into the telemetry detection code in Sherlock through the TelemetryAlertManager and are run from TelemetryAlertManager.manage_alerts. The manage_alerts method is defined generically in the AlertManager class. It starts by updating the DB with any changes made to telemetry bugs in Bugzilla - at the moment, this only covers their resolutions. After this, bugs are filed for the alerts generated for any probes that specify that a bug should be filed (by setting the monitor.alert field to True in their probe definition). Once bugs are filed, modifications are made to these bugs and to any existing bugs as needed. Currently, this only modifies the see_also field to associate all bugs filed for the same detection range with each other - in other words, all the bugs that are part of the same PerformanceTelemetryAlertSummary. At the end of this "bug handling" phase, emails are produced for any alerts that request them (an alert produces either a bug or an email, but never both, to reduce spam). Finally, it's possible for either the bug modifications or the emails to fail. For that case, there is a "housekeeping" stage that retries the failed alerts on a daily basis.
1 parent 7f18dcb commit 5bcfc9c

32 files changed

+5948
-52
lines changed

docker-compose.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ services:
2929
- PROJECTS_TO_INGEST=${PROJECTS_TO_INGEST:-autoland,try}
3030
- BUGZILLA_API_URL=${BUGZILLA_API_URL:-}
3131
- BUG_FILER_API_KEY=${BUG_FILER_API_KEY:-}
32+
- BUG_COMMENTER_API_KEY=${BUG_COMMENTER_API_KEY:-}
3233
- TLS_CERT_PATH=${TLS_CERT_PATH:-}
3334
- TELEMETRY_ENABLE_ALERTS=${TELEMETRY_ENABLE_ALERTS:-}
3435
- GCLOUD_PROJECT=${GCLOUD_PROJECT:-}

tests/perf/auto_perf_sheriffing/conftest.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,3 +261,24 @@ def job_from_try(hundred_job_blobs, create_jobs):
261261
job.repository.is_try_repo = True
262262
job.repository.save()
263263
return job
264+
265+
266+
@pytest.fixture
def mock_bugfiler_settings(monkeypatch):
    """Override the Bugzilla-related Django settings seen by ``base_bug_manager``.

    Every override goes through ``monkeypatch.setattr`` on the ``settings``
    object reachable from the ``base_bug_manager`` module.
    """
    settings_path = "treeherder.perf.auto_perf_sheriffing.base_bug_manager.settings"
    overrides = (
        ("BUGFILER_API_URL", "https://bugzilla.mozilla.org"),
        ("BUGFILER_API_KEY", "test-api-key"),
        ("COMMENTER_API_KEY", "test-commenter-key"),
        ("SITE_HOSTNAME", "treeherder.mozilla.org"),
    )
    # Applied in the same order as listed above.
    for setting_name, fake_value in overrides:
        monkeypatch.setattr(f"{settings_path}.{setting_name}", fake_value)

tests/perf/auto_perf_sheriffing/telemetry_alerting/__init__.py

Whitespace-only changes.
Lines changed: 237 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,237 @@
1+
from datetime import datetime
2+
from unittest.mock import Mock
3+
4+
import pytest
5+
6+
from treeherder.perf.auto_perf_sheriffing.telemetry_alerting.alert import TelemetryAlert
7+
from treeherder.perf.models import (
8+
PerformanceTelemetryAlert,
9+
PerformanceTelemetryAlertSummary,
10+
PerformanceTelemetrySignature,
11+
)
12+
13+
14+
@pytest.fixture
def detection_push(create_push, test_repository):
    """Create a push with revision ``abcdef123456`` dated 2024-01-15 12:00."""
    # NOTE(review): this capture shows an empty added line (file line 19)
    # between the ``revision`` and ``time`` arguments; an argument may have been
    # lost in extraction -- confirm against the repository.
    pushed_at = datetime(2024, 1, 15, 12, 0, 0)
    return create_push(test_repository, revision="abcdef123456", time=pushed_at)
22+
23+
24+
@pytest.fixture
def prev_push(create_push, test_repository):
    """Create a push with revision ``prev123456`` dated 2024-01-14 12:00."""
    # NOTE(review): this capture shows an empty added line (file line 29)
    # between the ``revision`` and ``time`` arguments; an argument may have been
    # lost in extraction -- confirm against the repository.
    pushed_at = datetime(2024, 1, 14, 12, 0, 0)
    return create_push(test_repository, revision="prev123456", time=pushed_at)
32+
33+
34+
@pytest.fixture
def next_push(create_push, test_repository):
    """Create a push with revision ``next123456`` dated 2024-01-16 12:00."""
    # NOTE(review): this capture shows an empty added line (file line 39)
    # between the ``revision`` and ``time`` arguments; an argument may have been
    # lost in extraction -- confirm against the repository.
    pushed_at = datetime(2024, 1, 16, 12, 0, 0)
    return create_push(test_repository, revision="next123456", time=pushed_at)
42+
43+
44+
@pytest.fixture
def test_telemetry_signature(db):
    """Insert and return a Nightly/Windows Glean signature for Firefox.

    ``db`` is requested but never referenced in the body -- presumably it is
    pytest-django's database-access fixture; verify against the test setup.
    """
    signature_fields = {
        "channel": "Nightly",
        "platform": "Windows",
        "probe": "networking_http_channel_page_open_to_first_sent",
        "probe_type": "Glean",
        "application": "Firefox",
    }
    return PerformanceTelemetrySignature.objects.create(**signature_fields)
53+
54+
55+
@pytest.fixture
def test_telemetry_alert_summary(
    test_repository, test_perf_framework, detection_push, prev_push, next_push, test_issue_tracker
):
    """Insert and return an automatically-created telemetry alert summary.

    The summary's ``prev_push``/``push`` pair is ``prev_push``/``next_push``,
    while ``original_push`` is ``detection_push``.
    """
    summary_fields = {
        "repository": test_repository,
        "framework": test_perf_framework,
        "prev_push": prev_push,
        "push": next_push,
        "original_push": detection_push,
        "manually_created": False,
        "created": datetime(2024, 1, 16, 13, 0, 0),
        "issue_tracker": test_issue_tracker,
    }
    return PerformanceTelemetryAlertSummary.objects.create(**summary_fields)
69+
70+
71+
@pytest.fixture
def telemetry_alert_obj(
    test_telemetry_alert, test_telemetry_alert_summary, test_telemetry_signature
):
    """Wrap the alert row, its summary and a signature in a ``TelemetryAlert``."""
    # NOTE(review): ``test_telemetry_alert`` is built from
    # ``create_telemetry_signature()`` (probe "test_probe"), not from
    # ``test_telemetry_signature`` -- confirm the mismatch is intentional.
    wrapped_alert = TelemetryAlert(
        test_telemetry_alert,
        test_telemetry_alert_summary,
        test_telemetry_signature,
    )
    return wrapped_alert
78+
79+
80+
@pytest.fixture
def mock_probe():
    """Return a ``Mock`` probe configured to file a bug and not send an email."""
    # NOTE(review): the address literal below looks like an email-obfuscation
    # placeholder introduced by this capture; it is kept as-is -- confirm the
    # real value against the repository.
    probe = Mock(
        **{
            "get_notification_emails.return_value": ["[email protected]"],
            "should_file_bug.return_value": True,
            "should_email.return_value": False,
        }
    )
    # Set after construction: ``Mock(name=...)`` names the mock itself instead
    # of creating a plain ``name`` attribute.
    probe.name = "test_probe_metric"
    return probe
89+
90+
91+
@pytest.fixture
def base_metric_info():
    """Return a metric-info dict for the page-open-to-first-sent timing probe.

    The result has three top-level keys: ``name``, ``data`` (the probe
    definition, without a ``monitor`` entry) and ``platform``.
    """
    probe_definition = {
        "name": "networking.http_channel_page_open_to_first_sent",
        "description": "Time in milliseconds from AsyncOpen to first byte of request sent",
        "tags": ["Core :: Networking"],
        "in_source": True,
        "latest_fx_release_version": "143.0",
        "extra_keys": None,
        "type": "timing_distribution",
        "expires": None,
        "expiry_text": "never",
        "sampled": False,
        "sampled_text": "Not sampled",
        "is_part_of_info_section": False,
        "bugs": ["https://bugzilla.mozilla.org/show_bug.cgi?id=1697480"],
        "has_annotation": False,
        "origin": "gecko",
    }
    metric_info = {
        "name": "networking_http_channel_page_open_to_first_sent",
        "data": probe_definition,
        "platform": "desktop",
    }
    return metric_info
115+
116+
117+
@pytest.fixture
def metric_info_with_alert(base_metric_info):
    """Add a ``monitor`` section with ``alert=True`` to ``base_metric_info``.

    The dict supplied by ``base_metric_info`` is modified in place and the same
    object is returned.
    """
    # NOTE(review): the address literal below looks like an email-obfuscation
    # placeholder introduced by this capture; it is kept as-is -- confirm the
    # real value against the repository.
    monitor_section = {
        "alert": True,
        "bugzilla_notification_emails": ["[email protected]"],
    }
    base_metric_info["data"]["monitor"] = monitor_section
    return base_metric_info
125+
126+
127+
@pytest.fixture
def alert_without_bug(test_telemetry_alert_summary, test_telemetry_signature):
    """Build an alert via ``TelemetryAlertFactory`` from a row with ``bug_number=None``."""
    from treeherder.perf.auto_perf_sheriffing.telemetry_alerting.alert import (
        TelemetryAlertFactory,
    )

    row_values = {
        "summary": test_telemetry_alert_summary,
        "series_signature": test_telemetry_signature,
        "is_regression": True,
        "amount_pct": 15.5,
        "amount_abs": 100.0,
        "prev_value": 645.5,
        "new_value": 745.5,
        "sustained": True,
        "direction": "increase",
        "confidence": 0.95,
        "prev_median": 650.0,
        "new_median": 750.0,
        "prev_p90": 700.0,
        "new_p90": 800.0,
        "prev_p95": 720.0,
        "new_p95": 820.0,
        "bug_number": None,  # no bug is associated with this alert
        "notified": False,
    }
    stored_row = PerformanceTelemetryAlert.objects.create(**row_values)
    return TelemetryAlertFactory.construct_alert(stored_row)
155+
156+
157+
@pytest.fixture
def alert_with_bug(test_telemetry_alert_summary, test_telemetry_signature):
    """Build an alert via ``TelemetryAlertFactory`` from a row with ``bug_number=123456``."""
    from treeherder.perf.auto_perf_sheriffing.telemetry_alerting.alert import (
        TelemetryAlertFactory,
    )

    row_values = {
        "summary": test_telemetry_alert_summary,
        "series_signature": test_telemetry_signature,
        "is_regression": True,
        "amount_pct": 15.5,
        "amount_abs": 100.0,
        "prev_value": 645.5,
        "new_value": 745.5,
        "sustained": True,
        "direction": "increase",
        "confidence": 0.95,
        "prev_median": 650.0,
        "new_median": 750.0,
        "prev_p90": 700.0,
        "new_p90": 800.0,
        "prev_p95": 720.0,
        "new_p95": 820.0,
        "bug_number": 123456,  # the bug already associated with this alert
        "notified": False,
    }
    stored_row = PerformanceTelemetryAlert.objects.create(**row_values)
    return TelemetryAlertFactory.construct_alert(stored_row)
185+
186+
187+
@pytest.fixture
def create_telemetry_alert(test_telemetry_alert_summary):
    """Return a factory that inserts ``PerformanceTelemetryAlert`` rows.

    The returned callable takes the signature for the row plus optional keyword
    arguments; any keyword argument replaces the default of the same name.
    """
    baseline_fields = {
        "is_regression": True,
        "amount_pct": 15.5,
        "amount_abs": 100.0,
        "prev_value": 645.5,
        "new_value": 745.5,
        "sustained": True,
        "direction": "increase",
        "confidence": 0.95,
        "prev_median": 650.0,
        "new_median": 750.0,
        "prev_p90": 700.0,
        "new_p90": 800.0,
        "prev_p95": 720.0,
        "new_p95": 820.0,
        "bug_number": None,
        "notified": False,
        "summary": test_telemetry_alert_summary,
    }

    def _create_alert(signature, **kwargs):
        # Later entries win, so caller-supplied values override the baseline.
        row_fields = {**baseline_fields, **kwargs}
        return PerformanceTelemetryAlert.objects.create(series_signature=signature, **row_fields)

    return _create_alert
215+
216+
217+
@pytest.fixture
def create_telemetry_signature():
    """Return a factory that inserts ``PerformanceTelemetrySignature`` rows.

    The returned callable accepts keyword arguments; any of them replaces the
    default of the same name.
    """
    baseline_fields = {
        "channel": "Nightly",
        "platform": "Windows",
        "probe": "test_probe",
        "probe_type": "Glean",
        "application": "Firefox",
    }

    def _create_signature(**kwargs):
        # Later entries win, so caller-supplied values override the baseline.
        signature_fields = {**baseline_fields, **kwargs}
        return PerformanceTelemetrySignature.objects.create(**signature_fields)

    return _create_signature
233+
234+
235+
@pytest.fixture
def test_telemetry_alert(create_telemetry_signature, create_telemetry_alert):
    """Insert a default alert row attached to a freshly created default signature."""
    default_signature = create_telemetry_signature()
    return create_telemetry_alert(default_signature)

0 commit comments

Comments
 (0)