Skip to content

Commit d213bcc

Browse files
authored
Fix LPID - proper list of pushes, classify all tasks in single .update where possible. (#8968)
1 parent 3ba1a6a commit d213bcc

File tree

1 file changed

+55
-33
lines changed

1 file changed

+55
-33
lines changed

treeherder/log_parser/intermittents.py

Lines changed: 55 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,38 @@
11
import datetime
22

3-
from treeherder.model.models import Group, GroupStatus, Job, Push
3+
from treeherder.model.models import Group, GroupStatus, Job, Push, TextLogError
4+
5+
6+
def classify(jobs_to_classify, jobs_to_unclassify):
    """Apply intermittent-failure classifications to jobs in bulk.

    :param jobs_to_classify: iterable of job ids to park as known
        intermittent (failure_classification_id=8).
    :param jobs_to_unclassify: iterable of job ids previously parked as
        fc_id=8 which newer data shows are real failures; they are split
        into "new failure needs classification" (fc_id=6) and
        "not classified" (fc_id=1).
    """
    # TODO: consider job.result=(busted, exception)
    if jobs_to_classify:
        # Only park jobs currently marked "not classified" (1) or "new
        # failure needs classification" (6) as intermittent (8).
        # NOTE: QuerySet.update() on an empty queryset is a no-op, so the
        # former `if target_jobs:` pre-check (an extra SELECT that
        # materialized all matching rows) is unnecessary.
        Job.objects.filter(
            id__in=jobs_to_classify, result="testfailed", failure_classification_id__in=[1, 6]
        ).update(failure_classification_id=8)

    if jobs_to_unclassify:
        # TODO: query text_log_error for new_failure and use 6 if previously set
        # Jobs with at least one log error flagged new_failure revert to
        # fc_id=6; all the rest revert to fc_id=1.
        new_jobs = (
            TextLogError.objects.filter(
                job__id__in=jobs_to_unclassify, new_failure=True, job__failure_classification_id=8
            )
            .values("job__id")
            .distinct()
        )
        jobs_to_newfailure = [j["job__id"] for j in new_jobs]
        jobs_to_regular_failure = list(set(jobs_to_unclassify) - set(jobs_to_newfailure))

        # classification_id: 6 == new failure needs classification, 1 == not classified
        if jobs_to_newfailure:
            Job.objects.filter(id__in=jobs_to_newfailure, result="testfailed").update(
                failure_classification_id=6
            )
        if jobs_to_regular_failure:
            Job.objects.filter(id__in=jobs_to_regular_failure, result="testfailed").update(
                failure_classification_id=1
            )
436

537

638
def _check_and_mark_infra(current_job, job_ids, push_ids):
@@ -42,7 +74,7 @@ def _check_and_mark_infra(current_job, job_ids, push_ids):
4274

4375
# ignore previous classified, we are looking for NEW extra jobs
4476
if len([ej for ej in extra_jobs if ej["failure_classification_id"] != 8]) == 0:
45-
return
77+
return [], []
4678

4779
# ensure 50% 'success' rate
4880
# success here means the task ran and produced groups | is success
@@ -52,20 +84,24 @@ def _check_and_mark_infra(current_job, job_ids, push_ids):
5284
if job["id"] not in job_ids and job["result"] != "success":
5385
extra_failed.append(job)
5486

87+
jobs_to_classify = []
88+
jobs_to_unclassify = []
89+
5590
# look for failure rate > 50% and exit early
5691
if len(extra_failed) / len(extra_jobs) > 0.5:
5792
# as failure rate > 50%, if any jobs are fc_id=8 classify as fc_id=1
5893
for job in extra_failed:
5994
if job["failure_classification_id"] == 8:
60-
Job.objects.filter(id=job["id"]).update(failure_classification_id=1)
61-
return
95+
jobs_to_unclassify.append(job["id"])
6296

6397
# any extra_jobs will be failures without groups (infra/timeout/etc.)
6498
# theoretically there could be many jobs here
6599
# mark extra_jobs as `intermittent_needs_classification`
66100
for job in extra_failed:
67101
if job["failure_classification_id"] not in [4, 8]:
68-
Job.objects.filter(id=job["id"]).update(failure_classification_id=8)
102+
jobs_to_classify.append(job["id"])
103+
104+
return jobs_to_classify, jobs_to_unclassify
69105

70106

71107
def check_and_mark_intermittent(job_id):
@@ -86,7 +122,7 @@ def check_and_mark_intermittent(job_id):
86122
# get list of pushes, find the current push and recent pushes
87123
idlist = (
88124
Push.objects.filter(repository__id=current_job.repository.id, time__gte=start_date)
89-
.values("id")
125+
.values_list("id", flat=True)
90126
.order_by("-id")
91127
)
92128
counter = -1
@@ -135,7 +171,8 @@ def check_and_mark_intermittent(job_id):
135171
# If no groups, look for infra
136172
distinct_job_ids = list(set([f["job_logs__job__id"] for f in all_groups]))
137173
if len(distinct_job_ids) == 1:
138-
return _check_and_mark_infra(current_job, distinct_job_ids, ids)
174+
to_classify, to_unclassify = _check_and_mark_infra(current_job, distinct_job_ids, ids)
175+
return classify(to_classify, to_unclassify)
139176

140177
mappings = {}
141178
job_classifications = {}
@@ -151,6 +188,7 @@ def check_and_mark_intermittent(job_id):
151188
# we have a variant
152189
continue
153190

191+
# TODO: consider storing a list of job.id that are fc_id=8
154192
# store job:fc_id so we can reference what needs changed
155193
if item["job_logs__job__id"] not in job_classifications:
156194
job_classifications[item["job_logs__job__id"]] = item[
@@ -181,18 +219,14 @@ def check_and_mark_intermittent(job_id):
181219
current_changed_groups = {}
182220
for group in mappings.get(current_job.push.id, {}).get("groups", []):
183221
all_data = []
184-
current_data = []
222+
current_data = [
223+
mappings[current_job.push.id]["groups"][group][j]
224+
for j in mappings[current_job.push.id]["groups"][group]
225+
]
185226
for id in mappings.keys():
186227
all_data.extend(
187228
[mappings[id]["groups"][group][j] for j in mappings[id]["groups"].get(group, {})]
188229
)
189-
if id == current_job.push.id:
190-
current_data.extend(
191-
[
192-
mappings[id]["groups"][group][j]
193-
for j in mappings[id]["groups"].get(group, {})
194-
]
195-
)
196230

197231
# if new data changes results, update
198232
pass_rate = len([s for s in all_data if s == GroupStatus.OK]) / len(all_data)
@@ -203,9 +237,9 @@ def check_and_mark_intermittent(job_id):
203237
current_changed_groups[group] = True
204238

205239
# all changed_groups need to be evaluated on previous 'failed' jobs to ensure all groups in that task are 'passing'
240+
jobs_to_classify = [] # mark as fcid=8 (known intermittent)
241+
jobs_to_unclassify = [] # previously parked as fcid=8, new failing data, now fcid=1
206242
for id in mappings.keys():
207-
jobs_to_classify = [] # mark as fcid=8 (known intermittent)
208-
jobs_to_unclassify = [] # previously parked as fcid=8, new failing data, now fcid=1
209243
for job in mappings[id]["jobs"]:
210244
all_green = True
211245
current_all_green = True
@@ -229,19 +263,7 @@ def check_and_mark_intermittent(job_id):
229263
elif job_classifications[job] == 8:
230264
jobs_to_unclassify.append(job)
231265

232-
# TODO: consider job.result=(busted, exception)
233-
for job in jobs_to_classify:
234-
target_job = Job.objects.filter(
235-
id=job, result="testfailed", failure_classification_id__in=[1, 6]
236-
)
237-
if target_job:
238-
target_job.update(failure_classification_id=8)
239-
240-
for job in jobs_to_unclassify:
241-
target_job = Job.objects.filter(
242-
id=job, result="testfailed", failure_classification_id=8
243-
)
244-
if target_job:
245-
target_job.update(failure_classification_id=1)
246-
247-
return _check_and_mark_infra(current_job, distinct_job_ids, ids)
266+
to_classify, to_unclassify = _check_and_mark_infra(current_job, distinct_job_ids, ids)
267+
jobs_to_classify.extend(to_classify)
268+
jobs_to_unclassify.extend(to_unclassify)
269+
return classify(jobs_to_classify, jobs_to_unclassify)

0 commit comments

Comments
 (0)