From 084550f16f4dedf6ca61690cd5aa5600bc58fa02 Mon Sep 17 00:00:00 2001 From: David Vogt Date: Mon, 23 Dec 2024 14:19:12 +0100 Subject: [PATCH] fix(statistics): use subquery instead of join to avoid cartesian product Because of the way we filter in the statistics view, any added filter (that affects reports) adds a "dimension" to the cartesian product, exploding the total number of hours reported. Instead of using JOIN, we now use EXISTS(SUBQUERY), which should avoid this issue. This might be a tiny bit slower, but let's try to make it correct first, then fast. --- backend/timed/reports/views.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/backend/timed/reports/views.py b/backend/timed/reports/views.py index 1821b9cb3..62f32f02a 100644 --- a/backend/timed/reports/views.py +++ b/backend/timed/reports/views.py @@ -8,7 +8,7 @@ from zipfile import ZipFile from django.conf import settings -from django.db.models import F, Q, QuerySet, Sum +from django.db.models import F, Q, Exists, OuterRef, QuerySet, Sum from django.db.models.functions import ExtractMonth, ExtractYear from django.http import HttpResponse from django.utils.http import content_disposition_header @@ -117,9 +117,14 @@ def filter(self, /, **kwargs): return new_qs def filter_base(self, *args, **kwargs): + filtered = ( + self.model.objects.filter(*args, **kwargs) + .values("pk") + .filter(pk=OuterRef("pk")) + ) return StatisticQueryset( model=self.model, - base_qs=self._base.filter(*args, **kwargs), + base_qs=self._base.filter(Exists(filtered)), catch_prefixes=self._catch_prefixes, agg_filters=self._agg_filters, )