Skip to content

Commit

Permalink
refactor(statistics): introduce a statistics base model based on SQL …
Browse files Browse the repository at this point in the history
…VIEW

This is still WIP, but I think this is the right way forward.

TODO:

* Further refactor the views, as the customer,project,task viewsets should
  return all the selected objects, regardless of reported sum. I think we
  will need to base those viewsets off of their respective models and
  join/prefetch the report statistic onto it
* Finalize the rest of the refactoring to make it work again

Optionally (and I hope it's not necessary): Build a statistic (database) view
for all statistics endpoints. This would allow us to optimize the whole thing
much more (and improve performance AND readability) at the cost of quite some
more boilerplate code
  • Loading branch information
winged committed Dec 27, 2024
1 parent 0a75577 commit 77ee70c
Show file tree
Hide file tree
Showing 7 changed files with 143 additions and 56 deletions.
4 changes: 4 additions & 0 deletions backend/timed/reports/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@
from timed.tracking.models import Report


class ReportStatisticFilterset(FilterSet):
pass


class StatisticFiltersetBase:
def filter_has_reviewer(
self, queryset: QuerySet[Report], _name: str, value: int
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
from django.db import migrations, models

CREATE_VIEW_SQL = """
CREATE VIEW "reports_reportstatistic" AS (
SELECT
task_id as id, -- avoid jumping through hoops for Django
task_id,
user_id,
projects_project.id as project_id,
projects_project.customer_id as customer_id,
sum(duration) as duration,
date,
to_char(date, 'YYYY') as year,
to_char(date, 'YYYY-MM') as month
FROM tracking_report
JOIN projects_task ON projects_task.id = tracking_report.task_id
JOIN projects_project ON projects_project.id = projects_task.project_id
GROUP BY
task_id,
user_id,
date,
projects_task.id,
projects_project.id,
projects_project.customer_id
)
"""

DROP_VIEW_SQL = """
DROP VIEW "reports_reportstatistic"
"""


class Migration(migrations.Migration):

dependencies = [
('tracking', '0001_initial')
]

operations = [
migrations.RunSQL(CREATE_VIEW_SQL, DROP_VIEW_SQL),
migrations.CreateModel(
name='reportstatistic',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('duration', models.DurationField()),
('month', models.CharField(max_length=7)),
('year', models.CharField(max_length=4)),
],
options={
'managed': False,
},
),
]
Empty file.
27 changes: 27 additions & 0 deletions backend/timed/reports/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from django.db import models

from timed.employment.models import User
from timed.projects.models import Customer, Project, Task


class ReportStatistic(models.Model):
# Implement a basic STAR SCHEMA view: Basic aggregate is done in the VIEW,
# everything else of interest can be JOINed directly

task = models.OneToOneField(
Task, on_delete=models.DO_NOTHING, null=False, related_name="+"
)
user = models.ForeignKey(User, on_delete=models.DO_NOTHING, null=False)
project = models.ForeignKey(Project, on_delete=models.DO_NOTHING, null=False)
customer = models.ForeignKey(Customer, on_delete=models.DO_NOTHING, null=False)
duration = models.DurationField()

month = models.CharField(max_length=7)
year = models.CharField(max_length=4)
date = models.DateField()

class Meta:
managed = False

def __str__(self):
return f"ReportStat({self.task} @ {self.date}, {self.user})"
12 changes: 6 additions & 6 deletions backend/timed/reports/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,15 @@


class YearStatisticSerializer(TotalTimeRootMetaMixin, Serializer):
duration = DurationField()
duration = DurationField(read_only=True)
year = IntegerField()

class Meta:
resource_name = "year-statistics"


class MonthStatisticSerializer(TotalTimeRootMetaMixin, Serializer):
duration = DurationField()
duration = DurationField(read_only=True)
year = IntegerField()
month = IntegerField()

Expand All @@ -39,16 +39,16 @@ class Meta:


class CustomerStatisticSerializer(TotalTimeRootMetaMixin, Serializer):
duration = DurationField()
name = CharField(read_only=True)
duration = DurationField(read_only=True)
name = CharField(read_only=True, source="customer__name")

class Meta:
resource_name = "customer-statistics"


class ProjectStatisticSerializer(TotalTimeRootMetaMixin, Serializer):
duration = DurationField()
name = CharField()
duration = DurationField(read_only=True)
name = CharField(source="project__name")
amount_offered = DecimalField(max_digits=None, decimal_places=2)
amount_offered_currency = CharField()
amount_invoiced = DecimalField(max_digits=None, decimal_places=2)
Expand Down
12 changes: 9 additions & 3 deletions backend/timed/reports/tests/test_customer_statistic.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,13 @@


@pytest.mark.parametrize(
("is_employed", "is_customer_assignee", "is_customer", "expected", "status_code"),
(
"is_employed",
"is_customer_assignee",
"is_customer",
"num_queries",
"status_code",
),
[
(False, True, False, 1, status.HTTP_403_FORBIDDEN),
(False, True, True, 1, status.HTTP_403_FORBIDDEN),
Expand All @@ -24,7 +30,7 @@ def test_customer_statistic_list(
is_employed,
is_customer_assignee,
is_customer,
expected,
num_queries,
status_code,
django_assert_num_queries,
setup_customer_and_employment_status,
Expand All @@ -49,7 +55,7 @@ def test_customer_statistic_list(
report2 = ReportFactory.create(duration=timedelta(hours=4))

url = reverse("customer-statistic-list")
with django_assert_num_queries(expected):
with django_assert_num_queries(num_queries):
result = auth_client.get(url, data={"ordering": "duration"})
assert result.status_code == status_code

Expand Down
86 changes: 39 additions & 47 deletions backend/timed/reports/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from timed.permissions import IsAuthenticated, IsInternal, IsSuperUser
from timed.projects.models import Customer, Project, Task
from timed.reports import serializers
from timed.reports.models import ReportStatistic
from timed.tracking.filters import ReportFilterSet
from timed.tracking.models import Report
from timed.tracking.views import ReportViewSet
Expand All @@ -35,7 +36,7 @@
from timed.employment.models import User


class BaseStatisticQuerysetMixin:
class BaseStatisticQuerysetMixin(AggregateQuerysetMixin):
"""Base statistics queryset mixin.
Build and filter the statistics queryset according to the following
Expand All @@ -51,7 +52,7 @@ class BaseStatisticQuerysetMixin:
For this to work, each viewset defines two properties:
* The `qs_fields` define which fields are to be selected
* The `select_fields` is a list of fields to select (and implicitly GROUP BY)
* The `pk_field` is an expression that will be used as a primary key in the
REST sense (not really related to the database primary key, but serves as
a row identifier)
Expand All @@ -61,30 +62,29 @@ class BaseStatisticQuerysetMixin:
"""

def get_queryset(self):
return (
Report.objects.all()
.select_related("user", "task", "task__project", "task__project__customer")
.annotate(year=ExtractYear("date"))
.annotate(month=ExtractYear("date") * 100 + ExtractMonth("date"))
# TODO we're doing select_related() here, which makes a JOIN inside
# the VIEW superfluous. Refactor this to drop it in the VIEW and join
# normally here - should be slightly faster. But "first make it correct"
return ReportStatistic.objects.all().select_related(
"user", "task", "project", "customer"
)

def filter_queryset(self, queryset):
queryset = super().filter_queryset(queryset)
if isinstance(self.qs_fields, dict):
# qs fields need to be aliased
queryset = queryset.annotate(**self.qs_fields)

queryset = queryset.values(*list(self.qs_fields))
# need to name it `total_duration` as `duration` is already
# taken on the `Report` model
queryset = queryset.values(*self.select_fields)
queryset = queryset.annotate(duration=Sum("duration"))
queryset = queryset.annotate(pk=F(self.pk_field))
return queryset
return queryset # noqa: RET504


class YearStatisticViewSet(BaseStatisticQuerysetMixin, ReadOnlyModelViewSet):
"""Year statistics calculates total reported time per year."""

serializer_class = serializers.YearStatisticSerializer
filterset_class = ReportFilterSet
filterset_class = filters.ReportStatisticFilterset
ordering_fields = (
"year",
"duration",
Expand All @@ -97,15 +97,15 @@ class YearStatisticViewSet(BaseStatisticQuerysetMixin, ReadOnlyModelViewSet):
),
)

qs_fields = ("year", "duration")
pk_field = "year"
select_fields = ("year",)


class MonthStatisticViewSet(BaseStatisticQuerysetMixin, ReadOnlyModelViewSet):
"""Month statistics calculates total reported time per month."""

serializer_class = serializers.MonthStatisticSerializer
filterset_class = ReportFilterSet
filterset_class = filters.ReportStatisticFilterset
ordering_fields = (
"year",
"month",
Expand All @@ -122,20 +122,18 @@ class MonthStatisticViewSet(BaseStatisticQuerysetMixin, ReadOnlyModelViewSet):
),
)

qs_fields = ("year", "month", "duration")
pk_field = "month"
select_fields = ("year", "month")


class CustomerStatisticViewSet(BaseStatisticQuerysetMixin, ReadOnlyModelViewSet):
"""Customer statistics calculates total reported time per customer."""

serializer_class = serializers.CustomerStatisticSerializer
filterset_class = ReportFilterSet
filterset_class = filters.ReportStatisticFilterset
ordering_fields = (
"task__project__customer__name",
"customer__name",
"duration",
"estimated_time",
"remaining_effort",
)

ordering = ("name",)
Expand All @@ -145,22 +143,17 @@ class CustomerStatisticViewSet(BaseStatisticQuerysetMixin, ReadOnlyModelViewSet)
(IsInternal | IsSuperUser) & IsAuthenticated
),
)
qs_fields = { # noqa: RUF012
"year": F("year"),
"month": F("month"),
"name": F("task__project__customer__name"),
"customer_id": F("task__project__customer_id"),
}
pk_field = "customer_id"
select_fields = ("customer__name",)


class ProjectStatisticViewSet(AggregateQuerysetMixin, ReadOnlyModelViewSet):
class ProjectStatisticViewSet(BaseStatisticQuerysetMixin, ReadOnlyModelViewSet):
"""Project statistics calculates total reported time per project."""

serializer_class = serializers.ProjectStatisticSerializer
filterset_class = ReportFilterSet
filterset_class = filters.ReportStatisticFilterset
ordering_fields = (
"task__project__name",
"project__name",
"duration",
"estimated_time",
"remaining_effort",
Expand All @@ -173,20 +166,20 @@ class ProjectStatisticViewSet(AggregateQuerysetMixin, ReadOnlyModelViewSet):
),
)

qs_fields = { # noqa: RUF012
"year": F("year"),
"month": F("month"),
"name": F("task__project__name"),
"project_id": F("task__project_id"),
}
pk_field = "project_id"
select_fields = (
"customer__name",
"project__name",
"task__estimated_time",
"task__remaining_effort",
)


class TaskStatisticViewSet(BaseStatisticQuerysetMixin, ReadOnlyModelViewSet):
"""Task statistics calculates total reported time per task."""

serializer_class = serializers.TaskStatisticSerializer
filterset_class = ReportFilterSet
filterset_class = filters.ReportStatisticFilterset
ordering_fields = (
"task__name",
"duration",
Expand All @@ -195,25 +188,23 @@ class TaskStatisticViewSet(BaseStatisticQuerysetMixin, ReadOnlyModelViewSet):
)
ordering = ("task__name",)
permission_classes = (
(
# internal employees or super users may read all customer statistics
(IsInternal | IsSuperUser) & IsAuthenticated
),
# internal employees or super users may read all customer statistics
((IsInternal | IsSuperUser) & IsAuthenticated),
)

qs_fields = { # noqa: RUF012
"year": F("year"),
"month": F("month"),
"name": F("task__name"),
}
pk_field = "task_id"
select_fields = (
"task__name",
"task__estimated_time",
"task__remaining_effort",
)


class UserStatisticViewSet(BaseStatisticQuerysetMixin, ReadOnlyModelViewSet):
"""User calculates total reported time per user."""

serializer_class = serializers.UserStatisticSerializer
filterset_class = ReportFilterSet
filterset_class = filters.ReportStatisticFilterset
ordering_fields = (
"user__username",
"duration",
Expand All @@ -227,6 +218,7 @@ class UserStatisticViewSet(BaseStatisticQuerysetMixin, ReadOnlyModelViewSet):
)

pk_field = "user"
select_fields = ("user__username",)


class WorkReportViewSet(GenericViewSet):
Expand All @@ -236,7 +228,7 @@ class WorkReportViewSet(GenericViewSet):
in several projects work reports will be returned as zip.
"""

filterset_class = ReportFilterSet
filterset_class = filters.ReportStatisticFilterset
ordering = ReportViewSet.ordering
ordering_fields = ReportViewSet.ordering_fields

Expand Down

0 comments on commit 77ee70c

Please sign in to comment.