From e7d786066d8ff11e8cfe7529a5d55557615ccc48 Mon Sep 17 00:00:00 2001 From: Dmitrii Lavrukhin Date: Mon, 11 Nov 2024 15:46:23 +0300 Subject: [PATCH 1/3] not prefetching images when not needed --- cvat/apps/dataset_manager/task.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/cvat/apps/dataset_manager/task.py b/cvat/apps/dataset_manager/task.py index 5b72f92a1ebc..29e6e916311b 100644 --- a/cvat/apps/dataset_manager/task.py +++ b/cvat/apps/dataset_manager/task.py @@ -13,7 +13,7 @@ from datumaro.components.errors import DatasetError, DatasetImportError, DatasetNotFoundError from django.db import transaction -from django.db.models.query import Prefetch +from django.db.models.query import Prefetch, QuerySet from django.conf import settings from rest_framework.exceptions import ValidationError @@ -81,9 +81,10 @@ def merge_table_rows(rows, keys_for_merge, field_id): return list(merged_rows.values()) + class JobAnnotation: @classmethod - def add_prefetch_info(cls, queryset): + def add_prefetch_info(cls, queryset: QuerySet, prefetch_images: bool): assert issubclass(queryset.model, models.Job) label_qs = add_prefetch_fields(models.Label.objects.all(), [ @@ -93,6 +94,12 @@ def add_prefetch_info(cls, queryset): ]) label_qs = JobData.add_prefetch_info(label_qs) + task_data_queryset = models.Data.objects.select_related('video') + if prefetch_images: + task_data_queryset = task_data_queryset.prefetch_related( + Prefetch('images', queryset=models.Image.objects.order_by('frame')) + ) + return queryset.select_related( 'segment', 'segment__task', @@ -103,18 +110,15 @@ def add_prefetch_info(cls, queryset): 'segment__task__project__owner', 'segment__task__project__assignee', - Prefetch('segment__task__data', - queryset=models.Data.objects.select_related('video').prefetch_related( - Prefetch('images', queryset=models.Image.objects.order_by('frame')) - )), + Prefetch('segment__task__data', queryset=task_data_queryset), Prefetch('segment__task__label_set', queryset=label_qs), Prefetch('segment__task__project__label_set', queryset=label_qs), ) - def __init__(self, pk, *, is_prefetched=False, queryset=None): + def __init__(self, pk, *, is_prefetched: bool = False, queryset: QuerySet = None, prefetch_images: bool = True): if queryset is None: - queryset = self.add_prefetch_info(models.Job.objects) + queryset = self.add_prefetch_info(models.Job.objects, prefetch_images=prefetch_images) if is_prefetched: self.db_job: models.Job = queryset.select_related( @@ -1018,7 +1022,7 @@ def put_job_data(pk, data): @plugin_decorator @transaction.atomic def patch_job_data(pk, data, action): - annotation = JobAnnotation(pk) + annotation = JobAnnotation(pk, prefetch_images=False) if action == PatchAction.CREATE: annotation.create(data) elif action == PatchAction.UPDATE: @@ -1031,7 +1035,7 @@ def patch_job_data(pk, data, action): @silk_profile(name="DELETE job data") @transaction.atomic def delete_job_data(pk): - annotation = JobAnnotation(pk) + annotation = JobAnnotation(pk, prefetch_images=False) annotation.delete() def export_job(job_id, dst_file, format_name, server_url=None, save_images=False): From 1c59e5064ef0c10b37fc4297f00c42ae6e3db40d Mon Sep 17 00:00:00 2001 From: Dmitrii Lavrukhin Date: Tue, 12 Nov 2024 19:01:27 +0300 Subject: [PATCH 2/3] fixing tests --- cvat/apps/dataset_manager/task.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cvat/apps/dataset_manager/task.py b/cvat/apps/dataset_manager/task.py index 29e6e916311b..0ad01b06129e 100644 --- a/cvat/apps/dataset_manager/task.py +++ b/cvat/apps/dataset_manager/task.py @@ -84,7 +84,7 @@ def merge_table_rows(rows, keys_for_merge, field_id): class JobAnnotation: @classmethod - def add_prefetch_info(cls, queryset: QuerySet, prefetch_images: bool): + def add_prefetch_info(cls, queryset: QuerySet, prefetch_images: bool = True): assert issubclass(queryset.model, models.Job) label_qs = add_prefetch_fields(models.Label.objects.all(), [ From b20565295aef09d487d21672e8821d69ae04fa80 Mon Sep 17 00:00:00 2001 From: Dmitrii Lavrukhin Date: Wed, 13 Nov 2024 13:08:48 +0300 Subject: [PATCH 3/3] changelog entry --- .../20241113_130658_dmitrii.lavrukhin_no_queryset_cache.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 changelog.d/20241113_130658_dmitrii.lavrukhin_no_queryset_cache.md diff --git a/changelog.d/20241113_130658_dmitrii.lavrukhin_no_queryset_cache.md b/changelog.d/20241113_130658_dmitrii.lavrukhin_no_queryset_cache.md new file mode 100644 index 000000000000..512c4b1cd5a6 --- /dev/null +++ b/changelog.d/20241113_130658_dmitrii.lavrukhin_no_queryset_cache.md @@ -0,0 +1,4 @@ +### Fixed + +- Optimized memory consumption when importing annotations to a task with a lot of jobs and images + ()