diff --git a/bireme/api/bibliographic.py b/bireme/api/bibliographic.py index 53ae7203..9d10750c 100644 --- a/bireme/api/bibliographic.py +++ b/bireme/api/bibliographic.py @@ -2,6 +2,7 @@ from django.conf import settings from django.urls import re_path from django.db.models import Q +from django.db.models import Prefetch from django.contrib.contenttypes.models import ContentType from tastypie.serializers import Serializer @@ -27,8 +28,19 @@ class ReferenceResource(CustomResource): + _version_cache = None + class Meta: - queryset = Reference.objects.prefetch_related('indexed_database', 'created_by', 'updated_by').all() + queryset = Reference.objects.select_related( + 'created_by', + 'updated_by' + ).prefetch_related( + 'indexed_database', + Prefetch('referencealternateid_set', to_attr='alternate_ids'), + Prefetch('referencelocal_set', to_attr='library_records'), + Prefetch('referencecomplement_set', to_attr='complement_data'), + ).all() + allowed_methods = ['get'] serializer = ISISSerializer(formats=['json', 'xml', 'isis_id'], field_tag=field_tag_map) resource_name = 'bibliographic' @@ -130,11 +142,10 @@ def full_dehydrate(self, bundle, for_list=False): bundle = super(ReferenceResource, self).full_dehydrate(bundle) # Check type of Reference to add additional fields to bundle - reference_id = bundle.obj.id if 'a' in bundle.data['treatment_level']: - obj = ReferenceAnalytic.objects.get(pk=reference_id) + obj = bundle.obj.referenceanalytic else: - obj = ReferenceSource.objects.get(pk=reference_id) + obj = bundle.obj.referencesource # Add additional fields to bundle bundle = self.add_fields_to_bundle(bundle, obj) @@ -157,20 +168,22 @@ def full_dehydrate(self, bundle, for_list=False): bundle.data['source_control'] = 'FONTE' # Add system version control number - version_file = open(os.path.join(settings.BASE_DIR, 'templates/version.txt')) - version_number = version_file.readlines()[0] - bundle.data['system_version'] = version_number.rstrip() + if self._version_cache is 
None: + with open(os.path.join(settings.BASE_DIR, 'templates/version.txt')) as f: + self._version_cache = f.readlines()[0].rstrip() + bundle.data['system_version'] = self._version_cache return bundle def add_fields_to_bundle(self, bundle, obj, import_field_list=[]): - for field in obj._meta.get_fields(): + fields = obj._meta.get_fields() + for field in fields: field_value = getattr(obj, field.name, {}) # check if field has multiples values (ex. ManyToManyField) - if hasattr(field_value, 'all'): + if hasattr(field_value, 'exists'): # if field is empty skip to next field - if not field_value.all().exists(): + if not field_value.exists(): continue if field_value: @@ -190,11 +203,12 @@ def dehydrate(self, bundle): c_type = ContentType.objects.get_for_model(child_class) descriptors = Descriptor.objects.filter(object_id=bundle.obj.id, content_type=c_type, status=1) - thematic_areas = ResourceThematic.objects.filter(object_id=bundle.obj.id, content_type=c_type, status=1) + thematic_areas = ResourceThematic.objects.filter(object_id=bundle.obj.id, content_type=c_type, status=1).select_related('thematic_area') attachments = Attachment.objects.filter(object_id=bundle.obj.id, content_type=c_type) - alternate_ids = ReferenceAlternateID.objects.filter(reference_id=bundle.obj.id) - library_records = ReferenceLocal.objects.filter(source=bundle.obj.id) - complement_data = ReferenceComplement.objects.filter(source=bundle.obj.id) + alternate_ids = getattr(bundle.obj, 'alternate_ids', []) + library_records = getattr(bundle.obj, 'library_records', []) + complement_data = getattr(bundle.obj, 'complement_data', []) + related_obj_id = 'biblio-{}'.format(bundle.obj.id) related_resources = LinkedResource.objects.filter( Q(object_id=bundle.obj.id, content_type=c_type) | Q(internal_id=related_obj_id) ) related_research = LinkedResearchData.objects.filter(object_id=bundle.obj.id, content_type=c_type) diff --git a/bireme/biblioref/management/commands/export_references.py 
b/bireme/biblioref/management/commands/export_references.py
new file mode 100644
index 00000000..559d08e7
--- /dev/null
+++ b/bireme/biblioref/management/commands/export_references.py
@@ -0,0 +1,133 @@
+# coding: utf-8
+"""
+Export References as JSON through the Tastypie resource.
+
+This command exports the records of the Reference model
+(biblioref.models.Reference) through ReferenceResource
+(api.bibliographic.ReferenceResource) and its full_dehydrate() cycle, so
+every record has the same format the API produces.
+
+FILTER APPLIED (equivalent to the fq used by get_search):
+    status__in=[-3, 0, 1]
+
+GENERATED FILES:
+    export_references_jsons/references_00000000_00000100.json
+    export_references_jsons/references_00000100_00000200.json
+    ...
+
+USAGE:
+    # Export everything in chunks of 1000 (default: 100)
+    python manage.py export_references 1000
+
+    # Resume the export from offset 50000
+    python manage.py export_references 1000 --offset=50000
+
+    # Use a custom output directory
+    python manage.py export_references 500 --outdir=/backup/refs_json
+"""
+
+import json
+import os
+import time
+
+from django.core.management.base import BaseCommand, CommandError
+
+from api.bibliographic import ReferenceResource
+
+
+class Command(BaseCommand):
+    """Export every Reference with status -3, 0 or 1 to paginated JSON files."""
+
+    help = 'Exporta toda a base do ReferenceResource em JSON, em arquivos paginados (status -3, 0, 1)'
+
+    def add_arguments(self, parser):
+        """Declare the chunk size, the start offset and the output directory."""
+        parser.add_argument('count', type=int, nargs='?', default=100)
+        parser.add_argument('--offset', type=int, default=0)
+        parser.add_argument(
+            '--outdir',
+            default='export_references_jsons',
+            help='Diretório de saída dos arquivos JSON (default: export_references_jsons)'
+        )
+
+    def handle(self, *args, **options):
+        """Write the filtered references to ``outdir``, one JSON file per chunk.
+
+        Raises:
+            CommandError: if ``count`` is not positive or ``--offset`` is negative.
+        """
+        count = options['count']
+        initial_offset = options['offset']
+        outdir = options['outdir']
+
+        # range() rejects a zero step and queryset slicing rejects negative
+        # indexes, so fail early with a readable message.
+        if count <= 0:
+            raise CommandError(f'count deve ser maior que zero (recebido: {count})')
+        if initial_offset < 0:
+            raise CommandError(f'--offset não pode ser negativo (recebido: {initial_offset})')
+
+        resource = ReferenceResource()
+        # Same filter as the API fq. The explicit ordering keeps the slices
+        # stable: an unordered queryset may come back in a different order on
+        # every query, so chunks (and resumed runs) could repeat or skip rows.
+        qs = resource._meta.queryset.filter(status__in=[-3, 0, 1]).order_by('pk')
+
+        os.makedirs(outdir, exist_ok=True)
+
+        total = qs.count()
+        if total == 0:
+            self.stdout.write(self.style.WARNING('Nenhum registro para exportar (status -3, 0, 1)'))
+            return
+
+        # Make sure the initial offset does not go past the total.
+        if initial_offset >= total:
+            self.stdout.write(
+                self.style.WARNING(f'Offset inicial {initial_offset} >= total {total}. Nada para exportar.')
+            )
+            return
+
+        total_exported = 0
+        chunk_number = 0
+        for slice_from in range(initial_offset, total, count):
+            started = time.monotonic()
+            slice_to = min(slice_from + count, total)
+
+            objects = list(qs[slice_from:slice_to])
+            if not objects:
+                continue
+
+            data = [self._serialize(resource, obj) for obj in objects]
+
+            filename = f"references_{slice_from:08d}_{slice_to:08d}.json"
+            filepath = os.path.join(outdir, filename)
+            with open(filepath, 'w', encoding='utf-8') as f:
+                json.dump(data, f, ensure_ascii=False, default=str)
+
+            exported_now = len(data)
+            total_exported += exported_now
+            chunk_number += 1
+            elapsed = time.monotonic() - started
+
+            self.stdout.write(
+                self.style.SUCCESS(
+                    f'Chunk {chunk_number}: exportados {exported_now} registros de um total de {total}'
+                    f' (status -3, 0, 1) para {filepath} em {elapsed:.2f}s'
+                )
+            )
+
+        self.stdout.write(
+            self.style.SUCCESS(
+                f'Exportação concluída. Total de registros exportados: {total_exported} de {total}'
+            )
+        )
+
+    @staticmethod
+    def _serialize(resource, obj):
+        """Return the API representation (``bundle.data``) of one Reference."""
+        bundle = resource.build_bundle(obj=obj, request=None)
+        # Tastypie's Resource.full_dehydrate() already ends by calling
+        # dehydrate(), so calling dehydrate() again here would only repeat its
+        # per-object queries.
+        # NOTE(review): assumes CustomResource keeps that behaviour - confirm.
+        return resource.full_dehydrate(bundle).data
diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml
index 690543b6..50af97f7 100644
--- a/docker-compose-dev.yml
+++ b/docker-compose-dev.yml
@@ -20,7 +20,7 @@ services:
       - nginx-proxy
 
   fi_admin_cache:
-    image: bitnami/memcached:1.6.32
+    image: bitnami/memcached:latest
     container_name: fi-admin-cache
     restart: unless-stopped
     ports: