Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 28 additions & 14 deletions bireme/api/bibliographic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from django.conf import settings
from django.urls import re_path
from django.db.models import Q
from django.db.models import Prefetch
from django.contrib.contenttypes.models import ContentType

from tastypie.serializers import Serializer
Expand All @@ -27,8 +28,19 @@


class ReferenceResource(CustomResource):
_version_cache = None

class Meta:
queryset = Reference.objects.prefetch_related('indexed_database', 'created_by', 'updated_by').all()
queryset = Reference.objects.select_related(
'created_by',
'updated_by'
).prefetch_related(
'indexed_database',
Prefetch('referencealternateid_set', to_attr='alternate_ids'),
Prefetch('referencelocal_set', to_attr='library_records'),
Prefetch('referencecomplement_set', to_attr='complement_data'),
).all()

allowed_methods = ['get']
serializer = ISISSerializer(formats=['json', 'xml', 'isis_id'], field_tag=field_tag_map)
resource_name = 'bibliographic'
Expand Down Expand Up @@ -130,11 +142,10 @@ def full_dehydrate(self, bundle, for_list=False):
bundle = super(ReferenceResource, self).full_dehydrate(bundle)

# Check type of Reference to add additional fields to bundle
reference_id = bundle.obj.id
if 'a' in bundle.data['treatment_level']:
obj = ReferenceAnalytic.objects.get(pk=reference_id)
obj = bundle.obj.referenceanalytic
else:
obj = ReferenceSource.objects.get(pk=reference_id)
obj = bundle.obj.referencesource

# Add additional fields to bundle
bundle = self.add_fields_to_bundle(bundle, obj)
Expand All @@ -157,20 +168,22 @@ def full_dehydrate(self, bundle, for_list=False):
bundle.data['source_control'] = 'FONTE'

# Add system version control number
version_file = open(os.path.join(settings.BASE_DIR, 'templates/version.txt'))
version_number = version_file.readlines()[0]
bundle.data['system_version'] = version_number.rstrip()
if self._version_cache is None:
with open(os.path.join(settings.BASE_DIR, 'templates/version.txt')) as f:
self._version_cache = f.readlines()[0].rstrip()
bundle.data['system_version'] = self._version_cache

return bundle

def add_fields_to_bundle(self, bundle, obj, import_field_list=[]):
for field in obj._meta.get_fields():
fields = obj._meta.get_fields()
for field in fields:
field_value = getattr(obj, field.name, {})

# check if field has multiples values (ex. ManyToManyField)
if hasattr(field_value, 'all'):
if hasattr(field_value, 'exists'):
# if field is empty skip to next field
if not field_value.all().exists():
if not field_value.exists():
continue

if field_value:
Expand All @@ -190,11 +203,12 @@ def dehydrate(self, bundle):
c_type = ContentType.objects.get_for_model(child_class)

descriptors = Descriptor.objects.filter(object_id=bundle.obj.id, content_type=c_type, status=1)
thematic_areas = ResourceThematic.objects.filter(object_id=bundle.obj.id, content_type=c_type, status=1)
thematic_areas = ResourceThematic.objects.filter(object_id=bundle.obj.id, content_type=c_type, status=1).select_related('thematic_area')
attachments = Attachment.objects.filter(object_id=bundle.obj.id, content_type=c_type)
alternate_ids = ReferenceAlternateID.objects.filter(reference_id=bundle.obj.id)
library_records = ReferenceLocal.objects.filter(source=bundle.obj.id)
complement_data = ReferenceComplement.objects.filter(source=bundle.obj.id)
alternate_ids = getattr(bundle.obj, 'alternate_ids', [])
library_records = getattr(bundle.obj, 'library_records', [])
complement_data = getattr(bundle.obj, 'complement_data', [])

related_obj_id = 'biblio-{}'.format(bundle.obj.id)
related_resources = LinkedResource.objects.filter( Q(object_id=bundle.obj.id, content_type=c_type) | Q(internal_id=related_obj_id) )
related_research = LinkedResearchData.objects.filter(object_id=bundle.obj.id, content_type=c_type)
Expand Down
110 changes: 110 additions & 0 deletions bireme/biblioref/management/commands/export_references.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# coding: utf-8
"""
Exporta References em JSON via Tastypie Resource.

Este comando exporta todos os registros do modelo Reference (biblioref.models.Reference)
usando a lógica completa do ReferenceResource (api.bibliographic.ReferenceResource),
incluindo full_dehydrate() e dehydrate() para formato idêntico à API.

FILTRO APLICADO (equivalente ao fq do get_search):
status__in=[-3, 0, 1]

ARQUIVOS GERADOS:
export_references_jsons/references_00000000_00000100.json
export_references_jsons/references_00000100_00000200.json
...

USO:
# Exporta tudo em chunks de 1000 (default: 100)
python manage.py export_references 1000

# Retoma exportação a partir do offset 50000
python manage.py export_references 1000 --offset=50000

# Especifica diretório customizado
python manage.py export_references 500 --outdir=/backup/refs_json
"""

from django.core.management.base import BaseCommand
from api.bibliographic import ReferenceResource
import json
import os
import time


class Command(BaseCommand):
    """Export every Reference (status -3, 0, 1) as paginated JSON files.

    Each chunk of ``count`` records is serialized through the Tastypie
    ``ReferenceResource`` (``full_dehydrate`` + ``dehydrate``) so the output
    matches the API's representation, then written to
    ``<outdir>/references_<from>_<to>.json``.
    """

    help = 'Exporta toda a base do ReferenceResource em JSON, em arquivos paginados (status -3, 0, 1)'

    def add_arguments(self, parser):
        """Register CLI arguments: chunk size, resume offset and output dir."""
        # Optional positional chunk size (default: 100 records per file).
        parser.add_argument('count', type=int, nargs='?', default=100)
        # Resume point: skip records before this offset.
        parser.add_argument('--offset', type=int, default=0)
        parser.add_argument(
            '--outdir',
            default='export_references_jsons',
            help='Diretório de saída dos arquivos JSON (default: export_references_jsons)'
        )

    def handle(self, *args, **options):
        resource = ReferenceResource()
        base_qs = resource._meta.queryset
        # Same status filter the API's get_search applies (fq equivalent).
        # order_by('pk') is essential for correctness: slicing an UNORDERED
        # queryset lets the database return rows in an arbitrary order on
        # each query, so successive chunks could skip or duplicate records.
        qs = base_qs.filter(status__in=[-3, 0, 1]).order_by('pk')

        count = options['count']
        initial_offset = options['offset']
        outdir = options['outdir']
        os.makedirs(outdir, exist_ok=True)

        total = qs.count()
        if total == 0:
            self.stdout.write(self.style.WARNING('Nenhum registro para exportar (status -3, 0, 1)'))
            return

        # Guard: nothing to do when the resume offset is past the last record.
        if initial_offset >= total:
            self.stdout.write(
                self.style.WARNING(f'Offset inicial {initial_offset} >= total {total}. Nada para exportar.')
            )
            return

        total_exported = 0
        i = 0
        for slice_from in range(initial_offset, total, count):
            t0 = time.time()  # per-chunk timer start

            slice_to = min(slice_from + count, total)

            objects = list(qs[slice_from:slice_to])
            if not objects:
                continue

            data = []
            for obj in objects:
                bundle = resource.build_bundle(obj=obj, request=None)
                bundle = resource.full_dehydrate(bundle)
                # NOTE(review): tastypie's full_dehydrate() already calls
                # dehydrate() at its end, so this extra call may apply it
                # twice — kept for output parity; TODO confirm and remove.
                bundle = resource.dehydrate(bundle)
                data.append(bundle.data)

            filename = f"references_{slice_from:08d}_{slice_to:08d}.json"
            filepath = os.path.join(outdir, filename)

            # default=str stringifies non-JSON types (dates, Decimals, model
            # instances) rather than raising — deliberate for this export.
            with open(filepath, 'w', encoding='utf-8') as f:
                json.dump(data, f, ensure_ascii=False, default=str)

            exported_now = len(data)
            total_exported += exported_now
            i += 1

            dt = time.time() - t0  # per-chunk timer end

            self.stdout.write(
                self.style.SUCCESS(
                    f'Chunk {i}: exportados {exported_now} registros de um total de {total}'
                    f' (status -3, 0, 1) para {filepath} em {dt:.2f}s'
                )
            )

        self.stdout.write(
            self.style.SUCCESS(
                f'Exportação concluída. Total de registros exportados: {total_exported} de {total}'
            )
        )
2 changes: 1 addition & 1 deletion docker-compose-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ services:
- nginx-proxy

fi_admin_cache:
image: bitnami/memcached:1.6.32
image: bitnami/memcached:latest
container_name: fi-admin-cache
restart: unless-stopped
ports:
Expand Down